hook gvariant vectors up to kdbus
[platform/upstream/glib.git] / glib / update-pcre / ucp.patch
1 From 23d48c5fc7aa889dc7798f9c64acd43d9cb34683 Mon Sep 17 00:00:00 2001
2 From: Christian Persch <chpe@gnome.org>
3 Date: Sun, 12 Feb 2012 21:20:33 +0100
4 Subject: [PATCH] regex: Use glib for unicode data
5
6 Use g_unichar_type() and g_unichar_get_script() instead of pcre tables.
7 ---
8  glib/pcre/pcre_compile.c  |   26 +++---
9  glib/pcre/pcre_dfa_exec.c |   96 ++++++++--------
10  glib/pcre/pcre_exec.c     |   26 +++---
11  glib/pcre/pcre_internal.h |   11 +--
12  glib/pcre/pcre_tables.c   |   16 +++
13  glib/pcre/pcre_xclass.c   |   24 ++--
14  glib/pcre/ucp.h           |  265 +++++++++++++++++++++++----------------------
15  7 files changed, 239 insertions(+), 225 deletions(-)
16
17 diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
18 index 21bef80..a6c84e1 100644
19 --- a/glib/pcre/pcre_compile.c
20 +++ b/glib/pcre/pcre_compile.c
21 @@ -2920,43 +2920,43 @@ Returns:       TRUE if auto-possessifying is OK
22  static BOOL
23  check_char_prop(int c, int ptype, int pdata, BOOL negated)
24  {
25 -const ucd_record *prop = GET_UCD(c);
26 +const pcre_uint8 chartype = UCD_CHARTYPE(c);
27  switch(ptype)
28    {
29    case PT_LAMP:
30 -  return (prop->chartype == ucp_Lu ||
31 -          prop->chartype == ucp_Ll ||
32 -          prop->chartype == ucp_Lt) == negated;
33 +  return (chartype == ucp_Lu ||
34 +          chartype == ucp_Ll ||
35 +          chartype == ucp_Lt) == negated;
36  
37    case PT_GC:
38 -  return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
39 +  return (pdata == PRIV(ucp_gentype)[chartype]) == negated;
40  
41    case PT_PC:
42 -  return (pdata == prop->chartype) == negated;
43 +  return (pdata == chartype) == negated;
44  
45    case PT_SC:
46 -  return (pdata == prop->script) == negated;
47 +  return (pdata == UCD_SCRIPT(c)) == negated;
48  
49    /* These are specials */
50  
51    case PT_ALNUM:
52 -  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
53 -          PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
54 +  return (PRIV(ucp_gentype)[chartype] == ucp_L ||
55 +          PRIV(ucp_gentype)[chartype] == ucp_N) == negated;
56  
57    case PT_SPACE:    /* Perl space */
58 -  return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
59 +  return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
60            c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
61            == negated;
62  
63    case PT_PXSPACE:  /* POSIX space */
64 -  return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
65 +  return (PRIV(ucp_gentype)[chartype] == ucp_Z ||
66            c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
67            c == CHAR_FF || c == CHAR_CR)
68            == negated;
69  
70    case PT_WORD:
71 -  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
72 -          PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
73 +  return (PRIV(ucp_gentype)[chartype] == ucp_L ||
74 +          PRIV(ucp_gentype)[chartype] == ucp_N ||
75            c == CHAR_UNDERSCORE) == negated;
76    }
77  return FALSE;
78 diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c
79 index 9565d46..3f913ce 100644
80 --- a/glib/pcre/pcre_dfa_exec.c
81 +++ b/glib/pcre/pcre_dfa_exec.c
82 @@ -1060,7 +1060,7 @@ for (;;)
83        if (clen > 0)
84          {
85          BOOL OK;
86 -        const ucd_record * prop = GET_UCD(c);
87 +        const pcre_uint8 chartype = UCD_CHARTYPE(c);
88          switch(code[1])
89            {
90            case PT_ANY:
91 @@ -1068,43 +1068,43 @@ for (;;)
92            break;
93  
94            case PT_LAMP:
95 -          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
96 -               prop->chartype == ucp_Lt;
97 +          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
98 +               chartype == ucp_Lt;
99            break;
100  
101            case PT_GC:
102 -          OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
103 +          OK = PRIV(ucp_gentype)[chartype] == code[2];
104            break;
105  
106            case PT_PC:
107 -          OK = prop->chartype == code[2];
108 +          OK = chartype == code[2];
109            break;
110  
111            case PT_SC:
112 -          OK = prop->script == code[2];
113 +          OK = UCD_SCRIPT(c) == code[2];
114            break;
115  
116            /* These are specials for combination cases. */
117  
118            case PT_ALNUM:
119 -          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
120 -               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
121 +          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
122 +               PRIV(ucp_gentype)[chartype] == ucp_N;
123            break;
124  
125            case PT_SPACE:    /* Perl space */
126 -          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
127 +          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
128                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
129            break;
130  
131            case PT_PXSPACE:  /* POSIX space */
132 -          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
133 +          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
134                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
135                 c == CHAR_FF || c == CHAR_CR;
136            break;
137  
138            case PT_WORD:
139 -          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
140 -               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
141 +          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
142 +               PRIV(ucp_gentype)[chartype] == ucp_N ||
143                 c == CHAR_UNDERSCORE;
144            break;
145  
146 @@ -1294,7 +1294,7 @@ for (;;)
147        if (clen > 0)
148          {
149          BOOL OK;
150 -        const ucd_record * prop = GET_UCD(c);
151 +        const pcre_uint8 chartype = UCD_CHARTYPE(c);
152          switch(code[2])
153            {
154            case PT_ANY:
155 @@ -1302,43 +1302,43 @@ for (;;)
156            break;
157  
158            case PT_LAMP:
159 -          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
160 -            prop->chartype == ucp_Lt;
161 +          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
162 +            chartype == ucp_Lt;
163            break;
164  
165            case PT_GC:
166 -          OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
167 +          OK = PRIV(ucp_gentype)[chartype] == code[3];
168            break;
169  
170            case PT_PC:
171 -          OK = prop->chartype == code[3];
172 +          OK = chartype == code[3];
173            break;
174  
175            case PT_SC:
176 -          OK = prop->script == code[3];
177 +          OK = UCD_SCRIPT(c) == code[3];
178            break;
179  
180            /* These are specials for combination cases. */
181  
182            case PT_ALNUM:
183 -          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
184 -               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
185 +          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
186 +               PRIV(ucp_gentype)[chartype] == ucp_N;
187            break;
188  
189            case PT_SPACE:    /* Perl space */
190 -          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
191 +          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
192                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
193            break;
194  
195            case PT_PXSPACE:  /* POSIX space */
196 -          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
197 +          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
198                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
199                 c == CHAR_FF || c == CHAR_CR;
200            break;
201  
202            case PT_WORD:
203 -          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
204 -               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
205 +          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
206 +               PRIV(ucp_gentype)[chartype] == ucp_N ||
207                 c == CHAR_UNDERSCORE;
208            break;
209  
210 @@ -1541,7 +1541,7 @@ for (;;)
211        if (clen > 0)
212          {
213          BOOL OK;
214 -        const ucd_record * prop = GET_UCD(c);
215 +        const pcre_uint8 chartype = UCD_CHARTYPE(c);
216          switch(code[2])
217            {
218            case PT_ANY:
219 @@ -1549,43 +1549,43 @@ for (;;)
220            break;
221  
222            case PT_LAMP:
223 -          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
224 -            prop->chartype == ucp_Lt;
225 +          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
226 +            chartype == ucp_Lt;
227            break;
228  
229            case PT_GC:
230 -          OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
231 +          OK = PRIV(ucp_gentype)[chartype] == code[3];
232            break;
233  
234            case PT_PC:
235 -          OK = prop->chartype == code[3];
236 +          OK = chartype == code[3];
237            break;
238  
239            case PT_SC:
240 -          OK = prop->script == code[3];
241 +          OK = UCD_SCRIPT(c) == code[3];
242            break;
243  
244            /* These are specials for combination cases. */
245  
246            case PT_ALNUM:
247 -          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
248 -               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
249 +          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
250 +               PRIV(ucp_gentype)[chartype] == ucp_N;
251            break;
252  
253            case PT_SPACE:    /* Perl space */
254 -          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
255 +          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
256                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
257            break;
258  
259            case PT_PXSPACE:  /* POSIX space */
260 -          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
261 +          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
262                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
263                 c == CHAR_FF || c == CHAR_CR;
264            break;
265  
266            case PT_WORD:
267 -          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
268 -               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
269 +          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
270 +               PRIV(ucp_gentype)[chartype] == ucp_N ||
271                 c == CHAR_UNDERSCORE;
272            break;
273  
274 @@ -1813,7 +1813,7 @@ for (;;)
275        if (clen > 0)
276          {
277          BOOL OK;
278 -        const ucd_record * prop = GET_UCD(c);
279 +        const pcre_uint8 chartype = UCD_CHARTYPE(c);
280          switch(code[1 + IMM2_SIZE + 1])
281            {
282            case PT_ANY:
283 @@ -1821,43 +1821,43 @@ for (;;)
284            break;
285  
286            case PT_LAMP:
287 -          OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
288 -            prop->chartype == ucp_Lt;
289 +          OK = chartype == ucp_Lu || chartype == ucp_Ll ||
290 +            chartype == ucp_Lt;
291            break;
292  
293            case PT_GC:
294 -          OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
295 +          OK = PRIV(ucp_gentype)[chartype] == code[1 + IMM2_SIZE + 2];
296            break;
297  
298            case PT_PC:
299 -          OK = prop->chartype == code[1 + IMM2_SIZE + 2];
300 +          OK = chartype == code[1 + IMM2_SIZE + 2];
301            break;
302  
303            case PT_SC:
304 -          OK = prop->script == code[1 + IMM2_SIZE + 2];
305 +          OK = UCD_SCRIPT(c) == code[1 + IMM2_SIZE + 2];
306            break;
307  
308            /* These are specials for combination cases. */
309  
310            case PT_ALNUM:
311 -          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
312 -               PRIV(ucp_gentype)[prop->chartype] == ucp_N;
313 +          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
314 +               PRIV(ucp_gentype)[chartype] == ucp_N;
315            break;
316  
317            case PT_SPACE:    /* Perl space */
318 -          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
319 +          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
320                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
321            break;
322  
323            case PT_PXSPACE:  /* POSIX space */
324 -          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
325 +          OK = PRIV(ucp_gentype)[chartype] == ucp_Z ||
326                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
327                 c == CHAR_FF || c == CHAR_CR;
328            break;
329  
330            case PT_WORD:
331 -          OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
332 -               PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
333 +          OK = PRIV(ucp_gentype)[chartype] == ucp_L ||
334 +               PRIV(ucp_gentype)[chartype] == ucp_N ||
335                 c == CHAR_UNDERSCORE;
336            break;
337  
338 diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c
339 index 830b8b5..c89a3f9 100644
340 --- a/glib/pcre/pcre_exec.c
341 +++ b/glib/pcre/pcre_exec.c
342 @@ -2565,7 +2565,7 @@ for (;;)
343        }
344      GETCHARINCTEST(c, eptr);
345        {
346 -      const ucd_record *prop = GET_UCD(c);
347 +      const pcre_uint8 chartype = UCD_CHARTYPE(c);
348  
349        switch(ecode[1])
350          {
351 @@ -2574,44 +2574,44 @@ for (;;)
352          break;
353  
354          case PT_LAMP:
355 -        if ((prop->chartype == ucp_Lu ||
356 -             prop->chartype == ucp_Ll ||
357 -             prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
358 +        if ((chartype == ucp_Lu ||
359 +             chartype == ucp_Ll ||
360 +             chartype == ucp_Lt) == (op == OP_NOTPROP))
361            RRETURN(MATCH_NOMATCH);
362          break;
363  
364          case PT_GC:
365 -        if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
366 +        if ((ecode[2] != PRIV(ucp_gentype)[chartype]) == (op == OP_PROP))
367            RRETURN(MATCH_NOMATCH);
368          break;
369  
370          case PT_PC:
371 -        if ((ecode[2] != prop->chartype) == (op == OP_PROP))
372 +        if ((ecode[2] != chartype) == (op == OP_PROP))
373            RRETURN(MATCH_NOMATCH);
374          break;
375  
376          case PT_SC:
377 -        if ((ecode[2] != prop->script) == (op == OP_PROP))
378 +        if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
379            RRETURN(MATCH_NOMATCH);
380          break;
381  
382          /* These are specials */
383  
384          case PT_ALNUM:
385 -        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
386 -             PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
387 +        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
388 +             PRIV(ucp_gentype)[chartype] == ucp_N) == (op == OP_NOTPROP))
389            RRETURN(MATCH_NOMATCH);
390          break;
391  
392          case PT_SPACE:    /* Perl space */
393 -        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
394 +        if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
395               c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
396                 == (op == OP_NOTPROP))
397            RRETURN(MATCH_NOMATCH);
398          break;
399  
400          case PT_PXSPACE:  /* POSIX space */
401 -        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
402 +        if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
403               c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
404               c == CHAR_FF || c == CHAR_CR)
405                 == (op == OP_NOTPROP))
406 @@ -2619,8 +2619,8 @@ for (;;)
407          break;
408  
409          case PT_WORD:
410 -        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
411 -             PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
412 +        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
413 +             PRIV(ucp_gentype)[chartype] == ucp_N ||
414               c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
415            RRETURN(MATCH_NOMATCH);
416          break;
417 diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h
418 index 181c312..234af1b 100644
419 --- a/glib/pcre/pcre_internal.h
420 +++ b/glib/pcre/pcre_internal.h
421 @@ -2329,15 +2329,12 @@ extern const int         PRIV(ucp_typerange)[];
422  #ifdef SUPPORT_UCP
423  /* UCD access macros */
424  
425 -#define UCD_BLOCK_SIZE 128
426 -#define GET_UCD(ch) (PRIV(ucd_records) + \
427 -        PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \
428 -        UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])
429 +unsigned int _pcre_ucp_othercase(const unsigned int c);
430  
431 -#define UCD_CHARTYPE(ch)  GET_UCD(ch)->chartype
432 -#define UCD_SCRIPT(ch)    GET_UCD(ch)->script
433 +#define UCD_CHARTYPE(ch)  (pcre_uint8)g_unichar_type((gunichar)(ch))
434 +#define UCD_SCRIPT(ch)    (pcre_uint8)g_unichar_get_script((gunichar)(ch))
435  #define UCD_CATEGORY(ch)  PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
436 -#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case)
437 +#define UCD_OTHERCASE(ch) (_pcre_ucp_othercase(ch))
438  
439  #endif /* SUPPORT_UCP */
440  
441 diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c
442 index 7ac2d89..e401974 100644
443 --- a/glib/pcre/pcre_tables.c
444 +++ b/glib/pcre/pcre_tables.c
445 @@ -584,6 +584,22 @@ const ucp_type_table PRIV(utt)[] = {
446  
447  const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
448  
449 +unsigned int
450 +_pcre_ucp_othercase(const unsigned int c)
451 +{
452 +  unsigned int other_case = NOTACHAR;  /* unsigned, like c and the result */
453 +  /* Look up the opposite case of c with GLib's Unicode case functions. */
454 +  if (g_unichar_islower(c))
455 +    other_case = g_unichar_toupper(c);
456 +  else if (g_unichar_isupper(c))
457 +    other_case = g_unichar_tolower(c);
458 +  /* A character that maps onto itself has no distinct other case. */
459 +  if (other_case == c)
460 +    other_case = NOTACHAR;
461 +  /* NOTE(review): NOTACHAR (not c) means "no other case" - check callers. */
462 +  return other_case;
463 +}
464 +
465  #endif /* SUPPORT_UTF */
466  
467  /* End of pcre_tables.c */
468 diff --git a/glib/pcre/pcre_xclass.c b/glib/pcre/pcre_xclass.c
469 index dca7a39..e5a55d7 100644
470 --- a/glib/pcre/pcre_xclass.c
471 +++ b/glib/pcre/pcre_xclass.c
472 @@ -127,7 +127,7 @@ while ((t = *data++) != XCL_END)
473  #ifdef SUPPORT_UCP
474    else  /* XCL_PROP & XCL_NOTPROP */
475      {
476 -    const ucd_record *prop = GET_UCD(c);
477 +    const pcre_uint8 chartype = UCD_CHARTYPE(c);
478  
479      switch(*data)
480        {
481 @@ -136,46 +136,46 @@ while ((t = *data++) != XCL_END)
482        break;
483  
484        case PT_LAMP:
485 -      if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
486 -           prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
487 +      if ((chartype == ucp_Lu || chartype == ucp_Ll ||
488 +           chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
489        break;
490  
491        case PT_GC:
492 -      if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))
493 +      if ((data[1] == PRIV(ucp_gentype)[chartype]) == (t == XCL_PROP))
494          return !negated;
495        break;
496  
497        case PT_PC:
498 -      if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
499 +      if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
500        break;
501  
502        case PT_SC:
503 -      if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
504 +      if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
505        break;
506  
507        case PT_ALNUM:
508 -      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
509 -           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))
510 +      if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
511 +           PRIV(ucp_gentype)[chartype] == ucp_N) == (t == XCL_PROP))
512          return !negated;
513        break;
514  
515        case PT_SPACE:    /* Perl space */
516 -      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
517 +      if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
518             c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
519               == (t == XCL_PROP))
520          return !negated;
521        break;
522  
523        case PT_PXSPACE:  /* POSIX space */
524 -      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
525 +      if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
526             c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
527             c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
528          return !negated;
529        break;
530  
531        case PT_WORD:
532 -      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
533 -           PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
534 +      if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
535 +           PRIV(ucp_gentype)[chartype] == ucp_N || c == CHAR_UNDERSCORE)
536               == (t == XCL_PROP))
537          return !negated;
538        break;
539 diff --git a/glib/pcre/ucp.h b/glib/pcre/ucp.h
540 index 59c3bec..53a48c9 100644
541 --- a/glib/pcre/ucp.h
542 +++ b/glib/pcre/ucp.h
543 @@ -10,6 +10,7 @@ the UCD access macros. New values that are added for new releases of Unicode
544  should always be at the end of each enum, for backwards compatibility. */
545  
546  /* These are the general character categories. */
547 +#include "gunicode.h"
548  
549  enum {
550    ucp_C,     /* Other */
551 @@ -24,148 +25,148 @@ enum {
552  /* These are the particular character types. */
553  
554  enum {
555 -  ucp_Cc,    /* Control */
556 -  ucp_Cf,    /* Format */
557 -  ucp_Cn,    /* Unassigned */
558 -  ucp_Co,    /* Private use */
559 -  ucp_Cs,    /* Surrogate */
560 -  ucp_Ll,    /* Lower case letter */
561 -  ucp_Lm,    /* Modifier letter */
562 -  ucp_Lo,    /* Other letter */
563 -  ucp_Lt,    /* Title case letter */
564 -  ucp_Lu,    /* Upper case letter */
565 -  ucp_Mc,    /* Spacing mark */
566 -  ucp_Me,    /* Enclosing mark */
567 -  ucp_Mn,    /* Non-spacing mark */
568 -  ucp_Nd,    /* Decimal number */
569 -  ucp_Nl,    /* Letter number */
570 -  ucp_No,    /* Other number */
571 -  ucp_Pc,    /* Connector punctuation */
572 -  ucp_Pd,    /* Dash punctuation */
573 -  ucp_Pe,    /* Close punctuation */
574 -  ucp_Pf,    /* Final punctuation */
575 -  ucp_Pi,    /* Initial punctuation */
576 -  ucp_Po,    /* Other punctuation */
577 -  ucp_Ps,    /* Open punctuation */
578 -  ucp_Sc,    /* Currency symbol */
579 -  ucp_Sk,    /* Modifier symbol */
580 -  ucp_Sm,    /* Mathematical symbol */
581 -  ucp_So,    /* Other symbol */
582 -  ucp_Zl,    /* Line separator */
583 -  ucp_Zp,    /* Paragraph separator */
584 -  ucp_Zs     /* Space separator */
585 +  ucp_Cc = G_UNICODE_CONTROL,                   /* Control */
586 +  ucp_Cf = G_UNICODE_FORMAT,                    /* Format */
587 +  ucp_Cn = G_UNICODE_UNASSIGNED,                /* Unassigned */
588 +  ucp_Co = G_UNICODE_PRIVATE_USE,               /* Private use */
589 +  ucp_Cs = G_UNICODE_SURROGATE,                 /* Surrogate */
590 +  ucp_Ll = G_UNICODE_LOWERCASE_LETTER,          /* Lower case letter */
591 +  ucp_Lm = G_UNICODE_MODIFIER_LETTER,           /* Modifier letter */
592 +  ucp_Lo = G_UNICODE_OTHER_LETTER,              /* Other letter */
593 +  ucp_Lt = G_UNICODE_TITLECASE_LETTER,          /* Title case letter */
594 +  ucp_Lu = G_UNICODE_UPPERCASE_LETTER,          /* Upper case letter */
595 +  ucp_Mc = G_UNICODE_SPACING_MARK,              /* Spacing mark */
596 +  ucp_Me = G_UNICODE_ENCLOSING_MARK,            /* Enclosing mark */
597 +  ucp_Mn = G_UNICODE_NON_SPACING_MARK,          /* Non-spacing mark */
598 +  ucp_Nd = G_UNICODE_DECIMAL_NUMBER,            /* Decimal number */
599 +  ucp_Nl = G_UNICODE_LETTER_NUMBER,             /* Letter number */
600 +  ucp_No = G_UNICODE_OTHER_NUMBER,              /* Other number */
601 +  ucp_Pc = G_UNICODE_CONNECT_PUNCTUATION,       /* Connector punctuation */
602 +  ucp_Pd = G_UNICODE_DASH_PUNCTUATION,          /* Dash punctuation */
603 +  ucp_Pe = G_UNICODE_CLOSE_PUNCTUATION,         /* Close punctuation */
604 +  ucp_Pf = G_UNICODE_FINAL_PUNCTUATION,         /* Final punctuation */
605 +  ucp_Pi = G_UNICODE_INITIAL_PUNCTUATION,       /* Initial punctuation */
606 +  ucp_Po = G_UNICODE_OTHER_PUNCTUATION,         /* Other punctuation */
607 +  ucp_Ps = G_UNICODE_OPEN_PUNCTUATION,          /* Open punctuation */
608 +  ucp_Sc = G_UNICODE_CURRENCY_SYMBOL,           /* Currency symbol */
609 +  ucp_Sk = G_UNICODE_MODIFIER_SYMBOL,           /* Modifier symbol */
610 +  ucp_Sm = G_UNICODE_MATH_SYMBOL,               /* Mathematical symbol */
611 +  ucp_So = G_UNICODE_OTHER_SYMBOL,              /* Other symbol */
612 +  ucp_Zl = G_UNICODE_LINE_SEPARATOR,            /* Line separator */
613 +  ucp_Zp = G_UNICODE_PARAGRAPH_SEPARATOR,       /* Paragraph separator */
614 +  ucp_Zs = G_UNICODE_SPACE_SEPARATOR            /* Space separator */
615  };
616  
617  /* These are the script identifications. */
618  
619  enum {
620 -  ucp_Arabic,
621 -  ucp_Armenian,
622 -  ucp_Bengali,
623 -  ucp_Bopomofo,
624 -  ucp_Braille,
625 -  ucp_Buginese,
626 -  ucp_Buhid,
627 -  ucp_Canadian_Aboriginal,
628 -  ucp_Cherokee,
629 -  ucp_Common,
630 -  ucp_Coptic,
631 -  ucp_Cypriot,
632 -  ucp_Cyrillic,
633 -  ucp_Deseret,
634 -  ucp_Devanagari,
635 -  ucp_Ethiopic,
636 -  ucp_Georgian,
637 -  ucp_Glagolitic,
638 -  ucp_Gothic,
639 -  ucp_Greek,
640 -  ucp_Gujarati,
641 -  ucp_Gurmukhi,
642 -  ucp_Han,
643 -  ucp_Hangul,
644 -  ucp_Hanunoo,
645 -  ucp_Hebrew,
646 -  ucp_Hiragana,
647 -  ucp_Inherited,
648 -  ucp_Kannada,
649 -  ucp_Katakana,
650 -  ucp_Kharoshthi,
651 -  ucp_Khmer,
652 -  ucp_Lao,
653 -  ucp_Latin,
654 -  ucp_Limbu,
655 -  ucp_Linear_B,
656 -  ucp_Malayalam,
657 -  ucp_Mongolian,
658 -  ucp_Myanmar,
659 -  ucp_New_Tai_Lue,
660 -  ucp_Ogham,
661 -  ucp_Old_Italic,
662 -  ucp_Old_Persian,
663 -  ucp_Oriya,
664 -  ucp_Osmanya,
665 -  ucp_Runic,
666 -  ucp_Shavian,
667 -  ucp_Sinhala,
668 -  ucp_Syloti_Nagri,
669 -  ucp_Syriac,
670 -  ucp_Tagalog,
671 -  ucp_Tagbanwa,
672 -  ucp_Tai_Le,
673 -  ucp_Tamil,
674 -  ucp_Telugu,
675 -  ucp_Thaana,
676 -  ucp_Thai,
677 -  ucp_Tibetan,
678 -  ucp_Tifinagh,
679 -  ucp_Ugaritic,
680 -  ucp_Yi,
681 +  ucp_Arabic = G_UNICODE_SCRIPT_ARABIC,
682 +  ucp_Armenian = G_UNICODE_SCRIPT_ARMENIAN,
683 +  ucp_Bengali = G_UNICODE_SCRIPT_BENGALI,
684 +  ucp_Bopomofo = G_UNICODE_SCRIPT_BOPOMOFO,
685 +  ucp_Braille = G_UNICODE_SCRIPT_BRAILLE,
686 +  ucp_Buginese = G_UNICODE_SCRIPT_BUGINESE,
687 +  ucp_Buhid = G_UNICODE_SCRIPT_BUHID,
688 +  ucp_Canadian_Aboriginal = G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,
689 +  ucp_Cherokee = G_UNICODE_SCRIPT_CHEROKEE,
690 +  ucp_Common = G_UNICODE_SCRIPT_COMMON,
691 +  ucp_Coptic = G_UNICODE_SCRIPT_COPTIC,
692 +  ucp_Cypriot = G_UNICODE_SCRIPT_CYPRIOT,
693 +  ucp_Cyrillic = G_UNICODE_SCRIPT_CYRILLIC,
694 +  ucp_Deseret = G_UNICODE_SCRIPT_DESERET,
695 +  ucp_Devanagari = G_UNICODE_SCRIPT_DEVANAGARI,
696 +  ucp_Ethiopic = G_UNICODE_SCRIPT_ETHIOPIC,
697 +  ucp_Georgian = G_UNICODE_SCRIPT_GEORGIAN,
698 +  ucp_Glagolitic = G_UNICODE_SCRIPT_GLAGOLITIC,
699 +  ucp_Gothic = G_UNICODE_SCRIPT_GOTHIC,
700 +  ucp_Greek = G_UNICODE_SCRIPT_GREEK,
701 +  ucp_Gujarati = G_UNICODE_SCRIPT_GUJARATI,
702 +  ucp_Gurmukhi = G_UNICODE_SCRIPT_GURMUKHI,
703 +  ucp_Han = G_UNICODE_SCRIPT_HAN,
704 +  ucp_Hangul = G_UNICODE_SCRIPT_HANGUL,
705 +  ucp_Hanunoo = G_UNICODE_SCRIPT_HANUNOO,
706 +  ucp_Hebrew = G_UNICODE_SCRIPT_HEBREW,
707 +  ucp_Hiragana = G_UNICODE_SCRIPT_HIRAGANA,
708 +  ucp_Inherited = G_UNICODE_SCRIPT_INHERITED,
709 +  ucp_Kannada = G_UNICODE_SCRIPT_KANNADA,
710 +  ucp_Katakana = G_UNICODE_SCRIPT_KATAKANA,
711 +  ucp_Kharoshthi = G_UNICODE_SCRIPT_KHAROSHTHI,
712 +  ucp_Khmer = G_UNICODE_SCRIPT_KHMER,
713 +  ucp_Lao = G_UNICODE_SCRIPT_LAO,
714 +  ucp_Latin = G_UNICODE_SCRIPT_LATIN,
715 +  ucp_Limbu = G_UNICODE_SCRIPT_LIMBU,
716 +  ucp_Linear_B = G_UNICODE_SCRIPT_LINEAR_B,
717 +  ucp_Malayalam = G_UNICODE_SCRIPT_MALAYALAM,
718 +  ucp_Mongolian = G_UNICODE_SCRIPT_MONGOLIAN,
719 +  ucp_Myanmar = G_UNICODE_SCRIPT_MYANMAR,
720 +  ucp_New_Tai_Lue = G_UNICODE_SCRIPT_NEW_TAI_LUE,
721 +  ucp_Ogham = G_UNICODE_SCRIPT_OGHAM,
722 +  ucp_Old_Italic = G_UNICODE_SCRIPT_OLD_ITALIC,
723 +  ucp_Old_Persian = G_UNICODE_SCRIPT_OLD_PERSIAN,
724 +  ucp_Oriya = G_UNICODE_SCRIPT_ORIYA,
725 +  ucp_Osmanya = G_UNICODE_SCRIPT_OSMANYA,
726 +  ucp_Runic = G_UNICODE_SCRIPT_RUNIC,
727 +  ucp_Shavian = G_UNICODE_SCRIPT_SHAVIAN,
728 +  ucp_Sinhala = G_UNICODE_SCRIPT_SINHALA,
729 +  ucp_Syloti_Nagri = G_UNICODE_SCRIPT_SYLOTI_NAGRI,
730 +  ucp_Syriac = G_UNICODE_SCRIPT_SYRIAC,
731 +  ucp_Tagalog = G_UNICODE_SCRIPT_TAGALOG,
732 +  ucp_Tagbanwa = G_UNICODE_SCRIPT_TAGBANWA,
733 +  ucp_Tai_Le = G_UNICODE_SCRIPT_TAI_LE,
734 +  ucp_Tamil = G_UNICODE_SCRIPT_TAMIL,
735 +  ucp_Telugu = G_UNICODE_SCRIPT_TELUGU,
736 +  ucp_Thaana = G_UNICODE_SCRIPT_THAANA,
737 +  ucp_Thai = G_UNICODE_SCRIPT_THAI,
738 +  ucp_Tibetan = G_UNICODE_SCRIPT_TIBETAN,
739 +  ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
740 +  ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
741 +  ucp_Yi = G_UNICODE_SCRIPT_YI,
742    /* New for Unicode 5.0: */
743 -  ucp_Balinese,
744 -  ucp_Cuneiform,
745 -  ucp_Nko,
746 -  ucp_Phags_Pa,
747 -  ucp_Phoenician,
748 +  ucp_Balinese = G_UNICODE_SCRIPT_BALINESE,
749 +  ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,
750 +  ucp_Nko = G_UNICODE_SCRIPT_NKO,
751 +  ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,
752 +  ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN,
753    /* New for Unicode 5.1: */
754 -  ucp_Carian,
755 -  ucp_Cham,
756 -  ucp_Kayah_Li,
757 -  ucp_Lepcha,
758 -  ucp_Lycian,
759 -  ucp_Lydian,
760 -  ucp_Ol_Chiki,
761 -  ucp_Rejang,
762 -  ucp_Saurashtra,
763 -  ucp_Sundanese,
764 -  ucp_Vai,
765 +  ucp_Carian = G_UNICODE_SCRIPT_CARIAN,
766 +  ucp_Cham = G_UNICODE_SCRIPT_CHAM,
767 +  ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI,
768 +  ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA,
769 +  ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN,
770 +  ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN,
771 +  ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI,
772 +  ucp_Rejang = G_UNICODE_SCRIPT_REJANG,
773 +  ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA,
774 +  ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE,
775 +  ucp_Vai = G_UNICODE_SCRIPT_VAI,
776    /* New for Unicode 5.2: */
777 -  ucp_Avestan,
778 -  ucp_Bamum,
779 -  ucp_Egyptian_Hieroglyphs,
780 -  ucp_Imperial_Aramaic,
781 -  ucp_Inscriptional_Pahlavi,
782 -  ucp_Inscriptional_Parthian,
783 -  ucp_Javanese,
784 -  ucp_Kaithi,
785 -  ucp_Lisu,
786 -  ucp_Meetei_Mayek,
787 -  ucp_Old_South_Arabian,
788 -  ucp_Old_Turkic,
789 -  ucp_Samaritan,
790 -  ucp_Tai_Tham,
791 -  ucp_Tai_Viet,
792 +  ucp_Avestan = G_UNICODE_SCRIPT_AVESTAN,
793 +  ucp_Bamum = G_UNICODE_SCRIPT_BAMUM,
794 +  ucp_Egyptian_Hieroglyphs = G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,
795 +  ucp_Imperial_Aramaic = G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,
796 +  ucp_Inscriptional_Pahlavi = G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,
797 +  ucp_Inscriptional_Parthian = G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN,
798 +  ucp_Javanese = G_UNICODE_SCRIPT_JAVANESE,
799 +  ucp_Kaithi = G_UNICODE_SCRIPT_KAITHI,
800 +  ucp_Lisu = G_UNICODE_SCRIPT_LISU,
801 +  ucp_Meetei_Mayek = G_UNICODE_SCRIPT_MEETEI_MAYEK,
802 +  ucp_Old_South_Arabian = G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,
803 +  ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKIC,
804 +  ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN,
805 +  ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM,
806 +  ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET,
807    /* New for Unicode 6.0.0: */
808 -  ucp_Batak,
809 -  ucp_Brahmi,
810 -  ucp_Mandaic,
811 +  ucp_Batak = G_UNICODE_SCRIPT_BATAK,
812 +  ucp_Brahmi = G_UNICODE_SCRIPT_BRAHMI,
813 +  ucp_Mandaic = G_UNICODE_SCRIPT_MANDAIC,
814    /* New for Unicode 6.1.0: */
815 -  ucp_Chakma,
816 -  ucp_Meroitic_Cursive,
817 -  ucp_Meroitic_Hieroglyphs,
818 -  ucp_Miao,
819 -  ucp_Sharada,
820 -  ucp_Sora_Sompeng,
821 -  ucp_Takri
822 +  ucp_Chakma = G_UNICODE_SCRIPT_CHAKMA,
823 +  ucp_Meroitic_Cursive = G_UNICODE_SCRIPT_MEROITIC_CURSIVE,
824 +  ucp_Meroitic_Hieroglyphs = G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS,
825 +  ucp_Miao = G_UNICODE_SCRIPT_MIAO,
826 +  ucp_Sharada = G_UNICODE_SCRIPT_SHARADA,
827 +  ucp_Sora_Sompeng = G_UNICODE_SCRIPT_SORA_SOMPENG,
828 +  ucp_Takri = G_UNICODE_SCRIPT_TAKRI,
829  };
830  
831  #endif
832 -- 
833 1.7.5.1.217.g4e3aa.dirty
834