Update.

author Ulrich Drepper <drepper@redhat.com>

Thu, 20 Nov 2003 23:36:40 +0000 (23:36 +0000)

committer Ulrich Drepper <drepper@redhat.com>

Thu, 20 Nov 2003 23:36:40 +0000 (23:36 +0000)
author Ulrich Drepper <drepper@redhat.com>
Thu, 20 Nov 2003 23:36:40 +0000 (23:36 +0000)
committer Ulrich Drepper <drepper@redhat.com>
Thu, 20 Nov 2003 23:36:40 +0000 (23:36 +0000)
diff --git a/ChangeLog b/ChangeLog

index 4fa706acc8283c4b88a5c099a0fdff13d298fb56..4eaa92c6192d338a213805ed48ca8c9450b744ae 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,35 @@
+2003-11-20  Ulrich Drepper  <drepper@redhat.com>
+
+       * posix/PTESTS: Fix first test in GA143.
+
+2003-11-20  Jakub Jelinek  <jakub@redhat.com>
+
+       * posix/regex_internal.h (re_dfastate_t): Remove trtable_search.
+       Add word_trtable.
+       * posix/regex_internal.c (create_newstate_common, free_state):
+       Don't free trtable_search.
+       * posix/regexec.c (check_matching): Remove fl_search argument.
+       (transit_state_sb): Likewise.  #ifdef out as unused.
+       (build_trtable): Remove fl_search argument.  Set state->word_trtable
+       and state->trtable.  Build separate word and non-word tables if
+       multi-byte and they differ for some character.
+       (transit_state): Remove fl_search argument.  Don't update
+       state->trtable here.  Handle state->word_trtable.
+       #ifdef out unused call to transit_state_sb.
+       (re_search_internal): Update check_matching caller.
+       (group_nodes_into_DFAstates): Don't clear non-ascii chars in accepts
+       bitmask for multi-byte locales.
+       * posix/bug-regex19.c (tests): Enable some commented out tests, add
+       2 new tests.
+       * posix/tst-rxspencer.c (mb_tests): Don't test [[=b=]] for now as
+       multi-byte.  Don't run identical multi-byte tests multiple times
+       unnecessarily.
+       (main): Check setlocale return value.
+       * posix/Makefile (tst-rxspencer-ARGS): Add --utf8 argument.
+       (tst-rxspencer-ENV): Remove MALLOC_TRACE, add LOCPATH.
+       ($(objpfx)tst-rxspencer-mem): Run another tst-rxspencer test
+       here, without --utf8 argument but with MALLOC_TRACE.
+
  2003-11-19  Jakub Jelinek  <jakub@redhat.com>
  
         * posix/regexec.c (extend_buffers): Don't allocate
diff --git a/localedata/ChangeLog b/localedata/ChangeLog

index 77ace9d0ce8b528b46d78889ef36bf9b68e5dac1..fcce659c0c18d5ad13f0170a9e8af0d1bd28976c 100644 (file)
--- a/localedata/ChangeLog
+++ b/localedata/ChangeLog
@@ -1,3 +1,7 @@
+2003-11-20  Jakub Jelinek  <jakub@redhat.com>
+
+       * Makefile (LOCALES): Add cs_CZ.UTF-8.
+
  2003-11-15  Ulrich Drepper  <drepper@redhat.com>
  
         * Makefile (tst-leaks-ENV): Add LOCPATH.
diff --git a/localedata/Makefile b/localedata/Makefile

index ebba83a42a1816de81785c9d5a977b3e2b93e79d..68dd9e8feffcc28eccc7435b5dd88d7ca0023579 100644 (file)
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -132,7 +132,7 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \
            en_US.ISO-8859-1 ja_JP.EUC-JP da_DK.ISO-8859-1 \
            hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 ja_JP.SJIS fr_FR.ISO-8859-1 \
            vi_VN.TCVN5712-1 nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 \
-          tr_TR.UTF-8
+          tr_TR.UTF-8 cs_CZ.UTF-8
  LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g')
  CHARMAPS := $(shell echo "$(LOCALES)" | \
                     sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g)
diff --git a/posix/Makefile b/posix/Makefile

index 692b474764b23210a0e51e2ba18aea5bb44d347e..c74b6312418770db510a80fafcbf34346450a3e7 100644 (file)
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -148,7 +148,6 @@ tst-exec-ARGS = -- $(built-program-cmd)
  tst-spawn-ARGS = -- $(built-program-cmd)
  tst-dir-ARGS = `pwd` `cd $(common-objdir)/$(subdir); pwd` `cd $(common-objdir); pwd` $(objpfx)tst-dir
  tst-chmod-ARGS = `pwd`
-tst-rxspencer-ARGS = rxspencer/tests
  
  tst-fnmatch-ENV = LOCPATH=$(common-objpfx)localedata
  tst-regexloc-ENV = LOCPATH=$(common-objpfx)localedata
@@ -160,6 +159,8 @@ bug-regex17-ENV = LOCPATH=$(common-objpfx)localedata
  bug-regex18-ENV = LOCPATH=$(common-objpfx)localedata
  bug-regex19-ENV = LOCPATH=$(common-objpfx)localedata
  bug-regex20-ENV = LOCPATH=$(common-objpfx)localedata
+tst-rxspencer-ARGS = --utf8 rxspencer/tests
+tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata
  
  testcases.h: TESTS TESTS2C.sed
         sed -f TESTS2C.sed < $< > $@T
@@ -207,9 +208,13 @@ bug-regex21-ENV = MALLOC_TRACE=$(objpfx)bug-regex21.mtrace
  $(objpfx)bug-regex21-mem: $(objpfx)bug-regex21.out
         $(common-objpfx)malloc/mtrace $(objpfx)bug-regex21.mtrace > $@
  
-tst-rxspencer-ENV = MALLOC_TRACE=$(objpfx)tst-rxspencer.mtrace
-
+# tst-rxspencer.mtrace is generated only by a run without the --utf8
+# option, since otherwise the file grows to almost 100MB and takes a
+# very long time to process.
  $(objpfx)tst-rxspencer-mem: $(objpfx)tst-rxspencer.out
+       MALLOC_TRACE=$(objpfx)tst-rxspencer.mtrace $(tst-rxspencer-ENV) \
+         $(run-program-prefix) $(objpfx)tst-rxspencer rxspencer/tests \
+         > /dev/null
         $(common-objpfx)malloc/mtrace $(objpfx)tst-rxspencer.mtrace > $@
  
  $(objpfx)tst-getconf.out: tst-getconf.sh $(objpfx)getconf
diff --git a/posix/PTESTS b/posix/PTESTS

index 8235384fec1abb2a21808928f3e8f78f8d514351..7d2676e20f11057f6207d48731e90211f485a8c5 100644 (file)
--- a/posix/PTESTS
+++ b/posix/PTESTS
@@ -270,7 +270,7 @@
  1¦63¦a\{1,63\}¦aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa¦
  # 2.8.3.4  BRE Precedence
  # GA143
-2¦20¦\^\[[[.].]]\\(\\1\\)\\*\\{1,2\\}\$¦a^[]\(1\)\*\{1,2\}$b¦
+2¦20¦\^\[[[.].]]\\(\\1\\)\*\\{1,2\\}\$¦a^[]\(1\)*\{1,2\}$b¦
  1¦6¦[[=*=]][[=\=]][[=]=]][[===]][[...]][[:punct:]]¦*\]=.;¦
  1¦6¦[$\(*\)^]*¦$\()*^¦
  1¦1¦[\1]¦1¦
diff --git a/posix/bug-regex19.c b/posix/bug-regex19.c

index 837ab654bc84890c85d97ce01dfd49893f33b772..fb870338c3655edeb1499a74d6ebc7b05832078a 100644 (file)
--- a/posix/bug-regex19.c
+++ b/posix/bug-regex19.c
@@ -37,17 +37,21 @@ static struct
       \xc3\x96          LATIN CAPITAL LETTER O WITH DIAERESIS
       \xe2\x80\x94      EM DASH  */
    /* Should not match.  */
+  {RE_SYNTAX_POSIX_BASIC, "\\<A", "aOAA", 0, -1},
    {RE_SYNTAX_POSIX_BASIC, "\\<A", "aOAA", 2, -1},
    {RE_SYNTAX_POSIX_BASIC, "A\\>", "aAAO", 1, -1},
+  {RE_SYNTAX_POSIX_BASIC, "\\bA", "aOAA", 0, -1},
    {RE_SYNTAX_POSIX_BASIC, "\\bA", "aOAA", 2, -1},
    {RE_SYNTAX_POSIX_BASIC, "A\\b", "aAAO", 1, -1},
+  {RE_SYNTAX_POSIX_BASIC, "\\<\xc3\x84", "a\xc3\x96\xc3\x84\xc3\x84", 0, -1},
    {RE_SYNTAX_POSIX_BASIC, "\\<\xc3\x84", "a\xc3\x96\xc3\x84\xc3\x84", 3, -1},
    {RE_SYNTAX_POSIX_BASIC, "\xc3\x84\\>", "a\xc3\x84\xc3\x84\xc3\x96", 1, -1},
  #if 0
-  /* XXX Not used since they fail so far.  */
+  /* XXX these 2 tests still fail.  */
+  {RE_SYNTAX_POSIX_BASIC, "\\b\xc3\x84", "a\xc3\x96\xc3\x84\xc3\x84", 0, -1},
    {RE_SYNTAX_POSIX_BASIC, "\\b\xc3\x84", "a\xc3\x96\xc3\x84\xc3\x84", 3, -1},
-  {RE_SYNTAX_POSIX_BASIC, "\xc3\x84\\b", "a\xc3\x84\xc3\x84\xc3\x96", 1, -1},
  #endif
+  {RE_SYNTAX_POSIX_BASIC, "\xc3\x84\\b", "a\xc3\x84\xc3\x84\xc3\x96", 1, -1},
    /* Should match.  */
    {RE_SYNTAX_POSIX_BASIC, "\\<A", "AA", 0, 0},
    {RE_SYNTAX_POSIX_BASIC, "\\<A", "a-AA", 2, 2},
@@ -57,8 +61,6 @@ static struct
    {RE_SYNTAX_POSIX_BASIC, "\\bA", "a-AA", 2, 2},
    {RE_SYNTAX_POSIX_BASIC, "A\\b", "aAA-", 1, 2},
    {RE_SYNTAX_POSIX_BASIC, "A\\b", "aAA", 1, 2},
-#if 0
-  /* XXX Not used since they fail so far.  */
    {RE_SYNTAX_POSIX_BASIC, "\\<\xc3\x84", "\xc3\x84\xc3\x84", 0, 0},
    {RE_SYNTAX_POSIX_BASIC, "\\<\xc3\x84", "a\xe2\x80\x94\xc3\x84\xc3\x84", 4, 4},
    {RE_SYNTAX_POSIX_BASIC, "\xc3\x84\\>", "a\xc3\x84\xc3\x84\xe2\x80\x94", 1, 3},
@@ -67,7 +69,6 @@ static struct
    {RE_SYNTAX_POSIX_BASIC, "\\b\xc3\x84", "a\xe2\x80\x94\xc3\x84\xc3\x84", 4, 4},
    {RE_SYNTAX_POSIX_BASIC, "\xc3\x84\\b", "a\xc3\x84\xc3\x84\xe2\x80\x94", 1, 3},
    {RE_SYNTAX_POSIX_BASIC, "\xc3\x84\\b", "a\xc3\x84\xc3\x84", 1, 3}
-#endif
  };
  
  int
diff --git a/posix/regex_internal.c b/posix/regex_internal.c

index 859fe16c616eba0bc50551f460609343e922842e..71496ab90692c4d336c31bb271d4af3d99d6b4d5 100644 (file)
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -1207,7 +1207,6 @@ create_newstate_common (dfa, nodes, hash)
        return NULL;
      }
    newstate->trtable = NULL;
-  newstate->trtable_search = NULL;
    newstate->hash = hash;
    return newstate;
  }
@@ -1369,6 +1368,5 @@ free_state (state)
      }
    re_node_set_free (&state->nodes);
    re_free (state->trtable);
-  re_free (state->trtable_search);
    re_free (state);
  }
diff --git a/posix/regex_internal.h b/posix/regex_internal.h

index 628dc94066a21face9d41fcf725e1f4884fcb615..0230b5d73e26bc1159cc08a9dead4a76036444b3 100644 (file)
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -456,7 +456,6 @@ struct re_dfastate_t
    re_node_set nodes;
    re_node_set *entrance_nodes;
    struct re_dfastate_t **trtable;
-  struct re_dfastate_t **trtable_search;
    /* If this state is a special state.
       A state is a special state if the state is the halt state, or
       a anchor.  */
@@ -469,6 +468,7 @@ struct re_dfastate_t
    /* If this state has backreference node(s).  */
    unsigned int has_backref : 1;
    unsigned int has_constraint : 1;
+  unsigned int word_trtable : 1;
  };
  typedef struct re_dfastate_t re_dfastate_t;
  
diff --git a/posix/regexec.c b/posix/regexec.c

index 4688c9babb81c557ccdf606043517acd76f071e1..91c48b3c4e0e3aeb73719abc70e28d06bf5bfaf3 100644 (file)
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -57,7 +57,7 @@ static re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
  static reg_errcode_t prune_impossible_nodes (const regex_t *preg,
                                              re_match_context_t *mctx);
  static int check_matching (const regex_t *preg, re_match_context_t *mctx,
-                          int fl_search, int fl_longest_match);
+                          int fl_longest_match);
  static int check_halt_node_context (const re_dfa_t *dfa, int node,
                                     unsigned int context);
  static int check_halt_state_context (const regex_t *preg,
@@ -123,15 +123,16 @@ static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
                                         re_dfastate_t **src, int num);
  static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
                                      re_match_context_t *mctx,
-                                    re_dfastate_t *state, int fl_search);
+                                    re_dfastate_t *state);
  static reg_errcode_t check_subexp_matching_top (re_dfa_t *dfa,
                                                 re_match_context_t *mctx,
                                                 re_node_set *cur_nodes,
                                                 int str_idx);
+#if 0
  static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
                                         re_dfastate_t *pstate,
-                                       int fl_search,
                                         re_match_context_t *mctx);
+#endif
  #ifdef RE_ENABLE_I18N
  static reg_errcode_t transit_state_mb (const regex_t *preg,
                                        re_dfastate_t *pstate,
@@ -173,8 +174,7 @@ static reg_errcode_t expand_bkref_cache (const regex_t *preg,
                                          int last_str, int subexp_num,
                                          int fl_open);
  static re_dfastate_t **build_trtable (const regex_t *dfa,
-                                     const re_dfastate_t *state,
-                                     int fl_search);
+                                     re_dfastate_t *state);
  #ifdef RE_ENABLE_I18N
  static int check_node_accept_bytes (const regex_t *preg, int node_idx,
                                     const re_string_t *input, int idx);
@@ -741,7 +741,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
           /* It seems to be appropriate one, then use the matcher.  */
           /* We assume that the matching starts from 0.  */
           mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
-         match_last = check_matching (preg, &mctx, 0, fl_longest_match);
+         match_last = check_matching (preg, &mctx, fl_longest_match);
           if (match_last != -1)
             {
               if (BE (match_last == -2, 0))
@@ -919,8 +919,8 @@ acquire_init_state_context (err, preg, mctx, idx)
    if (dfa->init_state->has_constraint)
      {
        unsigned int context;
-      context =  re_string_context_at (mctx->input, idx - 1, mctx->eflags,
-                                      preg->newline_anchor);
+      context = re_string_context_at (mctx->input, idx - 1, mctx->eflags,
+                                     preg->newline_anchor);
        if (IS_WORD_CONTEXT (context))
         return dfa->init_state_word;
        else if (IS_ORDINARY_CONTEXT (context))
@@ -947,16 +947,15 @@ acquire_init_state_context (err, preg, mctx, idx)
  /* Check whether the regular expression match input string INPUT or not,
     and return the index where the matching end, return -1 if not match,
     or return -2 in case of an error.
-   FL_SEARCH means we must search where the matching starts,
     FL_LONGEST_MATCH means we want the POSIX longest matching.
     Note that the matcher assume that the maching starts from the current
     index of the buffer.  */
  
  static int
-check_matching (preg, mctx, fl_search, fl_longest_match)
+check_matching (preg, mctx, fl_longest_match)
      const regex_t *preg;
      re_match_context_t *mctx;
-    int fl_search, fl_longest_match;
+    int fl_longest_match;
  {
    re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
    reg_errcode_t err;
@@ -1006,31 +1005,15 @@ check_matching (preg, mctx, fl_search, fl_longest_match)
  
    while (!re_string_eoi (mctx->input))
      {
-      cur_state = transit_state (&err, preg, mctx, cur_state,
-                                fl_search && !match);
+      cur_state = transit_state (&err, preg, mctx, cur_state);
        if (cur_state == NULL) /* Reached at the invalid state or an error.  */
         {
           cur_str_idx = re_string_cur_idx (mctx->input);
           if (BE (err != REG_NOERROR, 0))
             return -2;
-         if (fl_search && !match)
-           {
-             /* Restart from initial state, since we are searching
-                the point from where matching start.  */
-#ifdef RE_ENABLE_I18N
-             if (dfa->mb_cur_max == 1
-                 || re_string_first_byte (mctx->input, cur_str_idx))
-#endif /* RE_ENABLE_I18N */
-               cur_state = acquire_init_state_context (&err, preg, mctx,
-                                                       cur_str_idx);
-             if (BE (cur_state == NULL && err != REG_NOERROR, 0))
-               return -2;
-             if (mctx->state_log != NULL)
-               mctx->state_log[cur_str_idx] = cur_state;
-           }
-         else if (!fl_longest_match && match)
+         if (!fl_longest_match && match)
             break;
-         else /* (fl_longest_match && match) || (!fl_search && !match)  */
+         else
             {
               if (mctx->state_log == NULL)
                 break;
@@ -2069,12 +2052,11 @@ sift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx)
     update the destination of STATE_LOG.  */
  
  static re_dfastate_t *
-transit_state (err, preg, mctx, state, fl_search)
+transit_state (err, preg, mctx, state)
       reg_errcode_t *err;
       const regex_t *preg;
       re_match_context_t *mctx;
       re_dfastate_t *state;
-     int fl_search;
  {
    re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
    re_dfastate_t **trtable, *next_state;
@@ -2113,24 +2095,40 @@ transit_state (err, preg, mctx, state, fl_search)
         {
           /* Use transition table  */
           ch = re_string_fetch_byte (mctx->input);
-         trtable = fl_search ? state->trtable_search : state->trtable;
+         trtable = state->trtable;
           if (trtable == NULL)
             {
-             trtable = build_trtable (preg, state, fl_search);
-             if (fl_search)
-               state->trtable_search = trtable;
+             trtable = build_trtable (preg, state);
+             if (trtable == NULL)
+               {
+                 *err = REG_ESPACE;
+                 return NULL;
+               }
+           }
+         if (BE (state->word_trtable, 0))
+           {
+             unsigned int context;
+             context
+               = re_string_context_at (mctx->input,
+                                       re_string_cur_idx (mctx->input) - 1,
+                                       mctx->eflags, preg->newline_anchor);
+             if (IS_WORD_CONTEXT (context))
+               next_state = trtable[ch + SBC_MAX];
               else
-               state->trtable = trtable;
+               next_state = trtable[ch];
             }
-         next_state = trtable[ch];
+         else
+           next_state = trtable[ch];
         }
+#if 0
        else
         {
           /* don't use transition table  */
-         next_state = transit_state_sb (err, preg, state, fl_search, mctx);
+         next_state = transit_state_sb (err, preg, state, mctx);
           if (BE (next_state == NULL && err != REG_NOERROR, 0))
             return NULL;
         }
+#endif
      }
  
    cur_idx = re_string_cur_idx (mctx->input);
@@ -2242,15 +2240,15 @@ check_subexp_matching_top (dfa, mctx, cur_nodes, str_idx)
    return REG_NOERROR;
  }
  
+#if 0
  /* Return the next state to which the current state STATE will transit by
     accepting the current input byte.  */
  
  static re_dfastate_t *
-transit_state_sb (err, preg, state, fl_search, mctx)
+transit_state_sb (err, preg, state, mctx)
       reg_errcode_t *err;
       const regex_t *preg;
       re_dfastate_t *state;
-     int fl_search;
       re_match_context_t *mctx;
  {
    re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
@@ -2276,29 +2274,6 @@ transit_state_sb (err, preg, state, fl_search, mctx)
             }
         }
      }
-  if (fl_search)
-    {
-#ifdef RE_ENABLE_I18N
-      int not_initial = 0;
-      if (dfa->mb_cur_max > 1)
-       for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt)
-         if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER)
-           {
-             not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial;
-             break;
-           }
-      if (!not_initial)
-#endif
-       {
-         *err = re_node_set_merge (&next_nodes,
-                                   dfa->init_state->entrance_nodes);
-         if (BE (*err != REG_NOERROR, 0))
-           {
-             re_node_set_free (&next_nodes);
-             return NULL;
-           }
-       }
-    }
    context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags,
                                   preg->newline_anchor);
    next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
@@ -2309,6 +2284,7 @@ transit_state_sb (err, preg, state, fl_search, mctx)
    re_string_skip_bytes (mctx->input, 1);
    return next_state;
  }
+#endif
  
  #ifdef RE_ENABLE_I18N
  static reg_errcode_t
@@ -3117,10 +3093,9 @@ expand_bkref_cache (preg, mctx, cur_nodes, cur_str, last_str, subexp_num,
     Return the new table if succeeded, otherwise return NULL.  */
  
  static re_dfastate_t **
-build_trtable (preg, state, fl_search)
+build_trtable (preg, state)
      const regex_t *preg;
-    const re_dfastate_t *state;
-    int fl_search;
+    re_dfastate_t *state;
  {
    reg_errcode_t err;
    re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
@@ -3154,6 +3129,7 @@ build_trtable (preg, state, fl_search)
  
    /* Initialize transiton table.  */
    trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
+  state->word_trtable = 0;
    if (BE (trtable == NULL, 0))
      {
        if (dests_node_malloced)
@@ -3170,7 +3146,10 @@ build_trtable (preg, state, fl_search)
         free (dests_node);
        /* Return NULL in case of an error, trtable otherwise.  */
        if (ndests == 0)
-       return trtable;
+       {
+         state->trtable = trtable;
+         return trtable;
+       }
        free (trtable);
        return NULL;
      }
@@ -3224,26 +3203,6 @@ out_free:
                 goto out_free;
             }
         }
-      /* If search flag is set, merge the initial state.  */
-      if (fl_search)
-       {
-#ifdef RE_ENABLE_I18N
-         int not_initial = 0;
-         for (j = 0; j < follows.nelem; ++j)
-           if (dfa->nodes[follows.elems[j]].type == CHARACTER)
-             {
-               not_initial = dfa->nodes[follows.elems[j]].mb_partial;
-               break;
-             }
-         if (!not_initial)
-#endif
-           {
-             err = re_node_set_merge (&follows,
-                                      dfa->init_state->entrance_nodes);
-             if (BE (err != REG_NOERROR, 0))
-               goto out_free;
-           }
-       }
        dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
        if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
         goto out_free;
@@ -3274,31 +3233,41 @@ out_free:
      for (j = 0; j < UINT_BITS; ++j, ++ch)
        if ((acceptable[i] >> j) & 1)
         {
-         /* The current state accepts the character ch.  */
-         if (IS_WORD_CHAR (ch))
-           {
-             for (k = 0; k < ndests; ++k)
-               if ((dests_ch[k][i] >> j) & 1)
+         for (k = 0; k < ndests; ++k)
+           if ((dests_ch[k][i] >> j) & 1)
+             {
+               /* k-th destination accepts the character ch.  */
+               if (state->word_trtable)
                   {
-                   /* k-th destination accepts the word character ch.  */
-                   trtable[ch] = dest_states_word[k];
-                   /* There must be only one destination which accepts
-                      character ch.  See group_nodes_into_DFAstates.  */
-                   break;
+                   trtable[ch] = dest_states[k];
+                   trtable[ch + SBC_MAX] = dest_states_word[k];
                   }
-           }
-         else /* not WORD_CHAR */
-           {
-             for (k = 0; k < ndests; ++k)
-               if ((dests_ch[k][i] >> j) & 1)
+               else if (dfa->mb_cur_max > 1
+                        && dest_states[k] != dest_states_word[k])
                   {
-                   /* k-th destination accepts the non-word character ch.  */
+                   re_dfastate_t **new_trtable;
+
+                   new_trtable = (re_dfastate_t **)
+                                 realloc (trtable,
+                                          sizeof (re_dfastate_t *)
+                                          * 2 * SBC_MAX);
+                   if (BE (new_trtable == NULL, 0))
+                     goto out_free;
+                   memcpy (new_trtable + SBC_MAX, new_trtable,
+                           sizeof (re_dfastate_t *) * SBC_MAX);
+                   trtable = new_trtable;
+                   state->word_trtable = 1;
                     trtable[ch] = dest_states[k];
-                   /* There must be only one destination which accepts
-                      character ch.  See group_nodes_into_DFAstates.  */
-                   break;
+                   trtable[ch + SBC_MAX] = dest_states_word[k];
                   }
-           }
+               else if (IS_WORD_CHAR (ch))
+                 trtable[ch] = dest_states_word[k];
+               else
+                 trtable[ch] = dest_states[k];
+               /* There must be only one destination which accepts
+                  character ch.  See group_nodes_into_DFAstates.  */
+               break;
+             }
         }
    /* new line */
    if (bitset_contain (acceptable, NEWLINE_CHAR))
@@ -3309,6 +3278,8 @@ out_free:
           {
             /* k-th destination accepts newline character.  */
             trtable[NEWLINE_CHAR] = dest_states_nl[k];
+           if (state->word_trtable)
+             trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[k];
             /* There must be only one destination which accepts
                newline.  See group_nodes_into_DFAstates.  */
             break;
@@ -3325,6 +3296,7 @@ out_free:
    if (dests_node_malloced)
      free (dests_node);
  
+  state->trtable = trtable;
    return trtable;
  }
  
@@ -3386,6 +3358,8 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
          match it the context.  */
        if (constraint)
         {
+         int word_char_max;
+
           if (constraint & NEXT_NEWLINE_CONSTRAINT)
             {
               int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
@@ -3400,11 +3374,16 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
               bitset_empty (accepts);
               continue;
             }
+
+         /* This assumes an ASCII compatible locale.  We cannot say
+            anything about the non-ASCII chars.  */
+         word_char_max
+           = dfa->mb_cur_max > 1 ? BITSET_UINTS / 2 : BITSET_UINTS;
           if (constraint & NEXT_WORD_CONSTRAINT)
-           for (j = 0; j < BITSET_UINTS; ++j)
+           for (j = 0; j < word_char_max; ++j)
               accepts[j] &= dfa->word_char[j];
           if (constraint & NEXT_NOTWORD_CONSTRAINT)
-           for (j = 0; j < BITSET_UINTS; ++j)
+           for (j = 0; j < word_char_max; ++j)
               accepts[j] &= ~dfa->word_char[j];
         }
  
diff --git a/posix/tst-rxspencer.c b/posix/tst-rxspencer.c

index 45bafda7a7348751502a1e726a3dba0a09dadaf8..1b4b56f333dd6a7a35b5980af10db72ef5625048 100644 (file)
--- a/posix/tst-rxspencer.c
+++ b/posix/tst-rxspencer.c
@@ -350,16 +350,28 @@ mb_tests (const char *pattern, int cflags, const char *string, int eflags,
    if (strstr (pattern, "[:xdigit:]"))
      return 0;
  
+  /* XXX: regex ATM handles only single byte equivalence classes.  */
+  if (strstr (pattern, "[[=b=]]"))
+    return 0;
+
    for (i = 1; i < 16; ++i)
      {
        char *p = letters;
-      if (i & 1)
+      if ((i & 1)
+         && (strchr (pattern, 'a') || strchr (string, 'a')
+             || strchr (pattern, 'A') || strchr (string, 'A')))
         *p++ = 'a', *p++ = 'A';
-      if (i & 2)
+      if ((i & 2)
+         && (strchr (pattern, 'b') || strchr (string, 'b')
+             || strchr (pattern, 'B') || strchr (string, 'B')))
          *p++ = 'b', *p++ = 'B';
-      if (i & 4)
+      if ((i & 4)
+         && (strchr (pattern, 'c') || strchr (string, 'c')
+             || strchr (pattern, 'C') || strchr (string, 'C')))
          *p++ = 'c', *p++ = 'C';
-      if (i & 8)
+      if ((i & 8)
+         && (strchr (pattern, 'd') || strchr (string, 'd')
+             || strchr (pattern, 'D') || strchr (string, 'D')))
          *p++ = 'd', *p++ = 'D';
        *p++ = '\0';
        sprintf (fail, "UTF-8 %s FAIL", letters);
@@ -489,7 +501,11 @@ main (int argc, char **argv)
             replace_special_chars (matches);
          }
  
-      setlocale (LC_ALL, "C");
+      if (setlocale (LC_ALL, "C") == NULL)
+       {
+         puts ("setlocale C failed");
+         ret = 1;
+       }
        if (test (pattern, cflags, string, eflags, expect, matches, "FAIL")
           || (try_bre_ere
               && test (pattern, cflags & ~REG_EXTENDED, string, eflags,
@@ -497,12 +513,16 @@ main (int argc, char **argv)
         ret = 1;
        else if (test_utf8)
         {
-         setlocale (LC_ALL, "cs_CZ.UTF-8");
-         if (test (pattern, cflags, string, eflags, expect, matches,
-                   "UTF-8 FAIL")
-             || (try_bre_ere
-                 && test (pattern, cflags & ~REG_EXTENDED, string, eflags,
-                          expect, matches, "UTF-8 FAIL")))
+         if (setlocale (LC_ALL, "cs_CZ.UTF-8") == NULL)
+           {
+             puts ("setlocale cs_CZ.UTF-8 failed");
+             ret = 1;
+           }
+         else if (test (pattern, cflags, string, eflags, expect, matches,
+                        "UTF-8 FAIL")
+                  || (try_bre_ere
+                      && test (pattern, cflags & ~REG_EXTENDED, string,
+                               eflags, expect, matches, "UTF-8 FAIL")))
             ret = 1;
           else if (mb_tests (pattern, cflags, string, eflags, expect, matches)
                    || (try_bre_ere
author	Ulrich Drepper <drepper@redhat.com>
	Thu, 20 Nov 2003 23:36:40 +0000 (23:36 +0000)
committer	Ulrich Drepper <drepper@redhat.com>
	Thu, 20 Nov 2003 23:36:40 +0000 (23:36 +0000)
ChangeLog		patch \| blob \| history
localedata/ChangeLog		patch \| blob \| history
localedata/Makefile		patch \| blob \| history
posix/Makefile		patch \| blob \| history
posix/PTESTS		patch \| blob \| history
posix/bug-regex19.c		patch \| blob \| history
posix/regex_internal.c		patch \| blob \| history
posix/regex_internal.h		patch \| blob \| history
posix/regexec.c		patch \| blob \| history
posix/tst-rxspencer.c		patch \| blob \| history