From d8f73de86a4073070f5b626dc66a330d1597df88 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Mon, 6 Dec 2004 03:03:01 +0000 Subject: [PATCH] Update. 2004-12-01 Paolo Bonzini * posix/regcomp.c (free_dfa_content, init_dfa): Remove references to re_dfa_t's subexps field. (parse_sub_exp, parse_expression): Do not use it. Use completed_bkref_map instead. (create_initial_state, peek_token): Store a backreference \N with opr.idx = N-1. * posix/regexec.c (proceed_next_node, check_dst_limits, get_subexp): Likewise. (check_subexp_limits): Remove useless condition. * posix/regex_internal.h (re_subexp_t): Remove. (re_dfa_t): Remove subexps and subexps_alloc field, add completed_bkref_map. --- ChangeLog | 15 ++++++++ posix/regex_internal.h | 12 ++---- posix/regexec.c | 99 ++++++++++++++++++++++++-------------------------- 3 files changed, 65 insertions(+), 61 deletions(-) diff --git a/ChangeLog b/ChangeLog index b2b5a75..9903bc9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +2004-12-01 Paolo Bonzini + + * posix/regcomp.c (free_dfa_content, init_dfa): Remove + references to re_dfa_t's subexps field. + (parse_sub_exp, parse_expression): Do not use it. Use + completed_bkref_map instead. + (create_initial_state, peek_token): Store a backreference \N + with opr.idx = N-1. + * posix/regexec.c (proceed_next_node, check_dst_limits, get_subexp): + Likewise. + (check_subexp_limits): Remove useless condition. + * posix/regex_internal.h (re_subexp_t): Remove. + (re_dfa_t): Remove subexps and subexps_alloc field, add + completed_bkref_map. + 2004-12-05 Roland McGrath * Makeconfig: Comment typo fix. diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 703d409..1345067 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -496,13 +496,6 @@ struct re_dfastate_t }; typedef struct re_dfastate_t re_dfastate_t; -typedef struct -{ - /* start <= node < end */ - int start; - int end; -} re_subexp_t; - struct re_state_table_entry { int num; @@ -607,7 +600,6 @@ struct re_fail_stack_t struct re_dfa_t { - re_subexp_t *subexps; re_token_t *nodes; int nodes_alloc; int nodes_len; @@ -627,13 +619,15 @@ struct re_dfa_t int str_tree_storage_idx; /* number of subexpressions `re_nsub' is in regex_t. */ - int subexps_alloc; unsigned int state_hash_mask; int states_alloc; int init_node; int nbackref; /* The number of backreference in this dfa. */ + /* Bitmap expressing which backreference is used. */ unsigned int used_bkref_map; + unsigned int completed_bkref_map; + unsigned int has_plural_match : 1; /* If this dfa has "multibyte node", which is a backreference or a node which can accept multibyte character or multi character diff --git a/posix/regexec.c b/posix/regexec.c index 5877ade..22b8064 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -1260,7 +1260,7 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs) #endif /* RE_ENABLE_I18N */ if (type == OP_BACK_REF) { - int subexp_idx = dfa->nodes[node].opr.idx; + int subexp_idx = dfa->nodes[node].opr.idx + 1; naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; if (fs != NULL) { @@ -1853,7 +1853,7 @@ check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx) int subexp_idx; struct re_backref_cache_entry *ent; ent = mctx->bkref_ents + limits->elems[lim_idx]; - subexp_idx = dfa->nodes[ent->node].opr.idx - 1; + subexp_idx = dfa->nodes[ent->node].opr.idx; dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], subexp_idx, dst_node, dst_idx, @@ -1891,49 +1891,48 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx) switch (dfa->nodes[node].type) { case OP_BACK_REF: - { - struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; - do - { - int dst, cpos; - - if (ent->node != node) - continue; - - if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map) - && (ent->eps_reachable_subexps_map - & (1 << (subexp_idx - 1))) == 0) - continue; + if (bkref_idx != -1) + { + struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; + do + { + int dst, cpos; - /* Recurse trying to reach the OP_OPEN_SUBEXP and - OP_CLOSE_SUBEXP cases below. But, if the - destination node is the same node as the source - node, don't recurse because it would cause an - infinite loop: a regex that exhibits this behavior - is ()\1*\1* */ - dst = dfa->edests[node].elems[0]; - if (dst == from_node) - { - if (boundaries & 1) - return -1; - else /* if (boundaries & 2) */ - return 0; - } + if (ent->node != node) + continue; - cpos = check_dst_limits_calc_pos_1 (mctx, boundaries, - subexp_idx, dst, bkref_idx); + if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map) + && !(ent->eps_reachable_subexps_map & (1 << subexp_idx))) + continue; - if (cpos == -1 /* && (boundaries & 1) */) - return -1; + /* Recurse trying to reach the OP_OPEN_SUBEXP and + OP_CLOSE_SUBEXP cases below. But, if the + destination node is the same node as the source + node, don't recurse because it would cause an + infinite loop: a regex that exhibits this behavior + is ()\1*\1* */ + dst = dfa->edests[node].elems[0]; + if (dst == from_node) + { + if (boundaries & 1) + return -1; + else /* if (boundaries & 2) */ + return 0; + } - if (cpos == 0 && (boundaries & 2)) - return 0; + cpos = + check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, + dst, bkref_idx); + if (cpos == -1 /* && (boundaries & 1) */) + return -1; + if (cpos == 0 && (boundaries & 2)) + return 0; - ent->eps_reachable_subexps_map &= ~(1 << (subexp_idx - 1)); - } - while (ent++->more); - break; - } + ent->eps_reachable_subexps_map &= ~(1 << subexp_idx); + } + while (ent++->more); + } + break; case OP_OPEN_SUBEXP: if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx) @@ -2003,7 +2002,7 @@ check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx) if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) continue; /* This is unrelated limitation. */ - subexp_idx = dfa->nodes[ent->node].opr.idx - 1; + subexp_idx = dfa->nodes[ent->node].opr.idx; if (ent->subexp_to == str_idx) { int ops_node = -1; @@ -2060,16 +2059,12 @@ check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx) { if (subexp_idx != dfa->nodes[node].opr.idx) continue; - if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx) - || (type == OP_OPEN_SUBEXP)) - { - /* It is against this limitation. - Remove it form the current sifted state. */ - err = sub_epsilon_src_nodes (dfa, node, dest_nodes, - candidates); - if (BE (err != REG_NOERROR, 0)) - return err; - } + /* It is against this limitation. + Remove it form the current sifted state. */ + err = sub_epsilon_src_nodes (dfa, node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; } } } @@ -2656,7 +2651,7 @@ get_subexp (mctx, bkref_node, bkref_str_idx) while (entry++->more); } - subexp_num = dfa->nodes[bkref_node].opr.idx - 1; + subexp_num = dfa->nodes[bkref_node].opr.idx; /* For each sub expression */ for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx) -- 2.7.4