From a9388965cca987526247e93b96dc65f4ec63cc9e Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Thu, 28 Feb 2002 07:43:13 +0000 Subject: [PATCH] Update. 2002-02-28 Isamu Hasegawa * posix/regcomp.c (regcomp): Remove a redundant condition. (init_word_char): Add a check on malloc failure. (create_initial_state): Likewise. (duplicate_node): Likewise. (calc_eclosure): Likewise. (calc_eclosure_iter): Likewise. (parse_expression): Likewise. (parse_bracket_exp): Remove unnecessary malloc invocations. (build_equiv_class): Likewise. (build_charclass): Likewise. * posix/regex_internal.c (re_node_set_intersect): Add a check on malloc failure. (re_node_set_add_intersect): Likewise. (re_node_set_merge): Likewise. (re_acquire_state): Likewise. (re_acquire_state_context): Likewise. (create_newstate_common): Likewise. (register_state): Likewise. (create_ci_newstate): Likewise. (create_cd_newstate): Likewise. * posix/regex_internal.h: Fix prototypes of re_acquire_state and re_acquire_state_context. * posix/regexec.c (regexec): Suit it to the error handling of re_search_internal. (re_match): Likewise. (re_search): Likewise. (re_search_internal): Add a check on malloc failure. (acquire_init_state_context): Likewise. (check_matching): Likewise. (proceed_next_node): Likewise. (set_regs): Likewise. (sift_states_backward): Likewise. (sift_states_iter_bkref): Likewise. (add_epsilon_backreference): Likewise. (transit_state): Likewise. (transit_state_sb): Likewise. (transit_state_mb): Likewise. (transit_state_bkref_loop): Likewise. (build_trtable): Likewise. (group_nodes_into_DFAstates): Likewise. (match_ctx_init): Likewise. (match_ctx_add_entry): Likewise. --- ChangeLog | 45 +++++ posix/regcomp.c | 240 ++++++++++++++----------- posix/regex_internal.c | 139 +++++++++++---- posix/regex_internal.h | 5 +- posix/regexec.c | 468 ++++++++++++++++++++++++++++++++++--------------- 5 files changed, 614 insertions(+), 283 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7cc81d7..e17162f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,48 @@ +2002-02-28 Isamu Hasegawa + + * posix/regcomp.c (regcomp): Remove a redundant condition. + (init_word_char): Add a check on malloc failure. + (create_initial_state): Likewise. + (duplicate_node): Likewise. + (calc_eclosure): Likewise. + (calc_eclosure_iter): Likewise. + (parse_expression): Likewise. + (parse_bracket_exp): Remove unnecessary malloc invocations. + (build_equiv_class): Likewise. + (build_charclass): Likewise. + * posix/regex_internal.c (re_node_set_intersect): Add a check + on malloc failure. + (re_node_set_add_intersect): Likewise. + (re_node_set_merge): Likewise. + (re_acquire_state): Likewise. + (re_acquire_state_context): Likewise. + (create_newstate_common): Likewise. + (register_state): Likewise. + (create_ci_newstate): Likewise. + (create_cd_newstate): Likewise. + * posix/regex_internal.h: Fix prototypes of re_acquire_state + and re_acquire_state_context. + * posix/regexec.c (regexec): Suit it to the error handling of + re_search_internal. + (re_match): Likewise. + (re_search): Likewise. + (re_search_internal): Add a check on malloc failure. + (acquire_init_state_context): Likewise. + (check_matching): Likewise. + (proceed_next_node): Likewise. + (set_regs): Likewise. + (sift_states_backward): Likewise. + (sift_states_iter_bkref): Likewise. + (add_epsilon_backreference): Likewise. + (transit_state): Likewise. + (transit_state_sb): Likewise. + (transit_state_mb): Likewise. + (transit_state_bkref_loop): Likewise. + (build_trtable): Likewise. + (group_nodes_into_DFAstates): Likewise. + (match_ctx_init): Likewise. + (match_ctx_add_entry): Likewise. + 2002-02-27 Ulrich Drepper * elf/dl-load.c (_dl_map_object_from_fd): Always add SONAME to diff --git a/posix/regcomp.c b/posix/regcomp.c index 12da043..0b85f7d 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -63,7 +63,7 @@ static void re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, char *fastmap); static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len); -static void init_word_char (re_dfa_t *dfa); +static reg_errcode_t init_word_char (re_dfa_t *dfa); static void free_charset (re_charset_t *cset); static void free_workarea_compile (regex_t *preg); static reg_errcode_t create_initial_state (re_dfa_t *dfa); @@ -72,10 +72,11 @@ static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node); static void calc_first (re_dfa_t *dfa, bin_tree_t *node); static void calc_next (re_dfa_t *dfa, bin_tree_t *node); static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node); -static int duplicate_node (re_dfa_t *dfa, int org_idx, - unsigned int constraint); +static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx, + unsigned int constraint); static reg_errcode_t calc_eclosure (re_dfa_t *dfa); -static re_node_set calc_eclosure_iter (re_dfa_t *dfa, int node, int root); +static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, + int node, int root); static void calc_inveclosure (re_dfa_t *dfa); static int fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax); @@ -446,8 +447,8 @@ regcomp (preg, pattern, cflags) if (ret == REG_ERPAREN) ret = REG_EPAREN; - /* XXX Why the test for preg->fastmap != NULL? */ - if (ret == REG_NOERROR && preg->fastmap != NULL) + /* We have already checked preg->fastmap != NULL. */ + if (ret == REG_NOERROR) { /* Compute the fastmap now, since regexec cannot modify the pattern buffer. */ @@ -772,16 +773,19 @@ init_dfa (dfa, pat_len) "word". In this case "word" means that it is the word construction character used by some operators like "\<", "\>", etc. */ -static void +static reg_errcode_t init_word_char (dfa) re_dfa_t *dfa; { int i, j, ch; dfa->word_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1); + if (dfa->word_char == NULL) + return REG_ESPACE; for (i = 0, ch = 0; i < BITSET_UINTS; ++i) for (j = 0; j < UINT_BITS; ++j, ++ch) if (isalnum (ch) || ch == '_') dfa->word_char[i] |= 1 << j; + return REG_NOERROR; } /* Free the work area which are only used while compiling. */ @@ -844,24 +848,28 @@ create_initial_state (dfa) } /* It must be the first time to invoke acquire_state. */ - dfa->init_state = re_acquire_state_context (dfa, &init_nodes, 0); + dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); + /* We don't check ERR here, since the initial state must not be NULL. */ + if (dfa->init_state == NULL) + return err; if (dfa->init_state->has_constraint) { - dfa->init_state_word = re_acquire_state_context (dfa, &init_nodes, + dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, CONTEXT_WORD); - dfa->init_state_nl = re_acquire_state_context (dfa, &init_nodes, + dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, CONTEXT_NEWLINE); - dfa->init_state_begbuf = re_acquire_state_context (dfa, &init_nodes, + dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, + &init_nodes, CONTEXT_NEWLINE | CONTEXT_BEGBUF); + if (dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL) + return err; } else dfa->init_state_word = dfa->init_state_nl = dfa->init_state_begbuf = dfa->init_state; - if (dfa->init_state == NULL || dfa->init_state_word == NULL - || dfa->init_state_nl == NULL || dfa->init_state_begbuf == NULL ) - return REG_ESPACE; re_node_set_free (&init_nodes); return REG_NOERROR; } @@ -1114,20 +1122,30 @@ calc_epsdest (dfa, node) } } -static int -duplicate_node (dfa, org_idx, constraint) +/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. + The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded, + otherwise return the error code. */ + +static reg_errcode_t +duplicate_node (new_idx, dfa, org_idx, constraint) re_dfa_t *dfa; - int org_idx; + int *new_idx, org_idx; unsigned int constraint; { re_token_t dup; int dup_idx; + reg_errcode_t err; dup.type = OP_CONTEXT_NODE; if (dfa->nodes[org_idx].type == OP_CONTEXT_NODE) { + /* If the node whose index is ORG_IDX is the same as the intended + node, use it. */ if (dfa->nodes[org_idx].constraint == constraint) - return org_idx; + { + *new_idx = org_idx; + return REG_NOERROR; + } dup.constraint = constraint | dfa->nodes[org_idx].constraint; } @@ -1137,23 +1155,32 @@ duplicate_node (dfa, org_idx, constraint) /* In case that `entity' points OP_CONTEXT_NODE, we correct `entity' to real entity in calc_inveclosures(). */ dup.opr.ctx_info = malloc (sizeof (*dup.opr.ctx_info)); + dup_idx = re_dfa_add_node (dfa, dup, 1); + if (dup.opr.ctx_info == NULL || dup_idx == -1) + return REG_ESPACE; dup.opr.ctx_info->entity = org_idx; dup.opr.ctx_info->bkref_eclosure = NULL; - dup_idx = re_dfa_add_node (dfa, dup, 1); - dfa->nodes[dup_idx].duplicated = 1; + dfa->nodes[dup_idx].duplicated = 1; dfa->firsts[dup_idx] = dfa->firsts[org_idx]; dfa->nexts[dup_idx] = dfa->nexts[org_idx]; - re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx); + err = re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx); + if (err != REG_NOERROR) + return err; /* Since we don't duplicate epsilon nodes, epsilon closure have only itself. */ - re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx); - re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx); + err = re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx); + if (err != REG_NOERROR) + return err; + err = re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx); + if (err != REG_NOERROR) + return err; /* Then we must update inveclosure for this node. We process them at last part of calc_eclosure(), since we don't complete to calculate them here. */ - return dup_idx; + *new_idx = dup_idx; + return REG_NOERROR; } static void @@ -1193,6 +1220,7 @@ calc_eclosure (dfa) /* For each nodes, calculate epsilon closure. */ for (node_idx = 0, max = dfa->nodes_len; ; ++node_idx) { + reg_errcode_t err; re_node_set eclosure_elem; if (node_idx == max) { @@ -1210,7 +1238,9 @@ calc_eclosure (dfa) if (dfa->eclosures[node_idx].nelem != 0) continue; /* Calculate epsilon closure of `node_idx'. */ - eclosure_elem = calc_eclosure_iter (dfa, node_idx, 1); + err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1); + if (err != REG_NOERROR) + return err; if (dfa->eclosures[node_idx].nelem == 0) { @@ -1241,7 +1271,13 @@ calc_eclosure (dfa) { int dest_node_idx = dfa->eclosures[dfa->nexts[idx]].elems[i]; if (!IS_EPSILON_NODE (dfa->nodes[dest_node_idx].type)) - dest_node_idx = duplicate_node (dfa, dest_node_idx, constraint); + { + reg_errcode_t err; + err = duplicate_node (&dest_node_idx, dfa, dest_node_idx, + constraint); + if (err != REG_NOERROR) + return err; + } re_node_set_insert (bkref_eclosure, dest_node_idx); } dfa->nodes[idx].opr.ctx_info->bkref_eclosure = bkref_eclosure; @@ -1252,15 +1288,19 @@ calc_eclosure (dfa) /* Calculate epsilon closure of NODE. */ -static re_node_set -calc_eclosure_iter (dfa, node, root) +static reg_errcode_t +calc_eclosure_iter (new_set, dfa, node, root) + re_node_set *new_set; re_dfa_t *dfa; int node, root; { + reg_errcode_t err; unsigned int constraint; int i, max, incomplete = 0; re_node_set eclosure; - re_node_set_alloc (&eclosure, 1); + err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); + if (err != REG_NOERROR) + return err; /* This indicates that we are calculating this node now. We reference this value to avoid infinite loop. */ @@ -1285,7 +1325,11 @@ calc_eclosure_iter (dfa, node, root) /* If we haven't calculated the epsilon closure of `edest' yet, calculate now. Otherwise use calculated epsilon closure. */ if (dfa->eclosures[edest].nelem == 0) - eclosure_elem = calc_eclosure_iter (dfa, edest, 0); + { + err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0); + if (err != REG_NOERROR) + return err; + } else eclosure_elem = dfa->eclosures[edest]; /* Merge the epsilon closure of `edest'. */ @@ -1307,7 +1351,11 @@ calc_eclosure_iter (dfa, node, root) int dest = eclosure.elems[i]; if (!IS_EPSILON_NODE (dfa->nodes[dest].type)) { - int dup_dest = duplicate_node (dfa, dest, constraint); + int dup_dest; + reg_errcode_t err; + err = duplicate_node (&dup_dest, dfa, dest, constraint); + if (err != REG_NOERROR) + return err; if (dest != dup_dest) { re_node_set_remove_at (&eclosure, i--); @@ -1323,7 +1371,8 @@ calc_eclosure_iter (dfa, node, root) dfa->eclosures[node].nelem = 0; else dfa->eclosures[node] = eclosure; - return eclosure; + *new_set = eclosure; + return REG_NOERROR; } /* Functions for token which are used in the parser. */ @@ -1865,7 +1914,11 @@ parse_expression (regexp, preg, token, syntax, nest, err) break; case ANCHOR: if (dfa->word_char == NULL) - init_word_char (dfa); + { + *err = init_word_char (dfa); + if (*err != REG_NOERROR) + return NULL; + } if (token->opr.ctx_type == WORD_DELIM) { bin_tree_t *tree_first, *tree_last; @@ -2137,28 +2190,6 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err) I'm not sure, but maybe enough. */ #define BRACKET_NAME_BUF_SIZE 32 -static inline void * -extend_array_for_cset (array, num, alloc, type_size) - void *array; - int num, *alloc, type_size; -{ - void *new_array = array; - if (*alloc == num) - { - if (*alloc == 0) - { - new_array = malloc (type_size); - *alloc = 1; - } - else - { - new_array = realloc (array, type_size * num * 2); - *alloc = 2 * num; - } - } - return new_array; -} - /* This function parse bracket expression like "[abc]", "[a-c]", "[[.a-a.]]" etc. */ @@ -2299,22 +2330,15 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) uint32_t *new_array_end; int new_nranges; - /* XXX If mbcset->range_starts and mbcset->range_ends are NULL - if *range_alloc == 0 then we do not need the if. */ - if (*range_alloc == 0) - { - new_nranges = 1; - new_array_start = re_malloc (uint32_t, 1); - new_array_end = re_malloc (uint32_t, 1); - } - else - { - new_nranges = 2 * mbcset->nranges; - new_array_start = re_realloc (mbcset->range_starts, uint32_t, - new_nranges); - new_array_end = re_realloc (mbcset->range_ends, uint32_t, - new_nranges); - } + /* +1 in case of mbcset->nranges is 0. */ + new_nranges = 2 * mbcset->nranges + 1; + /* Use realloc since mbcset->range_starts and mbcset->range_ends + are NULL if *range_alloc == 0. */ + new_array_start = re_realloc (mbcset->range_starts, uint32_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, uint32_t, + new_nranges); + if (new_array_start == NULL || new_array_end == NULL) return REG_ESPACE; @@ -2394,13 +2418,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) /* Got valid collation sequence, add it as a new entry. */ /* Check the space of the arrays. */ - mbcset->coll_syms = extend_array_for_cset (mbcset->coll_syms, - mbcset->ncoll_syms, - coll_sym_alloc, - sizeof (int32_t)); - if (mbcset->coll_syms == NULL) - return REG_ESPACE; - + if (*coll_sym_alloc == mbcset->ncoll_syms) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->ncoll_syms is 0. */ + *coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; + /* Use realloc since mbcset->coll_syms is NULL + if *alloc == 0. */ + mbcset->coll_syms = re_realloc (mbcset->coll_syms, int32_t, + *coll_sym_alloc); + if (mbcset->coll_syms == NULL) + return REG_ESPACE; + } mbcset->coll_syms[mbcset->ncoll_syms++] = idx; return REG_NOERROR; } @@ -2557,12 +2586,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) bitset_set (sbcset, start_elem.opr.ch); break; case MB_CHAR: - mbcset->mbchars = extend_array_for_cset (mbcset->mbchars, - mbcset->nmbchars, - &mbchar_alloc, - sizeof (wchar_t)); - if (mbcset->mbchars == NULL) - goto parse_bracket_exp_espace; + /* Check whether the array has enough space. */ + if (mbchar_alloc == mbcset->nmbchars) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nmbchars is 0. */ + mbchar_alloc = 2 * mbcset->nmbchars + 1; + /* Use realloc since array is NULL if *alloc == 0. */ + mbcset->mbchars = re_realloc (mbcset->mbchars, wchar_t, + mbchar_alloc); + if (mbcset->mbchars == NULL) + goto parse_bracket_exp_espace; + } mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; break; case EQUIV_CLASS: @@ -2779,14 +2814,18 @@ build_equiv_class (mbcset, sbcset, equiv_class_alloc, name) bitset_set (sbcset, ch); } } - /* Check the space of the arrays, and extend if we need. */ - mbcset->equiv_classes = extend_array_for_cset (mbcset->equiv_classes, - mbcset->nequiv_classes, - equiv_class_alloc, - sizeof (int32_t)); - if (mbcset->equiv_classes == NULL) - return REG_ESPACE; - + /* Check whether the array has enough space. */ + if (*equiv_class_alloc == mbcset->nequiv_classes) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nequiv_classes is 0. */ + *equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; + /* Use realloc since the array is NULL if *alloc == 0. */ + mbcset->equiv_classes = re_realloc (mbcset->equiv_classes, int32_t, + *equiv_class_alloc); + if (mbcset->equiv_classes == NULL) + return REG_ESPACE; + } mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; } else @@ -2815,12 +2854,17 @@ build_charclass (mbcset, sbcset, char_class_alloc, name) int i; /* Check the space of the arrays. */ - mbcset->char_classes = extend_array_for_cset (mbcset->char_classes, - mbcset->nchar_classes, - char_class_alloc, - sizeof (wctype_t)); - if (mbcset->char_classes == NULL) - return REG_ESPACE; + if (*char_class_alloc == mbcset->nchar_classes) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nchar_classes is 0. */ + *char_class_alloc = 2 * mbcset->nchar_classes + 1; + /* Use realloc since array is NULL if *alloc == 0. */ + mbcset->char_classes = re_realloc (mbcset->char_classes, wctype_t, + *char_class_alloc); + if (mbcset->char_classes == NULL) + return REG_ESPACE; + } mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); diff --git a/posix/regex_internal.c b/posix/regex_internal.c index 63bed42..11726b3 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -68,6 +68,8 @@ static reg_errcode_t re_string_translate_buffer (re_string_t *pstr, static re_dfastate_t *create_newstate_common (re_dfa_t *dfa, const re_node_set *nodes, unsigned int hash); +static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate, + unsigned int hash); static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa, const re_node_set *nodes, unsigned int hash); @@ -473,6 +475,10 @@ re_node_set_init_copy (dest, src) return REG_NOERROR; } +/* Calculate the intersection of the sets SRC1 and SRC2. And store it in + DEST. Return value indicate the error code or REG_NOERROR if succeeded. + Note: We assume dest->elems is NULL, when dest->alloc is 0. */ + static reg_errcode_t re_node_set_intersect (dest, src1, src2) re_node_set *dest; @@ -483,31 +489,28 @@ re_node_set_intersect (dest, src1, src2) { if (src1->nelem + src2->nelem > dest->alloc) { - int *new_array; - if (dest->alloc == 0) - new_array = re_malloc (int, src1->nelem + src2->nelem); - else - new_array = re_realloc (dest->elems, int, - src1->nelem + src2->nelem); dest->alloc = src1->nelem + src2->nelem; - if (new_array == NULL) + dest->elems = re_realloc (dest->elems, int, dest->alloc); + if (dest->elems == NULL) return REG_ESPACE; - dest->elems = new_array; } } else { + /* The intersection of empty sets is also empty set. */ dest->nelem = 0; return REG_NOERROR; } - for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) + for (i1 = i2 = id = 0; i1 < src1->nelem && i2 < src2->nelem; ) { if (src1->elems[i1] > src2->elems[i2]) { ++i2; continue; } + /* The intersection must have the elements which are in both of + SRC1 and SRC2. */ if (src1->elems[i1] == src2->elems[i2]) dest->elems[id++] = src2->elems[i2++]; ++i1; @@ -516,6 +519,10 @@ re_node_set_intersect (dest, src1, src2) return REG_NOERROR; } +/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. + Note: We assume dest->elems is NULL, when dest->alloc is 0. */ + static reg_errcode_t re_node_set_add_intersect (dest, src1, src2) re_node_set *dest; @@ -526,16 +533,10 @@ re_node_set_add_intersect (dest, src1, src2) { if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) { - int *new_array; - if (dest->alloc == 0) - new_array = re_malloc (int, src1->nelem + src2->nelem); - else - new_array = re_realloc (dest->elems, int, - src1->nelem + src2->nelem + dest->nelem); dest->alloc = src1->nelem + src2->nelem + dest->nelem; - if (new_array == NULL) + dest->elems = re_realloc (dest->elems, int, dest->alloc); + if (dest->elems == NULL) return REG_ESPACE; - dest->elems = new_array; } } else @@ -567,6 +568,9 @@ re_node_set_add_intersect (dest, src1, src2) return REG_NOERROR; } +/* Calculate the union set of the sets SRC1 and SRC2. And store it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. */ + static reg_errcode_t re_node_set_init_union (dest, src1, src2) re_node_set *dest; @@ -617,6 +621,9 @@ re_node_set_init_union (dest, src1, src2) return REG_NOERROR; } +/* Calculate the union set of the sets DEST and SRC. And store it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. */ + static reg_errcode_t re_node_set_merge (dest, src) re_node_set *dest; @@ -628,12 +635,16 @@ re_node_set_merge (dest, src) else if (dest == NULL) { dest = re_malloc (re_node_set, 1); + if (dest == NULL) + return REG_ESPACE; return re_node_set_init_copy (dest, src); } if (dest->alloc < src->nelem + dest->nelem) { dest->alloc = 2 * (src->nelem + dest->alloc); dest->elems = re_realloc (dest->elems, int, dest->alloc); + if (dest->elems == NULL) + return REG_ESPACE; } for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;) @@ -860,18 +871,28 @@ calc_state_hash (nodes, context) /* Search for the state whose node_set is equivalent to NODES. Return the pointer to the state, if we found it in the DFA. - Otherwise create the new one and return it. */ - -static re_dfastate_t * -re_acquire_state (dfa, nodes) + Otherwise create the new one and return it. In case of an error + return NULL and set the error code in ERR. + Note: - We assume NULL as the invalid state, then it is possible that + return value is NULL and ERR is REG_NOERROR. + - We never return non-NULL value in case of any errors, it is for + optimization. */ + +static re_dfastate_t* +re_acquire_state (err, dfa, nodes) + reg_errcode_t *err; re_dfa_t *dfa; const re_node_set *nodes; { unsigned int hash; + re_dfastate_t *new_state; struct re_state_table_entry *spot; int i; if (nodes->nelem == 0) - return NULL; + { + *err = REG_NOERROR; + return NULL; + } hash = calc_state_hash (nodes, 0); spot = dfa->state_table + (hash & dfa->state_hash_mask); @@ -893,25 +914,42 @@ re_acquire_state (dfa, nodes) } /* There are no appropriate state in the dfa, create the new one. */ - return create_ci_newstate (dfa, nodes, hash); + new_state = create_ci_newstate (dfa, nodes, hash); + if (new_state != NULL) + return new_state; + else + { + *err = REG_ESPACE; + return NULL; + } } /* Search for the state whose node_set is equivalent to NODES and whose context is equivalent to CONTEXT. Return the pointer to the state, if we found it in the DFA. - Otherwise create the new one and return it. */ - -static re_dfastate_t * -re_acquire_state_context (dfa, nodes, context) + Otherwise create the new one and return it. In case of an error + return NULL and set the error code in ERR. + Note: - We assume NULL as the invalid state, then it is possible that + return value is NULL and ERR is REG_NOERROR. + - We never return non-NULL value in case of any errors, it is for + optimization. */ + +static re_dfastate_t* +re_acquire_state_context (err, dfa, nodes, context) + reg_errcode_t *err; re_dfa_t *dfa; const re_node_set *nodes; unsigned int context; { unsigned int hash; + re_dfastate_t *new_state; struct re_state_table_entry *spot; int i; if (nodes->nelem == 0) - return NULL; + { + *err = REG_NOERROR; + return NULL; + } hash = calc_state_hash (nodes, context); spot = dfa->state_table + (hash & dfa->state_hash_mask); @@ -934,9 +972,19 @@ re_acquire_state_context (dfa, nodes, context) return state; } /* There are no appropriate state in `dfa', create the new one. */ - return create_cd_newstate (dfa, nodes, context, hash); + new_state = create_cd_newstate (dfa, nodes, context, hash); + if (new_state != NULL) + return new_state; + else + { + *err = REG_ESPACE; + return NULL; + } } +/* Allocate memory for DFA state and initialize common properties. + Return the new state if succeeded, otherwise return NULL. */ + static re_dfastate_t * create_newstate_common (dfa, nodes, hash) re_dfa_t *dfa; @@ -945,6 +993,8 @@ create_newstate_common (dfa, nodes, hash) { re_dfastate_t *newstate; newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); + if (newstate == NULL) + return NULL; re_node_set_init_copy (&newstate->nodes, nodes); newstate->trtable = NULL; newstate->trtable_search = NULL; @@ -952,7 +1002,10 @@ create_newstate_common (dfa, nodes, hash) return newstate; } -static void +/* Store the new state NEWSTATE whose hash value is HASH in appropriate + position. Return value indicate the error code if failed. */ + +static reg_errcode_t register_state (dfa, newstate, hash) re_dfa_t *dfa; re_dfastate_t *newstate; @@ -972,8 +1025,7 @@ register_state (dfa, newstate, hash) spot->alloc = 4; new_array = re_malloc (re_dfastate_t *, spot->alloc); if (new_array == NULL) - /* XXX return value */ - return; + return REG_ESPACE; new_array[0] = spot->entry.state; } else @@ -985,8 +1037,12 @@ register_state (dfa, newstate, hash) spot->entry.array = new_array; } spot->entry.array[spot->num++] = newstate; + return REG_NOERROR; } +/* Create the new state which is independ of contexts. + Return the new state if succeeded, otherwise return NULL. */ + static re_dfastate_t * create_ci_newstate (dfa, nodes, hash) re_dfa_t *dfa; @@ -994,8 +1050,11 @@ create_ci_newstate (dfa, nodes, hash) unsigned int hash; { int i; + reg_errcode_t err; re_dfastate_t *newstate; newstate = create_newstate_common (dfa, nodes, hash); + if (newstate == NULL) + return NULL; newstate->entrance_nodes = &newstate->nodes; for (i = 0 ; i < nodes->nelem ; i++) @@ -1021,11 +1080,13 @@ create_ci_newstate (dfa, nodes, hash) newstate->halt = 1; } } - - register_state (dfa, newstate, hash); - return newstate; + err = register_state (dfa, newstate, hash); + return (err != REG_NOERROR) ? NULL : newstate; } +/* Create the new state which is depend on the context CONTEXT. + Return the new state if succeeded, otherwise return NULL. */ + static re_dfastate_t * create_cd_newstate (dfa, nodes, context, hash) re_dfa_t *dfa; @@ -1033,9 +1094,12 @@ create_cd_newstate (dfa, nodes, context, hash) unsigned int context, hash; { int i, nctx_nodes = 0; + reg_errcode_t err; re_dfastate_t *newstate; newstate = create_newstate_common (dfa, nodes, hash); + if (newstate == NULL) + return NULL; newstate->context = context; newstate->entrance_nodes = &newstate->nodes; @@ -1076,7 +1140,6 @@ create_cd_newstate (dfa, nodes, context, hash) { newstate->entrance_nodes = re_malloc (re_node_set, 1); if (newstate->entrance_nodes == NULL) - /* XXX Return which value? */ return NULL; re_node_set_init_copy (newstate->entrance_nodes, nodes); nctx_nodes = 0; @@ -1090,6 +1153,6 @@ create_cd_newstate (dfa, nodes, context, hash) } } } - register_state (dfa, newstate, hash); - return newstate; + err = register_state (dfa, newstate, hash); + return (err != REG_NOERROR) ? NULL : newstate; } diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 35f9f4a..38d68d4 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -426,9 +426,10 @@ static void re_node_set_remove_at (re_node_set *set, int idx); #define re_node_set_empty(p) ((p)->nelem = 0) #define re_node_set_free(set) re_free ((set)->elems) static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode); -static re_dfastate_t *re_acquire_state (re_dfa_t *dfa, +static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa, const re_node_set *nodes); -static re_dfastate_t *re_acquire_state_context (re_dfa_t *dfa, +static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err, + re_dfa_t *dfa, const re_node_set *nodes, unsigned int context); diff --git a/posix/regexec.c b/posix/regexec.c index cf8f304..dc60e50 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -38,15 +38,19 @@ #include "regex.h" #include "regex_internal.h" -static void match_ctx_init (re_match_context_t *cache, int eflags, int n); +static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, + int n); static void match_ctx_free (re_match_context_t *cache); -static void match_ctx_add_entry (re_match_context_t *cache, int node, int from, - int to); -static int re_search_internal (const regex_t *preg, const char *string, - int length, int start, int range, size_t nmatch, - regmatch_t pmatch[], int eflags); -static inline re_dfastate_t *acquire_init_state_context (const regex_t *preg, - const re_string_t *input, int idx, int eflags); +static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, + int from, int to); +static reg_errcode_t re_search_internal (const regex_t *preg, + const char *string, int length, + int start, int range, size_t nmatch, + regmatch_t pmatch[], int eflags); +static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err, + const regex_t *preg, + const re_string_t *input, + int idx, int eflags); static int check_matching (const regex_t *preg, re_string_t *input, re_match_context_t *mctx, re_dfastate_t **state_log, int start_idx, int fl_search, int fl_longest_match); @@ -61,9 +65,10 @@ static int proceed_next_node (const regex_t *preg, const re_match_context_t *mctx, const re_string_t *input, int *pidx, int node, re_node_set *eps_via_nodes); -static void set_regs (const regex_t *preg, re_dfastate_t **state_log, - const re_match_context_t *mctx, const re_string_t *input, - size_t nmatch, regmatch_t *pmatch, int last); +static reg_errcode_t set_regs (const regex_t *preg, re_dfastate_t **state_log, + const re_match_context_t *mctx, + const re_string_t *input, size_t nmatch, + regmatch_t *pmatch, int last); static int sift_states_iter_mb (const regex_t *preg, re_dfastate_t **state_log, const re_match_context_t *mctx, const re_string_t *input, int node_idx, @@ -73,36 +78,40 @@ static int sift_states_iter_bkref (const re_dfa_t *dfa, struct re_backref_cache_entry *mctx_entry, int node_idx, int idx, int match_first, int match_last); -static void sift_states_backward (const regex_t *preg, - re_dfastate_t **state_log, - const re_match_context_t *mctx, - const re_string_t *input, int last_node); -static void add_epsilon_backreference (const re_dfa_t *dfa, - const re_match_context_t *mctx, - const re_node_set *plog, int idx, - re_node_set *state_buf); -static re_dfastate_t *transit_state (const regex_t *preg, re_dfastate_t *state, - re_string_t *input, int fl_search, - re_dfastate_t **state_log, +static reg_errcode_t sift_states_backward (const regex_t *preg, + re_dfastate_t **state_log, + const re_match_context_t *mctx, + const re_string_t *input, + int last_node); +static reg_errcode_t add_epsilon_backreference (const re_dfa_t *dfa, + const re_match_context_t *mctx, + const re_node_set *plog, + int idx, + re_node_set *state_buf); +static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg, + re_dfastate_t *state, re_string_t *input, + int fl_search, re_dfastate_t **state_log, re_match_context_t *mctx); -static re_dfastate_t *transit_state_sb (const regex_t *preg, +static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg, re_dfastate_t *pstate, re_string_t *input, int fl_search, re_match_context_t *mctx); -static void transit_state_mb (const regex_t *preg, re_dfastate_t *pstate, - const re_string_t *input, - re_dfastate_t **state_log, - re_match_context_t *mctx); -static void transit_state_bkref (const regex_t *preg, re_dfastate_t *pstate, - const re_string_t *input, - re_dfastate_t **state_log, - re_match_context_t *mctx); -static void transit_state_bkref_loop (const regex_t *preg, - const re_string_t *input, - re_node_set *nodes, - re_dfastate_t **work_state_log, - re_dfastate_t **state_log, - re_match_context_t *mctx); +static reg_errcode_t transit_state_mb (const regex_t *preg, + re_dfastate_t *pstate, + const re_string_t *input, + re_dfastate_t **state_log, + re_match_context_t *mctx); +static reg_errcode_t transit_state_bkref (const regex_t *preg, + re_dfastate_t *pstate, + const re_string_t *input, + re_dfastate_t **state_log, + re_match_context_t *mctx); +static reg_errcode_t transit_state_bkref_loop (const regex_t *preg, + const re_string_t *input, + re_node_set *nodes, + re_dfastate_t **work_state_log, + re_dfastate_t **state_log, + re_match_context_t *mctx); static re_dfastate_t **build_trtable (const regex_t *dfa, const re_dfastate_t *state, int fl_search); @@ -141,13 +150,15 @@ regexec (preg, string, nmatch, pmatch, eflags) regmatch_t pmatch[]; int eflags; { + reg_errcode_t err; int length = strlen (string); if (preg->no_sub) - return re_search_internal (preg, string, length, 0, length, 0, - NULL, eflags); + err = re_search_internal (preg, string, length, 0, length, 0, + NULL, eflags); else - return re_search_internal (preg, string, length, 0, length, nmatch, - pmatch, eflags); + err = re_search_internal (preg, string, length, 0, length, nmatch, + pmatch, eflags); + return err != REG_NOERROR; } #ifdef _LIBC weak_alias (__regexec, regexec) @@ -164,7 +175,8 @@ re_match (buffer, string, length, start, regs) int length, start; struct re_registers *regs; { - int i, nregs, result, rval, eflags = 0; + reg_errcode_t result; + int i, nregs, rval, eflags = 0; regmatch_t *pmatch; eflags |= (buffer->not_bol) ? REG_NOTBOL : 0; @@ -238,7 +250,7 @@ re_match (buffer, string, length, start, regs) } } /* Return value is -1 if not match, the length of mathing otherwise. */ - rval = (result) ? -1 : pmatch[0].rm_eo - pmatch[0].rm_so; + rval = (result != REG_NOERROR) ? -1 : pmatch[0].rm_eo - pmatch[0].rm_so; re_free (pmatch); return rval; } @@ -290,7 +302,8 @@ re_search (bufp, string, size, startpos, range, regs) int size, startpos, range; struct re_registers *regs; { - int i, nregs, result, real_range, rval, eflags = 0; + reg_errcode_t result; + int i, nregs, real_range, rval, eflags = 0; regmatch_t *pmatch; eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; @@ -376,7 +389,7 @@ re_search (bufp, string, size, startpos, range, regs) } /* Return value is -1 if not match, the position where the mathing starts otherwise. */ - rval = (result) ? -1 : pmatch[0].rm_so; + rval = (result != REG_NOERROR) ? -1 : pmatch[0].rm_so; re_free (pmatch); return rval; } @@ -486,11 +499,12 @@ static re_node_set empty_set; length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same mingings with regexec. START, and RANGE have the same meanings with re_search. - Return 0 if we find a match and REG_NOMATCH if not. + Return REG_NOERROR if we find a match, and REG_NOMATCH if not, + otherwise return the error code. Note: We assume front end functions already check ranges. (START + RANGE >= 0 && START + RANGE <= LENGTH) */ -static int +static reg_errcode_t re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags) const regex_t *preg; const char *string; @@ -498,6 +512,7 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags) size_t nmatch; regmatch_t pmatch[]; { + reg_errcode_t err; re_dfa_t *dfa = (re_dfa_t *)preg->buffer; re_string_t input; re_dfastate_t **state_log; @@ -510,7 +525,7 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags) if (preg->used == 0 || dfa->init_state == NULL || dfa->init_state_word == NULL || dfa->init_state_nl == NULL || dfa->init_state_begbuf == NULL) - return 1; + return REG_NOMATCH; re_node_set_init_empty (&empty_set); @@ -522,16 +537,24 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags) back-reference or a node which can accept multibyte character or multi character collating element. */ if (nmatch > 1 || dfa->has_mb_node) - state_log = re_malloc (re_dfastate_t *, length + 1); + { + state_log = re_malloc (re_dfastate_t *, length + 1); + if (state_log == NULL) + return REG_ESPACE; + } else state_log = NULL; if (preg->syntax & RE_ICASE) - re_string_construct_toupper (&input, string, length, preg->translate); + err = re_string_construct_toupper (&input, string, length, preg->translate); else - re_string_construct (&input, string, length, preg->translate); + err = re_string_construct (&input, string, length, preg->translate); + if (err != REG_NOERROR) + return err; - match_ctx_init (&mctx, eflags, dfa->nbackref * 2); + err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); + if (err != REG_NOERROR) + return err; #ifdef DEBUG /* We assume front-end functions already check them. */ @@ -557,7 +580,12 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags) match_last = check_matching (preg, &input, &mctx, state_log, match_first, 0, fl_longest_match); if (match_last != -1) - break; + { + if (match_last == -2) + return REG_ESPACE; + else + break; /* We found a matching. */ + } } } /* Update counter. */ @@ -598,8 +626,13 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags) #endif halt_node = check_halt_state_context (preg, pstate, &input, match_last, eflags); - sift_states_backward (preg, state_log, &mctx, &input, halt_node); - set_regs (preg, state_log, &mctx, &input, nmatch, pmatch, halt_node); + err = sift_states_backward (preg, state_log, &mctx, &input, halt_node); + if (err != REG_NOERROR) + return err; + err = set_regs (preg, state_log, &mctx, &input, nmatch, pmatch, + halt_node); + if (err != REG_NOERROR) + return err; } } @@ -607,21 +640,23 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags) if (dfa->nbackref) match_ctx_free (&mctx); re_string_destruct (&input); - return match_last == -1; + return (match_last == -1) ? REG_NOMATCH : REG_NOERROR; } -/* Acquire an initial state. +/* Acquire an initial state and return it. We must select appropriate initial state depending on the context, since initial states may have constraints like "\<", "^", etc.. */ static inline re_dfastate_t * -acquire_init_state_context (preg, input, idx, eflags) - const regex_t *preg; - const re_string_t *input; - int idx, eflags; +acquire_init_state_context (err, preg, input, idx, eflags) + reg_errcode_t *err; + const regex_t *preg; + const re_string_t *input; + int idx, eflags; { re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + *err = REG_NOERROR; if (dfa->init_state->has_constraint) { unsigned int context; @@ -636,9 +671,12 @@ acquire_init_state_context (preg, input, idx, eflags) else if (IS_NEWLINE_CONTEXT (context)) return dfa->init_state_nl; else if (IS_BEGBUF_CONTEXT (context)) - /* It is relatively rare case, then calculate on demand. */ - return re_acquire_state_context (dfa, dfa->init_state->entrance_nodes, - context); + { + /* It is relatively rare case, then calculate on demand. */ + return re_acquire_state_context (err, dfa, + dfa->init_state->entrance_nodes, + context); + } else /* Must not happen? */ return dfa->init_state; @@ -648,7 +686,8 @@ acquire_init_state_context (preg, input, idx, eflags) } /* Check whether the regular expression match input string INPUT or not, - and return the index where the matching end, or return -1 if not match. + and return the index where the matching end, return -1 if not match, + or return -2 in case of an error. FL_SEARCH means we must search where the matching starts, FL_LONGEST_MATCH means we want the POSIX longest matching. */ @@ -661,11 +700,15 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search, re_dfastate_t **state_log; int start_idx, fl_search, fl_longest_match; { + reg_errcode_t err; int match = 0, match_last = -1; re_dfastate_t *cur_state; - cur_state = acquire_init_state_context (preg, input, start_idx, + cur_state = acquire_init_state_context (&err, preg, input, start_idx, mctx->eflags); + /* An initial state must not be NULL(invalid state). */ + if (cur_state == NULL) + return -2; if (state_log != NULL) state_log[start_idx] = cur_state; /* If the RE accepts NULL string. */ @@ -687,11 +730,13 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search, while (!re_string_eoi (input)) { - cur_state = transit_state (preg, cur_state, input, fl_search && !match, - state_log, mctx); - if (cur_state == NULL) /* Reached at the invalid state. */ + cur_state = transit_state (&err, preg, cur_state, input, + fl_search && !match, state_log, mctx); + if (cur_state == NULL) /* Reached at the invalid state or an error. */ { int cur_str_idx = re_string_cur_idx (input); + if (err != REG_NOERROR) + return -2; if (fl_search && !match) { /* Restart from initial state, since we are searching @@ -699,9 +744,11 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search, #ifdef RE_ENABLE_I18N if (MB_CUR_MAX == 1 || re_string_first_byte (input, cur_str_idx)) #endif /* RE_ENABLE_I18N */ - cur_state = acquire_init_state_context (preg, input, + cur_state = acquire_init_state_context (&err, preg, input, cur_str_idx, mctx->eflags); + if (cur_state == NULL && err != REG_NOERROR) + return -2; if (state_log != NULL) state_log[cur_str_idx] = cur_state; } @@ -787,9 +834,10 @@ check_halt_state_context (preg, state, input, idx, eflags) return 0; } -/* Compute the next node to which "NFA" transit from NODE. - Return the destination node, and update EPS_VIA_NODES. - ("NFA" is a NFA corresponding to the DFA. */ +/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA + corresponding to the DFA). + Return the destination node, and update EPS_VIA_NODES, return -1 in case + of errors. */ static int proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes) @@ -801,10 +849,12 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes) re_node_set *eps_via_nodes; { re_dfa_t *dfa = (re_dfa_t *)preg->buffer; - int i, dest_node = -1; + int i, dest_node = -1, err; if (IS_EPSILON_NODE (dfa->nodes[node].type)) { - re_node_set_insert (eps_via_nodes, node); + err = re_node_set_insert (eps_via_nodes, node); + if (err < 0) + return -1; for (i = 0; i < state_log[*pidx]->nodes.nelem; ++i) { int candidate = state_log[*pidx]->nodes.elems[i]; @@ -845,7 +895,9 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes) } if (naccepted == 0) { - re_node_set_insert (eps_via_nodes, node); + err = re_node_set_insert (eps_via_nodes, node); + if (err < 0) + return -1; dest_node = dfa->nexts[node]; if (re_node_set_contains (&state_log[*pidx]->nodes, dest_node)) return dest_node; @@ -885,7 +937,7 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes) Note: We assume that pmatch[0] is already set, and pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). */ -static void +static reg_errcode_t set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node) const regex_t *preg; re_dfastate_t **state_log; @@ -944,9 +996,11 @@ set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node) /* Proceed to next node. */ cur_node = proceed_next_node (preg, state_log, mctx, input, &idx, cur_node, &eps_via_nodes); + if (cur_node < 0) + return REG_ESPACE; } re_node_set_free (&eps_via_nodes); - return; + return REG_NOERROR; } #define NUMBER_OF_STATE 1 @@ -974,7 +1028,7 @@ set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node) #define STATE_NODE_CONTAINS(state,node) \ ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) -static void +static reg_errcode_t sift_states_backward (preg, state_log, mctx, input, last_node) const regex_t *preg; re_dfastate_t **state_log; @@ -982,6 +1036,7 @@ sift_states_backward (preg, state_log, mctx, input, last_node) const re_string_t *input; int last_node; { + reg_errcode_t err; re_dfa_t *dfa = (re_dfa_t *)preg->buffer; re_node_set state_buf; int str_idx = mctx->match_last; @@ -990,18 +1045,28 @@ sift_states_backward (preg, state_log, mctx, input, last_node) #ifdef DEBUG assert (state_log != NULL && state_log[str_idx] != NULL); #endif - re_node_set_alloc (&state_buf, NUMBER_OF_STATE); + err = re_node_set_alloc (&state_buf, NUMBER_OF_STATE); + if (err != REG_NOERROR) + return err; plog = &state_log[str_idx]->nodes; /* Build sifted state_log[str_idx]. It has the nodes which can epsilon transit to the last_node and the last_node itself. */ - re_node_set_intersect (&state_buf, plog, dfa->inveclosures + last_node); + err = re_node_set_intersect (&state_buf, plog, dfa->inveclosures + last_node); + if (err != REG_NOERROR) + return err; if (state_log[str_idx] != NULL && state_log[str_idx]->has_backref) - add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf); + { + err = add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf); + if (err != REG_NOERROR) + return err; + } /* Update state log. */ - state_log[str_idx] = re_acquire_state (dfa, &state_buf); + state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf); + if (state_log[str_idx] == NULL && err != REG_NOERROR) + return err; /* Then check each states in the state_log. */ while (str_idx > mctx->match_first) @@ -1062,17 +1127,26 @@ sift_states_backward (preg, state_log, mctx, input, last_node) /* `prev_node' may point the entity of the OP_CONTEXT_NODE, then we use plog->elems[i] instead. */ - re_node_set_add_intersect (&state_buf, plog, - dfa->inveclosures + prev_node); + err = re_node_set_add_intersect (&state_buf, plog, + dfa->inveclosures + prev_node); + if (err != REG_NOERROR) + return err; } if (state_log[str_idx] != NULL && state_log[str_idx]->has_backref) - add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf); + { + err = add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf); + if (err != REG_NOERROR) + return err; + } /* Update state_log. */ - state_log[str_idx] = re_acquire_state (dfa, &state_buf); + state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf); + if (state_log[str_idx] == NULL && err != REG_NOERROR) + return err; } re_node_set_free (&state_buf); + return REG_NOERROR; } /* Helper functions. */ @@ -1136,7 +1210,7 @@ sift_states_iter_bkref (dfa, state_log, mctx_entry, node_idx, idx, match_first, return naccepted; } -static void +static reg_errcode_t add_epsilon_backreference (dfa, mctx, plog, idx, state_buf) const re_dfa_t *dfa; const re_match_context_t *mctx; @@ -1164,12 +1238,16 @@ add_epsilon_backreference (dfa, mctx, plog, idx, state_buf) } if (j < mctx->nbkref_ents || idx == mctx->match_first) { - re_node_set_add_intersect (state_buf, plog, - dfa->inveclosures + node_idx); + reg_errcode_t err; + err = re_node_set_add_intersect (state_buf, plog, + dfa->inveclosures + node_idx); + if (err != REG_NOERROR) + return err; i = 0; } } } + return REG_NOERROR; } /* Functions for state transition. */ @@ -1180,17 +1258,19 @@ add_epsilon_backreference (dfa, mctx, plog, idx, state_buf) update the destination of STATE_LOG. */ static re_dfastate_t * -transit_state (preg, state, input, fl_search, state_log, mctx) - const regex_t *preg; - re_dfastate_t *state, **state_log; - re_string_t *input; - int fl_search; - re_match_context_t *mctx; +transit_state (err, preg, state, input, fl_search, state_log, mctx) + reg_errcode_t *err; + const regex_t *preg; + re_dfastate_t *state, **state_log; + re_string_t *input; + int fl_search; + re_match_context_t *mctx; { re_dfa_t *dfa = (re_dfa_t *) preg->buffer; re_dfastate_t **trtable, *next_state; unsigned char ch; + *err = REG_NOERROR; if (state == NULL) { next_state = state; @@ -1200,7 +1280,11 @@ transit_state (preg, state, input, fl_search, state_log, mctx) { /* If the current state can accept multibyte. */ if (state->accept_mb) - transit_state_mb (preg, state, input, state_log, mctx); + { + *err = transit_state_mb (preg, state, input, state_log, mctx); + if (*err != REG_NOERROR) + return NULL; + } /* Then decide the next state with the single byte. */ if (1) @@ -1221,7 +1305,10 @@ transit_state (preg, state, input, fl_search, state_log, mctx) else { /* don't use transition table */ - next_state = transit_state_sb (preg, state, input, fl_search, mctx); + next_state = transit_state_sb (err, preg, state, input, fl_search, + mctx); + if (next_state == NULL && err != REG_NOERROR) + return NULL; } } @@ -1252,7 +1339,10 @@ transit_state (preg, state, input, fl_search, state_log, mctx) if (next_state != NULL) { table_nodes = next_state->entrance_nodes; - re_node_set_init_union (&next_nodes, table_nodes, log_nodes); + *err = re_node_set_init_union (&next_nodes, table_nodes, + log_nodes); + if (*err != REG_NOERROR) + return NULL; } else next_nodes = *log_nodes; @@ -1262,14 +1352,19 @@ transit_state (preg, state, input, fl_search, state_log, mctx) context = re_string_context_at (input, re_string_cur_idx (input) - 1, mctx->eflags, preg->newline_anchor); next_state = state_log[cur_idx] - = re_acquire_state_context (dfa, &next_nodes, context); + = re_acquire_state_context (err, dfa, &next_nodes, context); + /* We don't need to check errors here, since the return value of + this function is next_state and ERR is already set. */ + if (table_nodes != NULL) re_node_set_free (&next_nodes); } /* If the next state has back references. */ if (next_state != NULL && next_state->has_backref) { - transit_state_bkref (preg, next_state, input, state_log, mctx); + *err = transit_state_bkref (preg, next_state, input, state_log, mctx); + if (*err != REG_NOERROR) + return NULL; next_state = state_log[cur_idx]; } } @@ -1282,12 +1377,13 @@ transit_state (preg, state, input, fl_search, state_log, mctx) accepting the current input byte. */ static re_dfastate_t * -transit_state_sb (preg, state, input, fl_search, mctx) - const regex_t *preg; - re_dfastate_t *state; - re_string_t *input; - int fl_search; - re_match_context_t *mctx; +transit_state_sb (err, preg, state, input, fl_search, mctx) + reg_errcode_t *err; + const regex_t *preg; + re_dfastate_t *state; + re_string_t *input; + int fl_search; + re_match_context_t *mctx; { re_dfa_t *dfa = (re_dfa_t *) preg->buffer; re_node_set next_nodes; @@ -1295,14 +1391,20 @@ transit_state_sb (preg, state, input, fl_search, mctx) int node_cnt, cur_str_idx = re_string_cur_idx (input); unsigned int context; - re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); + *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); + if (*err != REG_NOERROR) + return NULL; for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) { int cur_node = state->nodes.elems[node_cnt]; if (check_node_accept (preg, dfa->nodes + cur_node, input, cur_str_idx, mctx->eflags)) - re_node_set_merge (&next_nodes, - dfa->eclosures + dfa->nexts[cur_node]); + { + *err = re_node_set_merge (&next_nodes, + dfa->eclosures + dfa->nexts[cur_node]); + if (*err != REG_NOERROR) + return NULL; + } } if (fl_search) { @@ -1317,23 +1419,32 @@ transit_state_sb (preg, state, input, fl_search, mctx) } if (!not_initial) #endif - re_node_set_merge (&next_nodes, dfa->init_state->entrance_nodes); + { + *err = re_node_set_merge (&next_nodes, + dfa->init_state->entrance_nodes); + if (*err != REG_NOERROR) + return NULL; + } } context = re_string_context_at (input, cur_str_idx, mctx->eflags, preg->newline_anchor); - next_state = re_acquire_state_context (dfa, &next_nodes, context); + next_state = re_acquire_state_context (err, dfa, &next_nodes, context); + /* We don't need to check errors here, since the return value of + this function is next_state and ERR is already set. */ + re_node_set_free (&next_nodes); re_string_skip_bytes (input, 1); return next_state; } -static void +static reg_errcode_t transit_state_mb (preg, pstate, input, state_log, mctx) const regex_t *preg; re_dfastate_t *pstate, **state_log; const re_string_t *input; re_match_context_t *mctx; { + reg_errcode_t err; re_dfa_t *dfa = (re_dfa_t *) preg->buffer; int i; @@ -1376,39 +1487,50 @@ transit_state_mb (preg, pstate, input, state_log, mctx) if (dest_state == NULL) dest_nodes = *new_nodes; else - re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes, - new_nodes); + { + err = re_node_set_init_union (&dest_nodes, + dest_state->entrance_nodes, new_nodes); + if (err != REG_NOERROR) + return err; + } context = re_string_context_at (input, dest_idx - 1, mctx->eflags, preg->newline_anchor); - state_log[dest_idx] = re_acquire_state_context (dfa, &dest_nodes, context); + state_log[dest_idx] = re_acquire_state_context (&err, dfa, &dest_nodes, + context); + if (state_log[dest_idx] == NULL && err != REG_NOERROR) + return err; if (dest_state != NULL) re_node_set_free (&dest_nodes); } + return REG_NOERROR; } -static void +static reg_errcode_t transit_state_bkref (preg, pstate, input, state_log, mctx) const regex_t *preg; re_dfastate_t *pstate, **state_log; const re_string_t *input; re_match_context_t *mctx; { + reg_errcode_t err; re_dfastate_t **work_state_log; #ifdef DEBUG assert (mctx->match_first != -1); #endif work_state_log = re_malloc (re_dfastate_t *, re_string_cur_idx (input) + 1); + if (work_state_log == NULL) + return REG_ESPACE; - transit_state_bkref_loop (preg, input, &pstate->nodes, work_state_log, - state_log, mctx); - + err = transit_state_bkref_loop (preg, input, &pstate->nodes, work_state_log, + state_log, mctx); re_free (work_state_log); + return err; } /* Caller must allocate `work_state_log'. */ -static void +static reg_errcode_t transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx) const regex_t *preg; const re_string_t *input; @@ -1416,10 +1538,13 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx) re_dfastate_t **work_state_log, **state_log; re_match_context_t *mctx; { + reg_errcode_t err; re_dfa_t *dfa = (re_dfa_t *) preg->buffer; int i, j; regmatch_t *cur_regs = re_malloc (regmatch_t, preg->re_nsub + 1); int cur_str_idx = re_string_cur_idx (input); + if (cur_regs == NULL) + return REG_ESPACE; for (i = 0; i < nodes->nelem; ++i) { @@ -1474,7 +1599,9 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx) /* Successfully matched, add a new cache entry. */ dest_str_idx = cur_str_idx + subexp_len; - match_ctx_add_entry (mctx, node_idx, cur_str_idx, dest_str_idx); + err = match_ctx_add_entry (mctx, node_idx, cur_str_idx, dest_str_idx); + if (err != REG_NOERROR) + return err; clean_state_log_if_need (state_log, mctx, dest_str_idx); /* And add the epsilon closures (which is `new_dest_nodes') of @@ -1494,29 +1621,44 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx) : state_log[cur_str_idx]->nodes.nelem); /* Add `new_dest_node' to state_log. */ if (dest_state == NULL) - state_log[dest_str_idx] = re_acquire_state_context (dfa, - new_dest_nodes, - context); + { + state_log[dest_str_idx] = re_acquire_state_context (&err, dfa, + new_dest_nodes, + context); + if (state_log[dest_str_idx] == NULL && err != REG_NOERROR) + return err; + } else { re_node_set dest_nodes; - re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes, - new_dest_nodes); - state_log[dest_str_idx] = re_acquire_state_context (dfa, &dest_nodes, + err = re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes, + new_dest_nodes); + if (err != REG_NOERROR) + return err; + state_log[dest_str_idx] = re_acquire_state_context (&err, dfa, + &dest_nodes, context); + if (state_log[dest_str_idx] == NULL && err != REG_NOERROR) + return err; re_node_set_free (&dest_nodes); } /* We need to check recursively if the backreference can epsilon transit. */ if (subexp_len == 0 && state_log[cur_str_idx]->nodes.nelem > prev_nelem) - transit_state_bkref_loop (preg, input, new_dest_nodes, work_state_log, - state_log, mctx); + { + err = transit_state_bkref_loop (preg, input, new_dest_nodes, + work_state_log, state_log, mctx); + if (err != REG_NOERROR) + return err; + } } re_free (cur_regs); + return REG_NOERROR; } -/* Build transition table for the state. */ +/* Build transition table for the state. + Return the new table if succeeded, otherwise return NULL. */ static re_dfastate_t ** build_trtable (preg, state, fl_search) @@ -1524,6 +1666,7 @@ build_trtable (preg, state, fl_search) const re_dfastate_t *state; int fl_search; { + reg_errcode_t err; re_dfa_t *dfa = (re_dfa_t *) preg->buffer; int i, j, k, ch; int ndests; /* Number of the destination states from `state'. */ @@ -1541,15 +1684,18 @@ build_trtable (preg, state, fl_search) /* Initialize transiton table. */ trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); + if (dests_node == NULL || dests_ch == NULL || trtable == NULL) + return NULL; /* At first, group all nodes belonging to `state' into several destinations. */ ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch); - if (ndests == 0) + if (ndests <= 0) { re_free (dests_node); re_free (dests_ch); - return trtable; + /* Return NULL in case of an error, trtable otherwise. */ + return (ndests < 0) ? NULL : trtable; } dest_states = re_malloc (re_dfastate_t *, ndests); @@ -1557,7 +1703,11 @@ build_trtable (preg, state, fl_search) dest_states_nl = re_malloc (re_dfastate_t *, ndests); bitset_empty (acceptable); - re_node_set_alloc (&follows, ndests + 1); + err = re_node_set_alloc (&follows, ndests + 1); + if (dest_states == NULL || dest_states_word == NULL || dest_states_nl == NULL + || err != REG_NOERROR) + return NULL; + /* Then build the states for all destinations. */ for (i = 0; i < ndests; ++i) { @@ -1569,7 +1719,9 @@ build_trtable (preg, state, fl_search) next_node = dfa->nexts[dests_node[i].elems[j]]; if (next_node != -1) { - re_node_set_merge (&follows, dfa->eclosures + next_node); + err = re_node_set_merge (&follows, dfa->eclosures + next_node); + if (err != REG_NOERROR) + return NULL; } } /* If search flag is set, merge the initial state. */ @@ -1585,17 +1737,28 @@ build_trtable (preg, state, fl_search) } if (!not_initial) #endif - re_node_set_merge (&follows, dfa->init_state->entrance_nodes); + { + err = re_node_set_merge (&follows, + dfa->init_state->entrance_nodes); + if (err != REG_NOERROR) + return NULL; + } } - dest_states[i] = re_acquire_state_context (dfa, &follows, 0); + dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); + if (dest_states[i] == NULL && err != REG_NOERROR) + return NULL; /* If the new state has context constraint, build appropriate states for these contexts. */ if (dest_states[i]->has_constraint) { - dest_states_word[i] = re_acquire_state_context (dfa, &follows, + dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, CONTEXT_WORD); - dest_states_nl[i] = re_acquire_state_context (dfa, &follows, + if (dest_states_word[i] == NULL && err != REG_NOERROR) + return NULL; + dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, CONTEXT_NEWLINE); + if (dest_states_nl[i] == NULL && err != REG_NOERROR) + return NULL; } else { @@ -1654,6 +1817,7 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch) re_node_set *dests_node; bitset *dests_ch; { + reg_errcode_t err; const re_dfa_t *dfa = (re_dfa_t *) preg->buffer; int i, j, k; int ndests; /* Number of the destinations from `state'. */ @@ -1750,12 +1914,16 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch) { bitset_copy (dests_ch[ndests], remains); bitset_copy (dests_ch[j], intersec); - re_node_set_init_copy (dests_node + ndests, &dests_node[j]); + err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); + if (err != REG_NOERROR) + return -1; ++ndests; } /* Put the position in the current group. */ - re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); + err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); + if (err < 0) + return -1; /* If all characters are consumed, go to next node. */ if (!not_consumed) @@ -1765,7 +1933,9 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch) if (j == ndests) { bitset_copy (dests_ch[ndests], accepts); - re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); + err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); + if (err != REG_NOERROR) + return -1; ++ndests; bitset_empty (accepts); } @@ -2028,7 +2198,7 @@ check_node_accept (preg, node, input, idx, eflags) /* Functions for matching context. */ -static void +static reg_errcode_t match_ctx_init (mctx, eflags, n) re_match_context_t *mctx; int eflags; @@ -2037,12 +2207,17 @@ match_ctx_init (mctx, eflags, n) mctx->eflags = eflags; mctx->match_first = mctx->match_last = -1; if (n > 0) - mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); + { + mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); + if (mctx->bkref_ents == NULL) + return REG_ESPACE; + } else mctx->bkref_ents = NULL; mctx->nbkref_ents = 0; mctx->abkref_ents = n; mctx->max_bkref_len = 0; + return REG_NOERROR; } static void @@ -2054,7 +2229,7 @@ match_ctx_free (mctx) /* Add a new backreference entry to the cache. */ -static void +static reg_errcode_t match_ctx_add_entry (mctx, node, from, to) re_match_context_t *mctx; int node, from, to; @@ -2064,6 +2239,8 @@ match_ctx_add_entry (mctx, node, from, to) mctx->bkref_ents = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, mctx->abkref_ents * 2); + if (mctx->bkref_ents == NULL) + return REG_ESPACE; memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); mctx->abkref_ents *= 2; @@ -2073,4 +2250,5 @@ match_ctx_add_entry (mctx, node, from, to) mctx->bkref_ents[mctx->nbkref_ents++].to = to; if (mctx->max_bkref_len < to - from) mctx->max_bkref_len = to - from; + return REG_NOERROR; } -- 2.7.4