1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002-2006, 2010, 2011 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
20 static void re_string_construct_common (const char *str, int len,
22 RE_TRANSLATE_TYPE trans, int icase,
23 const re_dfa_t *dfa) internal_function;
24 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
25 const re_node_set *nodes,
26 unsigned int hash) internal_function;
27 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
28 const re_node_set *nodes,
30 unsigned int hash) internal_function;
33 #undef MAX /* safety */
35 MAX(size_t a, size_t b)
37 return (a > b ? a : b);
41 /* Functions for string operation. */
43 /* This function allocate the buffers. It is necessary to call
44 re_string_reconstruct before using the object. */
47 internal_function __attribute_warn_unused_result__
48 re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
49 RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
54 /* Ensure at least one character fits into the buffers. */
55 if (init_len < dfa->mb_cur_max)
56 init_len = dfa->mb_cur_max;
57 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
58 re_string_construct_common (str, len, pstr, trans, icase, dfa);
60 ret = re_string_realloc_buffers (pstr, init_buf_len);
61 if (BE (ret != REG_NOERROR, 0))
64 pstr->word_char = dfa->word_char;
65 pstr->word_ops_used = dfa->word_ops_used;
66 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
67 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
68 pstr->valid_raw_len = pstr->valid_len;
72 /* This function allocate the buffers, and initialize them. */
75 internal_function __attribute_warn_unused_result__
76 re_string_construct (re_string_t *pstr, const char *str, int len,
77 RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
80 memset (pstr, '\0', sizeof (re_string_t));
81 re_string_construct_common (str, len, pstr, trans, icase, dfa);
85 ret = re_string_realloc_buffers (pstr, len + 1);
86 if (BE (ret != REG_NOERROR, 0))
89 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
94 if (dfa->mb_cur_max > 1)
98 ret = build_wcs_upper_buffer (pstr);
99 if (BE (ret != REG_NOERROR, 0))
101 if (pstr->valid_raw_len >= len)
103 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
105 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
106 if (BE (ret != REG_NOERROR, 0))
111 #endif /* RE_ENABLE_I18N */
112 build_upper_buffer (pstr);
116 #ifdef RE_ENABLE_I18N
117 if (dfa->mb_cur_max > 1)
118 build_wcs_buffer (pstr);
120 #endif /* RE_ENABLE_I18N */
123 re_string_translate_buffer (pstr);
126 pstr->valid_len = pstr->bufs_len;
127 pstr->valid_raw_len = pstr->bufs_len;
135 /* Helper functions for re_string_allocate, and re_string_construct. */
138 internal_function __attribute_warn_unused_result__
139 re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
141 #ifdef RE_ENABLE_I18N
142 if (pstr->mb_cur_max > 1)
146 /* Avoid overflow in realloc. */
147 const size_t max_object_size = MAX (sizeof (wint_t), sizeof (int));
148 if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
151 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
152 if (BE (new_wcs == NULL, 0))
155 if (pstr->offsets != NULL)
157 int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
158 if (BE (new_offsets == NULL, 0))
160 pstr->offsets = new_offsets;
163 #endif /* RE_ENABLE_I18N */
164 if (pstr->mbs_allocated)
166 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
168 if (BE (new_mbs == NULL, 0))
172 pstr->bufs_len = new_buf_len;
179 re_string_construct_common (const char *str, int len, re_string_t *pstr,
180 RE_TRANSLATE_TYPE trans, int icase,
183 pstr->raw_mbs = (const unsigned char *) str;
187 pstr->icase = icase ? 1 : 0;
188 pstr->mbs_allocated = (trans != NULL || icase);
189 pstr->mb_cur_max = dfa->mb_cur_max;
190 pstr->is_utf8 = dfa->is_utf8;
191 pstr->map_notascii = dfa->map_notascii;
192 pstr->stop = pstr->len;
193 pstr->raw_stop = pstr->stop;
196 #ifdef RE_ENABLE_I18N
198 /* Build wide character buffer PSTR->WCS.
199 If the byte sequence of the string are:
200 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
201 Then wide character buffer will be:
202 <wc1> , WEOF , <wc2> , WEOF , <wc3>
203 We use WEOF for padding, they indicate that the position isn't
204 a first byte of a multibyte character.
206 Note that this function assumes PSTR->VALID_LEN elements are already
207 built and starts from PSTR->VALID_LEN. */
211 build_wcs_buffer (re_string_t *pstr)
214 unsigned char buf[MB_LEN_MAX];
215 assert (MB_LEN_MAX >= pstr->mb_cur_max);
217 unsigned char buf[64];
220 int byte_idx, end_idx, remain_len;
223 /* Build the buffers from pstr->valid_len to either pstr->len or
225 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
226 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
231 remain_len = end_idx - byte_idx;
232 prev_st = pstr->cur_state;
233 /* Apply the translation if we need. */
234 if (BE (pstr->trans != NULL, 0))
238 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
240 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
241 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
243 p = (const char *) buf;
246 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
247 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
248 if (BE (mbclen == (size_t) -1 || mbclen == 0
249 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
251 /* We treat these cases as a singlebyte character. */
253 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
254 if (BE (pstr->trans != NULL, 0))
255 wc = pstr->trans[wc];
256 pstr->cur_state = prev_st;
258 else if (BE (mbclen == (size_t) -2, 0))
260 /* The buffer doesn't have enough space, finish to build. */
261 pstr->cur_state = prev_st;
265 /* Write wide character and padding. */
266 pstr->wcs[byte_idx++] = wc;
267 /* Write paddings. */
268 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
269 pstr->wcs[byte_idx++] = WEOF;
271 pstr->valid_len = byte_idx;
272 pstr->valid_raw_len = byte_idx;
275 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
276 but for REG_ICASE. */
279 internal_function __attribute_warn_unused_result__
280 build_wcs_upper_buffer (re_string_t *pstr)
283 int src_idx, byte_idx, end_idx, remain_len;
286 char buf[MB_LEN_MAX];
287 assert (MB_LEN_MAX >= pstr->mb_cur_max);
292 byte_idx = pstr->valid_len;
293 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
295 /* The following optimization assumes that ASCII characters can be
296 mapped to wide characters with a simple cast. */
297 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
299 while (byte_idx < end_idx)
303 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
304 && mbsinit (&pstr->cur_state))
306 /* In case of a singlebyte character. */
308 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
309 /* The next step uses the assumption that wchar_t is encoded
310 ASCII-safe: all ASCII values can be converted like this. */
311 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
316 remain_len = end_idx - byte_idx;
317 prev_st = pstr->cur_state;
318 mbclen = __mbrtowc (&wc,
319 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
320 + byte_idx), remain_len, &pstr->cur_state);
321 if (BE (mbclen + 2 > 2, 1))
329 mbcdlen = wcrtomb (buf, wcu, &prev_st);
330 if (BE (mbclen == mbcdlen, 1))
331 memcpy (pstr->mbs + byte_idx, buf, mbclen);
339 memcpy (pstr->mbs + byte_idx,
340 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
341 pstr->wcs[byte_idx++] = wcu;
342 /* Write paddings. */
343 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
344 pstr->wcs[byte_idx++] = WEOF;
346 else if (mbclen == (size_t) -1 || mbclen == 0
347 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
349 /* It is an invalid character, an incomplete character
350 at the end of the string, or '\0'. Just use the byte. */
351 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
352 pstr->mbs[byte_idx] = ch;
353 /* And also cast it to wide char. */
354 pstr->wcs[byte_idx++] = (wchar_t) ch;
355 if (BE (mbclen == (size_t) -1, 0))
356 pstr->cur_state = prev_st;
360 /* The buffer doesn't have enough space, finish to build. */
361 pstr->cur_state = prev_st;
365 pstr->valid_len = byte_idx;
366 pstr->valid_raw_len = byte_idx;
370 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
375 remain_len = end_idx - byte_idx;
376 prev_st = pstr->cur_state;
377 if (BE (pstr->trans != NULL, 0))
381 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
383 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
384 buf[i] = pstr->trans[ch];
386 p = (const char *) buf;
389 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
390 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
391 if (BE (mbclen + 2 > 2, 1))
399 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
400 if (BE (mbclen == mbcdlen, 1))
401 memcpy (pstr->mbs + byte_idx, buf, mbclen);
402 else if (mbcdlen != (size_t) -1)
406 if (byte_idx + mbcdlen > pstr->bufs_len)
408 pstr->cur_state = prev_st;
412 if (pstr->offsets == NULL)
414 pstr->offsets = re_malloc (int, pstr->bufs_len);
416 if (pstr->offsets == NULL)
419 if (!pstr->offsets_needed)
421 for (i = 0; i < (size_t) byte_idx; ++i)
422 pstr->offsets[i] = i;
423 pstr->offsets_needed = 1;
426 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
427 pstr->wcs[byte_idx] = wcu;
428 pstr->offsets[byte_idx] = src_idx;
429 for (i = 1; i < mbcdlen; ++i)
431 pstr->offsets[byte_idx + i]
432 = src_idx + (i < mbclen ? i : mbclen - 1);
433 pstr->wcs[byte_idx + i] = WEOF;
435 pstr->len += mbcdlen - mbclen;
436 if (pstr->raw_stop > src_idx)
437 pstr->stop += mbcdlen - mbclen;
438 end_idx = (pstr->bufs_len > pstr->len)
439 ? pstr->len : pstr->bufs_len;
445 memcpy (pstr->mbs + byte_idx, p, mbclen);
448 memcpy (pstr->mbs + byte_idx, p, mbclen);
450 if (BE (pstr->offsets_needed != 0, 0))
453 for (i = 0; i < mbclen; ++i)
454 pstr->offsets[byte_idx + i] = src_idx + i;
458 pstr->wcs[byte_idx++] = wcu;
459 /* Write paddings. */
460 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
461 pstr->wcs[byte_idx++] = WEOF;
463 else if (mbclen == (size_t) -1 || mbclen == 0
464 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
466 /* It is an invalid character or '\0'. Just use the byte. */
467 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
469 if (BE (pstr->trans != NULL, 0))
470 ch = pstr->trans [ch];
471 pstr->mbs[byte_idx] = ch;
473 if (BE (pstr->offsets_needed != 0, 0))
474 pstr->offsets[byte_idx] = src_idx;
477 /* And also cast it to wide char. */
478 pstr->wcs[byte_idx++] = (wchar_t) ch;
479 if (BE (mbclen == (size_t) -1, 0))
480 pstr->cur_state = prev_st;
484 /* The buffer doesn't have enough space, finish to build. */
485 pstr->cur_state = prev_st;
489 pstr->valid_len = byte_idx;
490 pstr->valid_raw_len = src_idx;
494 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
499 re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
506 /* Skip the characters which are not necessary to check. */
507 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
508 rawbuf_idx < new_raw_idx;)
511 int remain_len = pstr->raw_len - rawbuf_idx;
512 prev_st = pstr->cur_state;
513 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
514 remain_len, &pstr->cur_state);
515 if (BE ((ssize_t) mbclen <= 0, 0))
517 /* We treat these cases as a single byte character. */
518 if (mbclen == 0 || remain_len == 0)
521 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
523 pstr->cur_state = prev_st;
527 /* Then proceed the next character. */
528 rawbuf_idx += mbclen;
530 *last_wc = (wint_t) wc;
533 #endif /* RE_ENABLE_I18N */
535 /* Build the buffer PSTR->MBS, and apply the translation if we need.
536 This function is used in case of REG_ICASE. */
540 build_upper_buffer (re_string_t *pstr)
542 int char_idx, end_idx;
543 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
545 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
547 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
548 if (BE (pstr->trans != NULL, 0))
549 ch = pstr->trans[ch];
551 pstr->mbs[char_idx] = toupper (ch);
553 pstr->mbs[char_idx] = ch;
555 pstr->valid_len = char_idx;
556 pstr->valid_raw_len = char_idx;
559 /* Apply TRANS to the buffer in PSTR. */
563 re_string_translate_buffer (re_string_t *pstr)
565 int buf_idx, end_idx;
566 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
568 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
570 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
571 pstr->mbs[buf_idx] = pstr->trans[ch];
574 pstr->valid_len = buf_idx;
575 pstr->valid_raw_len = buf_idx;
578 /* This function re-construct the buffers.
579 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
580 convert to upper case in case of REG_ICASE, apply translation. */
583 internal_function __attribute_warn_unused_result__
584 re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
586 int offset = idx - pstr->raw_mbs_idx;
587 if (BE (offset < 0, 0))
590 #ifdef RE_ENABLE_I18N
591 if (pstr->mb_cur_max > 1)
592 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
593 #endif /* RE_ENABLE_I18N */
594 pstr->len = pstr->raw_len;
595 pstr->stop = pstr->raw_stop;
597 pstr->raw_mbs_idx = 0;
598 pstr->valid_raw_len = 0;
599 pstr->offsets_needed = 0;
600 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
601 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
602 if (!pstr->mbs_allocated)
603 pstr->mbs = (unsigned char *) pstr->raw_mbs;
607 if (BE (offset != 0, 1))
609 /* Should the already checked characters be kept? */
610 if (BE (offset < pstr->valid_raw_len, 1))
612 /* Yes, move them to the front of the buffer. */
613 #ifdef RE_ENABLE_I18N
614 if (BE (pstr->offsets_needed, 0))
616 int low = 0, high = pstr->valid_len, mid;
619 mid = (high + low) / 2;
620 if (pstr->offsets[mid] > offset)
622 else if (pstr->offsets[mid] < offset)
628 if (pstr->offsets[mid] < offset)
630 pstr->tip_context = re_string_context_at (pstr, mid - 1,
632 /* This can be quite complicated, so handle specially
633 only the common and easy case where the character with
634 different length representation of lower and upper
635 case is present at or after offset. */
636 if (pstr->valid_len > offset
637 && mid == offset && pstr->offsets[mid] == offset)
639 memmove (pstr->wcs, pstr->wcs + offset,
640 (pstr->valid_len - offset) * sizeof (wint_t));
641 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
642 pstr->valid_len -= offset;
643 pstr->valid_raw_len -= offset;
644 for (low = 0; low < pstr->valid_len; low++)
645 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
649 /* Otherwise, just find out how long the partial multibyte
650 character at offset is and fill it with WEOF/255. */
651 pstr->len = pstr->raw_len - idx + offset;
652 pstr->stop = pstr->raw_stop - idx + offset;
653 pstr->offsets_needed = 0;
654 while (mid > 0 && pstr->offsets[mid - 1] == offset)
656 while (mid < pstr->valid_len)
657 if (pstr->wcs[mid] != WEOF)
661 if (mid == pstr->valid_len)
665 pstr->valid_len = pstr->offsets[mid] - offset;
668 for (low = 0; low < pstr->valid_len; ++low)
669 pstr->wcs[low] = WEOF;
670 memset (pstr->mbs, 255, pstr->valid_len);
673 pstr->valid_raw_len = pstr->valid_len;
679 pstr->tip_context = re_string_context_at (pstr, offset - 1,
681 #ifdef RE_ENABLE_I18N
682 if (pstr->mb_cur_max > 1)
683 memmove (pstr->wcs, pstr->wcs + offset,
684 (pstr->valid_len - offset) * sizeof (wint_t));
685 #endif /* RE_ENABLE_I18N */
686 if (BE (pstr->mbs_allocated, 0))
687 memmove (pstr->mbs, pstr->mbs + offset,
688 pstr->valid_len - offset);
689 pstr->valid_len -= offset;
690 pstr->valid_raw_len -= offset;
692 assert (pstr->valid_len > 0);
698 #ifdef RE_ENABLE_I18N
699 /* No, skip all characters until IDX. */
700 int prev_valid_len = pstr->valid_len;
702 if (BE (pstr->offsets_needed, 0))
704 pstr->len = pstr->raw_len - idx + offset;
705 pstr->stop = pstr->raw_stop - idx + offset;
706 pstr->offsets_needed = 0;
710 #ifdef RE_ENABLE_I18N
711 if (pstr->mb_cur_max > 1)
718 const unsigned char *raw, *p, *end;
720 /* Special case UTF-8. Multi-byte chars start with any
721 byte other than 0x80 - 0xbf. */
722 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
723 end = raw + (offset - pstr->mb_cur_max);
724 if (end < pstr->raw_mbs)
726 p = raw + offset - 1;
728 /* We know the wchar_t encoding is UCS4, so for the simple
729 case, ASCII characters, skip the conversion step. */
730 if (isascii (*p) && BE (pstr->trans == NULL, 1))
732 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
733 /* pstr->valid_len = 0; */
738 for (; p >= end; --p)
739 if ((*p & 0xc0) != 0x80)
743 int mlen = raw + pstr->len - p;
744 unsigned char buf[6];
747 const unsigned char *pp = p;
748 if (BE (pstr->trans != NULL, 0))
750 int i = mlen < 6 ? mlen : 6;
752 buf[i] = pstr->trans[p[i]];
755 /* XXX Don't use mbrtowc, we know which conversion
756 to use (UTF-8 -> UCS4). */
757 memset (&cur_state, 0, sizeof (cur_state));
758 mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
760 if (raw + offset - p <= mbclen
761 && mbclen < (size_t) -2)
763 memset (&pstr->cur_state, '\0',
765 pstr->valid_len = mbclen - (raw + offset - p);
773 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
776 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
778 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
779 && IS_WIDE_WORD_CHAR (wc))
781 : ((IS_WIDE_NEWLINE (wc)
782 && pstr->newline_anchor)
783 ? CONTEXT_NEWLINE : 0));
784 if (BE (pstr->valid_len, 0))
786 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
787 pstr->wcs[wcs_idx] = WEOF;
788 if (pstr->mbs_allocated)
789 memset (pstr->mbs, 255, pstr->valid_len);
791 pstr->valid_raw_len = pstr->valid_len;
794 #endif /* RE_ENABLE_I18N */
796 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
797 pstr->valid_raw_len = 0;
800 pstr->tip_context = (bitset_contain (pstr->word_char, c)
802 : ((IS_NEWLINE (c) && pstr->newline_anchor)
803 ? CONTEXT_NEWLINE : 0));
806 if (!BE (pstr->mbs_allocated, 0))
809 pstr->raw_mbs_idx = idx;
811 pstr->stop -= offset;
813 /* Then build the buffers. */
814 #ifdef RE_ENABLE_I18N
815 if (pstr->mb_cur_max > 1)
819 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
820 if (BE (ret != REG_NOERROR, 0))
824 build_wcs_buffer (pstr);
827 #endif /* RE_ENABLE_I18N */
828 if (BE (pstr->mbs_allocated, 0))
831 build_upper_buffer (pstr);
832 else if (pstr->trans != NULL)
833 re_string_translate_buffer (pstr);
836 pstr->valid_len = pstr->len;
843 internal_function __attribute ((pure))
844 re_string_peek_byte_case (const re_string_t *pstr, int idx)
848 /* Handle the common (easiest) cases first. */
849 if (BE (!pstr->mbs_allocated, 1))
850 return re_string_peek_byte (pstr, idx);
852 #ifdef RE_ENABLE_I18N
853 if (pstr->mb_cur_max > 1
854 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
855 return re_string_peek_byte (pstr, idx);
858 off = pstr->cur_idx + idx;
859 #ifdef RE_ENABLE_I18N
860 if (pstr->offsets_needed)
861 off = pstr->offsets[off];
864 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
866 #ifdef RE_ENABLE_I18N
867 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
868 this function returns CAPITAL LETTER I instead of first byte of
869 DOTLESS SMALL LETTER I. The latter would confuse the parser,
870 since peek_byte_case doesn't advance cur_idx in any way. */
871 if (pstr->offsets_needed && !isascii (ch))
872 return re_string_peek_byte (pstr, idx);
880 re_string_fetch_byte_case (re_string_t *pstr)
882 if (BE (!pstr->mbs_allocated, 1))
883 return re_string_fetch_byte (pstr);
885 #ifdef RE_ENABLE_I18N
886 if (pstr->offsets_needed)
890 /* For tr_TR.UTF-8 [[:islower:]] there is
891 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
892 in that case the whole multi-byte character and return
893 the original letter. On the other side, with
894 [[: DOTLESS SMALL LETTER I return [[:I, as doing
895 anything else would complicate things too much. */
897 if (!re_string_first_byte (pstr, pstr->cur_idx))
898 return re_string_fetch_byte (pstr);
900 off = pstr->offsets[pstr->cur_idx];
901 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
904 return re_string_fetch_byte (pstr);
906 re_string_skip_bytes (pstr,
907 re_string_char_size_at (pstr, pstr->cur_idx));
912 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
917 re_string_destruct (re_string_t *pstr)
919 #ifdef RE_ENABLE_I18N
921 re_free (pstr->offsets);
922 #endif /* RE_ENABLE_I18N */
923 if (pstr->mbs_allocated)
927 /* Return the context at IDX in INPUT. */
931 re_string_context_at (const re_string_t *input, int idx, int eflags)
935 /* In this case, we use the value stored in input->tip_context,
936 since we can't know the character in input->mbs[-1] here. */
937 return input->tip_context;
938 if (BE (idx == input->len, 0))
939 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
940 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
941 #ifdef RE_ENABLE_I18N
942 if (input->mb_cur_max > 1)
946 while(input->wcs[wc_idx] == WEOF)
949 /* It must not happen. */
950 assert (wc_idx >= 0);
954 return input->tip_context;
956 wc = input->wcs[wc_idx];
957 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
959 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
960 ? CONTEXT_NEWLINE : 0);
965 c = re_string_byte_at (input, idx);
966 if (bitset_contain (input->word_char, c))
968 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
972 /* Functions for set operation. */
975 internal_function __attribute_warn_unused_result__
976 re_node_set_alloc (re_node_set *set, int size)
979 * ADR: valgrind says size can be 0, which then doesn't
980 * free the block of size 0. Harumph. This seems
981 * to work ok, though.
985 memset(set, 0, sizeof(*set));
990 set->elems = re_malloc (int, size);
991 if (BE (set->elems == NULL, 0))
997 internal_function __attribute_warn_unused_result__
998 re_node_set_init_1 (re_node_set *set, int elem)
1002 set->elems = re_malloc (int, 1);
1003 if (BE (set->elems == NULL, 0))
1005 set->alloc = set->nelem = 0;
1008 set->elems[0] = elem;
1012 static reg_errcode_t
1013 internal_function __attribute_warn_unused_result__
1014 re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
1017 set->elems = re_malloc (int, 2);
1018 if (BE (set->elems == NULL, 0))
1023 set->elems[0] = elem1;
1030 set->elems[0] = elem1;
1031 set->elems[1] = elem2;
1035 set->elems[0] = elem2;
1036 set->elems[1] = elem1;
1042 static reg_errcode_t
1043 internal_function __attribute_warn_unused_result__
1044 re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1046 dest->nelem = src->nelem;
1049 dest->alloc = dest->nelem;
1050 dest->elems = re_malloc (int, dest->alloc);
1051 if (BE (dest->elems == NULL, 0))
1053 dest->alloc = dest->nelem = 0;
1056 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1059 re_node_set_init_empty (dest);
1063 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1064 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1065 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
1067 static reg_errcode_t
1068 internal_function __attribute_warn_unused_result__
1069 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1070 const re_node_set *src2)
1072 int i1, i2, is, id, delta, sbase;
1073 if (src1->nelem == 0 || src2->nelem == 0)
1076 /* We need dest->nelem + 2 * elems_in_intersection; this is a
1077 conservative estimate. */
1078 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1080 int new_alloc = src1->nelem + src2->nelem + dest->alloc;
1081 int *new_elems = re_realloc (dest->elems, int, new_alloc);
1082 if (BE (new_elems == NULL, 0))
1084 dest->elems = new_elems;
1085 dest->alloc = new_alloc;
1088 /* Find the items in the intersection of SRC1 and SRC2, and copy
1089 into the top of DEST those that are not already in DEST itself. */
1090 sbase = dest->nelem + src1->nelem + src2->nelem;
1091 i1 = src1->nelem - 1;
1092 i2 = src2->nelem - 1;
1093 id = dest->nelem - 1;
1096 if (src1->elems[i1] == src2->elems[i2])
1098 /* Try to find the item in DEST. Maybe we could binary search? */
1099 while (id >= 0 && dest->elems[id] > src1->elems[i1])
1102 if (id < 0 || dest->elems[id] != src1->elems[i1])
1103 dest->elems[--sbase] = src1->elems[i1];
1105 if (--i1 < 0 || --i2 < 0)
1109 /* Lower the highest of the two items. */
1110 else if (src1->elems[i1] < src2->elems[i2])
1122 id = dest->nelem - 1;
1123 is = dest->nelem + src1->nelem + src2->nelem - 1;
1124 delta = is - sbase + 1;
1126 /* Now copy. When DELTA becomes zero, the remaining
1127 DEST elements are already in place; this is more or
1128 less the same loop that is in re_node_set_merge. */
1129 dest->nelem += delta;
1130 if (delta > 0 && id >= 0)
1133 if (dest->elems[is] > dest->elems[id])
1135 /* Copy from the top. */
1136 dest->elems[id + delta--] = dest->elems[is--];
1142 /* Slide from the bottom. */
1143 dest->elems[id + delta] = dest->elems[id];
1149 /* Copy remaining SRC elements. */
1150 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
1155 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1156 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1158 static reg_errcode_t
1159 internal_function __attribute_warn_unused_result__
1160 re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1161 const re_node_set *src2)
1164 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1166 dest->alloc = src1->nelem + src2->nelem;
1167 dest->elems = re_malloc (int, dest->alloc);
1168 if (BE (dest->elems == NULL, 0))
1173 if (src1 != NULL && src1->nelem > 0)
1174 return re_node_set_init_copy (dest, src1);
1175 else if (src2 != NULL && src2->nelem > 0)
1176 return re_node_set_init_copy (dest, src2);
1178 re_node_set_init_empty (dest);
1181 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1183 if (src1->elems[i1] > src2->elems[i2])
1185 dest->elems[id++] = src2->elems[i2++];
1188 if (src1->elems[i1] == src2->elems[i2])
1190 dest->elems[id++] = src1->elems[i1++];
1192 if (i1 < src1->nelem)
1194 memcpy (dest->elems + id, src1->elems + i1,
1195 (src1->nelem - i1) * sizeof (int));
1196 id += src1->nelem - i1;
1198 else if (i2 < src2->nelem)
1200 memcpy (dest->elems + id, src2->elems + i2,
1201 (src2->nelem - i2) * sizeof (int));
1202 id += src2->nelem - i2;
1208 /* Calculate the union set of the sets DEST and SRC. And store it to
1209 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1211 static reg_errcode_t
1212 internal_function __attribute_warn_unused_result__
1213 re_node_set_merge (re_node_set *dest, const re_node_set *src)
1215 int is, id, sbase, delta;
1216 if (src == NULL || src->nelem == 0)
1218 if (dest->alloc < 2 * src->nelem + dest->nelem)
1220 int new_alloc = 2 * (src->nelem + dest->alloc);
1221 int *new_buffer = re_realloc (dest->elems, int, new_alloc);
1222 if (BE (new_buffer == NULL, 0))
1224 dest->elems = new_buffer;
1225 dest->alloc = new_alloc;
1228 if (BE (dest->nelem == 0, 0))
1230 dest->nelem = src->nelem;
1231 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1235 /* Copy into the top of DEST the items of SRC that are not
1236 found in DEST. Maybe we could binary search in DEST? */
1237 for (sbase = dest->nelem + 2 * src->nelem,
1238 is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
1240 if (dest->elems[id] == src->elems[is])
1242 else if (dest->elems[id] < src->elems[is])
1243 dest->elems[--sbase] = src->elems[is--];
1244 else /* if (dest->elems[id] > src->elems[is]) */
1250 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1252 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
1255 id = dest->nelem - 1;
1256 is = dest->nelem + 2 * src->nelem - 1;
1257 delta = is - sbase + 1;
1261 /* Now copy. When DELTA becomes zero, the remaining
1262 DEST elements are already in place. */
1263 dest->nelem += delta;
1266 if (dest->elems[is] > dest->elems[id])
1268 /* Copy from the top. */
1269 dest->elems[id + delta--] = dest->elems[is--];
1275 /* Slide from the bottom. */
1276 dest->elems[id + delta] = dest->elems[id];
1279 /* Copy remaining SRC elements. */
1280 memcpy (dest->elems, dest->elems + sbase,
1281 delta * sizeof (int));
1290 /* Insert the new element ELEM to the re_node_set* SET.
1291 SET should not already have ELEM.
1292 return -1 if an error is occured, return 1 otherwise. */
1295 internal_function __attribute_warn_unused_result__
1296 re_node_set_insert (re_node_set *set, int elem)
1299 /* In case the set is empty. */
1300 if (set->alloc == 0)
1302 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
1308 if (BE (set->nelem, 0) == 0)
1310 /* We already guaranteed above that set->alloc != 0. */
1311 set->elems[0] = elem;
1316 /* Realloc if we need. */
1317 if (set->alloc == set->nelem)
1320 set->alloc = set->alloc * 2;
1321 new_elems = re_realloc (set->elems, int, set->alloc);
1322 if (BE (new_elems == NULL, 0))
1324 set->elems = new_elems;
1327 /* Move the elements which follows the new element. Test the
1328 first element separately to skip a check in the inner loop. */
1329 if (elem < set->elems[0])
1332 for (idx = set->nelem; idx > 0; idx--)
1333 set->elems[idx] = set->elems[idx - 1];
1337 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1338 set->elems[idx] = set->elems[idx - 1];
1341 /* Insert the new element. */
1342 set->elems[idx] = elem;
1347 /* Insert the new element ELEM to the re_node_set* SET.
1348 SET should not already have any element greater than or equal to ELEM.
1349 Return -1 if an error is occured, return 1 otherwise. */
1352 internal_function __attribute_warn_unused_result__
1353 re_node_set_insert_last (re_node_set *set, int elem)
1355 /* Realloc if we need. */
1356 if (set->alloc == set->nelem)
1359 set->alloc = (set->alloc + 1) * 2;
1360 new_elems = re_realloc (set->elems, int, set->alloc);
1361 if (BE (new_elems == NULL, 0))
1363 set->elems = new_elems;
1366 /* Insert the new element. */
1367 set->elems[set->nelem++] = elem;
1371 /* Compare two node sets SET1 and SET2.
1372 return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
1375 internal_function __attribute ((pure))
1376 re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1379 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1381 for (i = set1->nelem ; --i >= 0 ; )
1382 if (set1->elems[i] != set2->elems[i])
1387 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1390 internal_function __attribute ((pure))
1391 re_node_set_contains (const re_node_set *set, int elem)
1393 unsigned int idx, right, mid;
1394 if (set->nelem <= 0)
1397 /* Binary search the element. */
1399 right = set->nelem - 1;
1402 mid = (idx + right) / 2;
1403 if (set->elems[mid] < elem)
1408 return set->elems[idx] == elem ? idx + 1 : 0;
1413 re_node_set_remove_at (re_node_set *set, int idx)
1415 if (idx < 0 || idx >= set->nelem)
1418 for (; idx < set->nelem; idx++)
1419 set->elems[idx] = set->elems[idx + 1];
1423 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1424 Or return -1, if an error will be occured. */
1428 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1430 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1432 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1433 int *new_nexts, *new_indices;
1434 re_node_set *new_edests, *new_eclosures;
1435 re_token_t *new_nodes;
1437 /* Avoid overflows in realloc. */
1438 const size_t max_object_size = MAX (sizeof (re_token_t),
1439 MAX (sizeof (re_node_set),
1441 if (BE (SIZE_MAX / max_object_size < new_nodes_alloc, 0))
1444 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1445 if (BE (new_nodes == NULL, 0))
1447 dfa->nodes = new_nodes;
1448 new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
1449 new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
1450 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1451 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1452 if (BE (new_nexts == NULL || new_indices == NULL
1453 || new_edests == NULL || new_eclosures == NULL, 0))
1455 dfa->nexts = new_nexts;
1456 dfa->org_indices = new_indices;
1457 dfa->edests = new_edests;
1458 dfa->eclosures = new_eclosures;
1459 dfa->nodes_alloc = new_nodes_alloc;
1461 dfa->nodes[dfa->nodes_len] = token;
1462 dfa->nodes[dfa->nodes_len].constraint = 0;
1463 #ifdef RE_ENABLE_I18N
1464 dfa->nodes[dfa->nodes_len].accept_mb =
1465 (token.type == OP_PERIOD && dfa->mb_cur_max > 1) || token.type == COMPLEX_BRACKET;
1467 dfa->nexts[dfa->nodes_len] = -1;
1468 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1469 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1470 return dfa->nodes_len++;
1473 static inline unsigned int
1475 calc_state_hash (const re_node_set *nodes, unsigned int context)
1477 unsigned int hash = nodes->nelem + context;
1479 for (i = 0 ; i < nodes->nelem ; i++)
1480 hash += nodes->elems[i];
1484 /* Search for the state whose node_set is equivalent to NODES.
1485 Return the pointer to the state, if we found it in the DFA.
1486 Otherwise create the new one and return it. In case of an error
1487 return NULL and set the error code in ERR.
1488 Note: - We assume NULL as the invalid state, then it is possible that
1489 return value is NULL and ERR is REG_NOERROR.
1490 - We never return non-NULL value in case of any errors, it is for
1493 static re_dfastate_t *
1494 internal_function __attribute_warn_unused_result__
1495 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1496 const re_node_set *nodes)
1499 re_dfastate_t *new_state;
1500 struct re_state_table_entry *spot;
1502 if (BE (nodes->nelem == 0, 0))
1507 hash = calc_state_hash (nodes, 0);
1508 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1510 for (i = 0 ; i < spot->num ; i++)
1512 re_dfastate_t *state = spot->array[i];
1513 if (hash != state->hash)
1515 if (re_node_set_compare (&state->nodes, nodes))
1519 /* There are no appropriate state in the dfa, create the new one. */
1520 new_state = create_ci_newstate (dfa, nodes, hash);
1521 if (BE (new_state == NULL, 0))
1527 /* Search for the state whose node_set is equivalent to NODES and
1528 whose context is equivalent to CONTEXT.
1529 Return the pointer to the state, if we found it in the DFA.
1530 Otherwise create the new one and return it. In case of an error
1531 return NULL and set the error code in ERR.
1532 Note: - We assume NULL as the invalid state, then it is possible that
1533 return value is NULL and ERR is REG_NOERROR.
1534 - We never return non-NULL value in case of any errors, it is for
1537 static re_dfastate_t *
1538 internal_function __attribute_warn_unused_result__
1539 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1540 const re_node_set *nodes, unsigned int context)
1543 re_dfastate_t *new_state;
1544 struct re_state_table_entry *spot;
1546 if (nodes->nelem == 0)
1551 hash = calc_state_hash (nodes, context);
1552 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1554 for (i = 0 ; i < spot->num ; i++)
1556 re_dfastate_t *state = spot->array[i];
1557 if (state->hash == hash
1558 && state->context == context
1559 && re_node_set_compare (state->entrance_nodes, nodes))
1562 /* There are no appropriate state in `dfa', create the new one. */
1563 new_state = create_cd_newstate (dfa, nodes, context, hash);
1564 if (BE (new_state == NULL, 0))
1570 /* Finish initialization of the new state NEWSTATE, and using its hash value
1571 HASH put in the appropriate bucket of DFA's state table. Return value
1572 indicates the error code if failed. */
1574 static reg_errcode_t
1575 __attribute_warn_unused_result__
1576 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1579 struct re_state_table_entry *spot;
1583 newstate->hash = hash;
1584 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1585 if (BE (err != REG_NOERROR, 0))
1587 for (i = 0; i < newstate->nodes.nelem; i++)
1589 int elem = newstate->nodes.elems[i];
1590 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1591 if (re_node_set_insert_last (&newstate->non_eps_nodes, elem) < 0)
1595 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1596 if (BE (spot->alloc <= spot->num, 0))
1598 int new_alloc = 2 * spot->num + 2;
1599 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1601 if (BE (new_array == NULL, 0))
1603 spot->array = new_array;
1604 spot->alloc = new_alloc;
1606 spot->array[spot->num++] = newstate;
1611 free_state (re_dfastate_t *state)
1613 re_node_set_free (&state->non_eps_nodes);
1614 re_node_set_free (&state->inveclosure);
1615 if (state->entrance_nodes != &state->nodes)
1617 re_node_set_free (state->entrance_nodes);
1618 re_free (state->entrance_nodes);
1620 re_node_set_free (&state->nodes);
1621 re_free (state->word_trtable);
1622 re_free (state->trtable);
1626 /* Create the new state which is independ of contexts.
1627 Return the new state if succeeded, otherwise return NULL. */
1629 static re_dfastate_t *
1630 internal_function __attribute_warn_unused_result__
1631 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1636 re_dfastate_t *newstate;
1638 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1639 if (BE (newstate == NULL, 0))
1641 err = re_node_set_init_copy (&newstate->nodes, nodes);
1642 if (BE (err != REG_NOERROR, 0))
1648 newstate->entrance_nodes = &newstate->nodes;
1649 for (i = 0 ; i < nodes->nelem ; i++)
1651 re_token_t *node = dfa->nodes + nodes->elems[i];
1652 re_token_type_t type = node->type;
1653 if (type == CHARACTER && !node->constraint)
1655 #ifdef RE_ENABLE_I18N
1656 newstate->accept_mb |= node->accept_mb;
1657 #endif /* RE_ENABLE_I18N */
1659 /* If the state has the halt node, the state is a halt state. */
1660 if (type == END_OF_RE)
1662 else if (type == OP_BACK_REF)
1663 newstate->has_backref = 1;
1664 else if (type == ANCHOR || node->constraint)
1665 newstate->has_constraint = 1;
1667 err = register_state (dfa, newstate, hash);
1668 if (BE (err != REG_NOERROR, 0))
1670 free_state (newstate);
1676 /* Create the new state which is depend on the context CONTEXT.
1677 Return the new state if succeeded, otherwise return NULL. */
1679 static re_dfastate_t *
1680 internal_function __attribute_warn_unused_result__
1681 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1682 unsigned int context, unsigned int hash)
1684 int i, nctx_nodes = 0;
1686 re_dfastate_t *newstate;
1688 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1689 if (BE (newstate == NULL, 0))
1691 err = re_node_set_init_copy (&newstate->nodes, nodes);
1692 if (BE (err != REG_NOERROR, 0))
1698 newstate->context = context;
1699 newstate->entrance_nodes = &newstate->nodes;
1701 for (i = 0 ; i < nodes->nelem ; i++)
1703 re_token_t *node = dfa->nodes + nodes->elems[i];
1704 re_token_type_t type = node->type;
1705 unsigned int constraint = node->constraint;
1707 if (type == CHARACTER && !constraint)
1709 #ifdef RE_ENABLE_I18N
1710 newstate->accept_mb |= node->accept_mb;
1711 #endif /* RE_ENABLE_I18N */
1713 /* If the state has the halt node, the state is a halt state. */
1714 if (type == END_OF_RE)
1716 else if (type == OP_BACK_REF)
1717 newstate->has_backref = 1;
1721 if (newstate->entrance_nodes == &newstate->nodes)
1723 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1724 if (BE (newstate->entrance_nodes == NULL, 0))
1726 free_state (newstate);
1729 if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1733 newstate->has_constraint = 1;
1736 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1738 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1743 err = register_state (dfa, newstate, hash);
1744 if (BE (err != REG_NOERROR, 0))
1746 free_state (newstate);