1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002-2006, 2010 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21 static void re_string_construct_common (const char *str, int len,
23 RE_TRANSLATE_TYPE trans, int icase,
24 const re_dfa_t *dfa) internal_function;
25 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
26 const re_node_set *nodes,
27 unsigned int hash) internal_function;
28 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
29 const re_node_set *nodes,
31 unsigned int hash) internal_function;
33 /* Functions for string operation. */
35 /* This function allocate the buffers. It is necessary to call
36 re_string_reconstruct before using the object. */
39 internal_function __attribute_warn_unused_result__
40 re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
41 RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
46 /* Ensure at least one character fits into the buffers. */
47 if (init_len < dfa->mb_cur_max)
48 init_len = dfa->mb_cur_max;
49 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
50 re_string_construct_common (str, len, pstr, trans, icase, dfa);
52 ret = re_string_realloc_buffers (pstr, init_buf_len);
53 if (BE (ret != REG_NOERROR, 0))
56 pstr->word_char = dfa->word_char;
57 pstr->word_ops_used = dfa->word_ops_used;
58 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
59 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
60 pstr->valid_raw_len = pstr->valid_len;
64 /* This function allocate the buffers, and initialize them. */
67 internal_function __attribute_warn_unused_result__
68 re_string_construct (re_string_t *pstr, const char *str, int len,
69 RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
72 memset (pstr, '\0', sizeof (re_string_t));
73 re_string_construct_common (str, len, pstr, trans, icase, dfa);
77 ret = re_string_realloc_buffers (pstr, len + 1);
78 if (BE (ret != REG_NOERROR, 0))
81 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
86 if (dfa->mb_cur_max > 1)
90 ret = build_wcs_upper_buffer (pstr);
91 if (BE (ret != REG_NOERROR, 0))
93 if (pstr->valid_raw_len >= len)
95 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
97 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
98 if (BE (ret != REG_NOERROR, 0))
103 #endif /* RE_ENABLE_I18N */
104 build_upper_buffer (pstr);
108 #ifdef RE_ENABLE_I18N
109 if (dfa->mb_cur_max > 1)
110 build_wcs_buffer (pstr);
112 #endif /* RE_ENABLE_I18N */
115 re_string_translate_buffer (pstr);
118 pstr->valid_len = pstr->bufs_len;
119 pstr->valid_raw_len = pstr->bufs_len;
127 /* Helper functions for re_string_allocate, and re_string_construct. */
130 internal_function __attribute_warn_unused_result__
131 re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
133 #ifdef RE_ENABLE_I18N
134 if (pstr->mb_cur_max > 1)
138 /* Avoid overflow in realloc. */
139 const size_t max_object_size = MAX (sizeof (wint_t), sizeof (int));
140 if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
143 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
144 if (BE (new_wcs == NULL, 0))
147 if (pstr->offsets != NULL)
149 int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
150 if (BE (new_offsets == NULL, 0))
152 pstr->offsets = new_offsets;
155 #endif /* RE_ENABLE_I18N */
156 if (pstr->mbs_allocated)
158 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
160 if (BE (new_mbs == NULL, 0))
164 pstr->bufs_len = new_buf_len;
171 re_string_construct_common (const char *str, int len, re_string_t *pstr,
172 RE_TRANSLATE_TYPE trans, int icase,
175 pstr->raw_mbs = (const unsigned char *) str;
179 pstr->icase = icase ? 1 : 0;
180 pstr->mbs_allocated = (trans != NULL || icase);
181 pstr->mb_cur_max = dfa->mb_cur_max;
182 pstr->is_utf8 = dfa->is_utf8;
183 pstr->map_notascii = dfa->map_notascii;
184 pstr->stop = pstr->len;
185 pstr->raw_stop = pstr->stop;
188 #ifdef RE_ENABLE_I18N
190 /* Build wide character buffer PSTR->WCS.
191 If the byte sequence of the string are:
192 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
193 Then wide character buffer will be:
194 <wc1> , WEOF , <wc2> , WEOF , <wc3>
195 We use WEOF for padding, they indicate that the position isn't
196 a first byte of a multibyte character.
198 Note that this function assumes PSTR->VALID_LEN elements are already
199 built and starts from PSTR->VALID_LEN. */
203 build_wcs_buffer (re_string_t *pstr)
206 unsigned char buf[MB_LEN_MAX];
207 assert (MB_LEN_MAX >= pstr->mb_cur_max);
209 unsigned char buf[64];
212 int byte_idx, end_idx, remain_len;
215 /* Build the buffers from pstr->valid_len to either pstr->len or
217 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
218 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
223 remain_len = end_idx - byte_idx;
224 prev_st = pstr->cur_state;
225 /* Apply the translation if we need. */
226 if (BE (pstr->trans != NULL, 0))
230 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
232 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
233 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
235 p = (const char *) buf;
238 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
239 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
240 if (BE (mbclen == (size_t) -2, 0))
242 /* The buffer doesn't have enough space, finish to build. */
243 pstr->cur_state = prev_st;
246 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
248 /* We treat these cases as a singlebyte character. */
250 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
251 if (BE (pstr->trans != NULL, 0))
252 wc = pstr->trans[wc];
253 pstr->cur_state = prev_st;
256 /* Write wide character and padding. */
257 pstr->wcs[byte_idx++] = wc;
258 /* Write paddings. */
259 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
260 pstr->wcs[byte_idx++] = WEOF;
262 pstr->valid_len = byte_idx;
263 pstr->valid_raw_len = byte_idx;
266 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
267 but for REG_ICASE. */
270 internal_function __attribute_warn_unused_result__
271 build_wcs_upper_buffer (re_string_t *pstr)
274 int src_idx, byte_idx, end_idx, remain_len;
277 char buf[MB_LEN_MAX];
278 assert (MB_LEN_MAX >= pstr->mb_cur_max);
283 byte_idx = pstr->valid_len;
284 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
286 /* The following optimization assumes that ASCII characters can be
287 mapped to wide characters with a simple cast. */
288 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
290 while (byte_idx < end_idx)
294 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
295 && mbsinit (&pstr->cur_state))
297 /* In case of a singlebyte character. */
299 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
300 /* The next step uses the assumption that wchar_t is encoded
301 ASCII-safe: all ASCII values can be converted like this. */
302 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
307 remain_len = end_idx - byte_idx;
308 prev_st = pstr->cur_state;
309 mbclen = __mbrtowc (&wc,
310 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
311 + byte_idx), remain_len, &pstr->cur_state);
312 if (BE (mbclen + 2 > 2, 1))
320 mbcdlen = wcrtomb (buf, wcu, &prev_st);
321 if (BE (mbclen == mbcdlen, 1))
322 memcpy (pstr->mbs + byte_idx, buf, mbclen);
330 memcpy (pstr->mbs + byte_idx,
331 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
332 pstr->wcs[byte_idx++] = wcu;
333 /* Write paddings. */
334 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
335 pstr->wcs[byte_idx++] = WEOF;
337 else if (mbclen == (size_t) -1 || mbclen == 0)
339 /* It is an invalid character or '\0'. Just use the byte. */
340 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
341 pstr->mbs[byte_idx] = ch;
342 /* And also cast it to wide char. */
343 pstr->wcs[byte_idx++] = (wchar_t) ch;
344 if (BE (mbclen == (size_t) -1, 0))
345 pstr->cur_state = prev_st;
349 /* The buffer doesn't have enough space, finish to build. */
350 pstr->cur_state = prev_st;
354 pstr->valid_len = byte_idx;
355 pstr->valid_raw_len = byte_idx;
359 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
364 remain_len = end_idx - byte_idx;
365 prev_st = pstr->cur_state;
366 if (BE (pstr->trans != NULL, 0))
370 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
372 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
373 buf[i] = pstr->trans[ch];
375 p = (const char *) buf;
378 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
379 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
380 if (BE (mbclen + 2 > 2, 1))
388 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
389 if (BE (mbclen == mbcdlen, 1))
390 memcpy (pstr->mbs + byte_idx, buf, mbclen);
391 else if (mbcdlen != (size_t) -1)
395 if (byte_idx + mbcdlen > pstr->bufs_len)
397 pstr->cur_state = prev_st;
401 if (pstr->offsets == NULL)
403 pstr->offsets = re_malloc (int, pstr->bufs_len);
405 if (pstr->offsets == NULL)
408 if (!pstr->offsets_needed)
410 for (i = 0; i < (size_t) byte_idx; ++i)
411 pstr->offsets[i] = i;
412 pstr->offsets_needed = 1;
415 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
416 pstr->wcs[byte_idx] = wcu;
417 pstr->offsets[byte_idx] = src_idx;
418 for (i = 1; i < mbcdlen; ++i)
420 pstr->offsets[byte_idx + i]
421 = src_idx + (i < mbclen ? i : mbclen - 1);
422 pstr->wcs[byte_idx + i] = WEOF;
424 pstr->len += mbcdlen - mbclen;
425 if (pstr->raw_stop > src_idx)
426 pstr->stop += mbcdlen - mbclen;
427 end_idx = (pstr->bufs_len > pstr->len)
428 ? pstr->len : pstr->bufs_len;
434 memcpy (pstr->mbs + byte_idx, p, mbclen);
437 memcpy (pstr->mbs + byte_idx, p, mbclen);
439 if (BE (pstr->offsets_needed != 0, 0))
442 for (i = 0; i < mbclen; ++i)
443 pstr->offsets[byte_idx + i] = src_idx + i;
447 pstr->wcs[byte_idx++] = wcu;
448 /* Write paddings. */
449 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
450 pstr->wcs[byte_idx++] = WEOF;
452 else if (mbclen == (size_t) -1 || mbclen == 0)
454 /* It is an invalid character or '\0'. Just use the byte. */
455 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
457 if (BE (pstr->trans != NULL, 0))
458 ch = pstr->trans [ch];
459 pstr->mbs[byte_idx] = ch;
461 if (BE (pstr->offsets_needed != 0, 0))
462 pstr->offsets[byte_idx] = src_idx;
465 /* And also cast it to wide char. */
466 pstr->wcs[byte_idx++] = (wchar_t) ch;
467 if (BE (mbclen == (size_t) -1, 0))
468 pstr->cur_state = prev_st;
472 /* The buffer doesn't have enough space, finish to build. */
473 pstr->cur_state = prev_st;
477 pstr->valid_len = byte_idx;
478 pstr->valid_raw_len = src_idx;
482 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
487 re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
494 /* Skip the characters which are not necessary to check. */
495 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
496 rawbuf_idx < new_raw_idx;)
499 remain_len = pstr->len - rawbuf_idx;
500 prev_st = pstr->cur_state;
501 mbclen = __mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
502 remain_len, &pstr->cur_state);
503 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
505 /* We treat these cases as a single byte character. */
506 if (mbclen == 0 || remain_len == 0)
509 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
511 pstr->cur_state = prev_st;
513 /* Then proceed the next character. */
514 rawbuf_idx += mbclen;
516 *last_wc = (wint_t) wc;
519 #endif /* RE_ENABLE_I18N */
521 /* Build the buffer PSTR->MBS, and apply the translation if we need.
522 This function is used in case of REG_ICASE. */
526 build_upper_buffer (re_string_t *pstr)
528 int char_idx, end_idx;
529 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
531 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
533 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
534 if (BE (pstr->trans != NULL, 0))
535 ch = pstr->trans[ch];
537 pstr->mbs[char_idx] = toupper (ch);
539 pstr->mbs[char_idx] = ch;
541 pstr->valid_len = char_idx;
542 pstr->valid_raw_len = char_idx;
545 /* Apply TRANS to the buffer in PSTR. */
549 re_string_translate_buffer (re_string_t *pstr)
551 int buf_idx, end_idx;
552 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
554 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
556 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
557 pstr->mbs[buf_idx] = pstr->trans[ch];
560 pstr->valid_len = buf_idx;
561 pstr->valid_raw_len = buf_idx;
564 /* This function re-construct the buffers.
565 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
566 convert to upper case in case of REG_ICASE, apply translation. */
569 internal_function __attribute_warn_unused_result__
570 re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
572 int offset = idx - pstr->raw_mbs_idx;
573 if (BE (offset < 0, 0))
576 #ifdef RE_ENABLE_I18N
577 if (pstr->mb_cur_max > 1)
578 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
579 #endif /* RE_ENABLE_I18N */
580 pstr->len = pstr->raw_len;
581 pstr->stop = pstr->raw_stop;
583 pstr->raw_mbs_idx = 0;
584 pstr->valid_raw_len = 0;
585 pstr->offsets_needed = 0;
586 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
587 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
588 if (!pstr->mbs_allocated)
589 pstr->mbs = (unsigned char *) pstr->raw_mbs;
593 if (BE (offset != 0, 1))
595 /* Should the already checked characters be kept? */
596 if (BE (offset < pstr->valid_raw_len, 1))
598 /* Yes, move them to the front of the buffer. */
599 #ifdef RE_ENABLE_I18N
600 if (BE (pstr->offsets_needed, 0))
602 int low = 0, high = pstr->valid_len, mid;
605 mid = (high + low) / 2;
606 if (pstr->offsets[mid] > offset)
608 else if (pstr->offsets[mid] < offset)
614 if (pstr->offsets[mid] < offset)
616 pstr->tip_context = re_string_context_at (pstr, mid - 1,
618 /* This can be quite complicated, so handle specially
619 only the common and easy case where the character with
620 different length representation of lower and upper
621 case is present at or after offset. */
622 if (pstr->valid_len > offset
623 && mid == offset && pstr->offsets[mid] == offset)
625 memmove (pstr->wcs, pstr->wcs + offset,
626 (pstr->valid_len - offset) * sizeof (wint_t));
627 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
628 pstr->valid_len -= offset;
629 pstr->valid_raw_len -= offset;
630 for (low = 0; low < pstr->valid_len; low++)
631 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
635 /* Otherwise, just find out how long the partial multibyte
636 character at offset is and fill it with WEOF/255. */
637 pstr->len = pstr->raw_len - idx + offset;
638 pstr->stop = pstr->raw_stop - idx + offset;
639 pstr->offsets_needed = 0;
640 while (mid > 0 && pstr->offsets[mid - 1] == offset)
642 while (mid < pstr->valid_len)
643 if (pstr->wcs[mid] != WEOF)
647 if (mid == pstr->valid_len)
651 pstr->valid_len = pstr->offsets[mid] - offset;
654 for (low = 0; low < pstr->valid_len; ++low)
655 pstr->wcs[low] = WEOF;
656 memset (pstr->mbs, 255, pstr->valid_len);
659 pstr->valid_raw_len = pstr->valid_len;
665 pstr->tip_context = re_string_context_at (pstr, offset - 1,
667 #ifdef RE_ENABLE_I18N
668 if (pstr->mb_cur_max > 1)
669 memmove (pstr->wcs, pstr->wcs + offset,
670 (pstr->valid_len - offset) * sizeof (wint_t));
671 #endif /* RE_ENABLE_I18N */
672 if (BE (pstr->mbs_allocated, 0))
673 memmove (pstr->mbs, pstr->mbs + offset,
674 pstr->valid_len - offset);
675 pstr->valid_len -= offset;
676 pstr->valid_raw_len -= offset;
678 assert (pstr->valid_len > 0);
684 /* No, skip all characters until IDX. */
685 int prev_valid_len = pstr->valid_len;
687 #ifdef RE_ENABLE_I18N
688 if (BE (pstr->offsets_needed, 0))
690 pstr->len = pstr->raw_len - idx + offset;
691 pstr->stop = pstr->raw_stop - idx + offset;
692 pstr->offsets_needed = 0;
696 #ifdef RE_ENABLE_I18N
697 if (pstr->mb_cur_max > 1)
704 const unsigned char *raw, *p, *end;
706 /* Special case UTF-8. Multi-byte chars start with any
707 byte other than 0x80 - 0xbf. */
708 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
709 end = raw + (offset - pstr->mb_cur_max);
710 if (end < pstr->raw_mbs)
712 p = raw + offset - 1;
714 /* We know the wchar_t encoding is UCS4, so for the simple
715 case, ASCII characters, skip the conversion step. */
716 if (isascii (*p) && BE (pstr->trans == NULL, 1))
718 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
719 /* pstr->valid_len = 0; */
724 for (; p >= end; --p)
725 if ((*p & 0xc0) != 0x80)
729 int mlen = raw + pstr->len - p;
730 unsigned char buf[6];
733 if (BE (pstr->trans != NULL, 0))
735 int i = mlen < 6 ? mlen : 6;
737 buf[i] = pstr->trans[p[i]];
739 /* XXX Don't use mbrtowc, we know which conversion
740 to use (UTF-8 -> UCS4). */
741 memset (&cur_state, 0, sizeof (cur_state));
742 mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
744 if (raw + offset - p <= mbclen
745 && mbclen < (size_t) -2)
747 memset (&pstr->cur_state, '\0',
749 pstr->valid_len = mbclen - (raw + offset - p);
757 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
760 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
762 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
763 && IS_WIDE_WORD_CHAR (wc))
765 : ((IS_WIDE_NEWLINE (wc)
766 && pstr->newline_anchor)
767 ? CONTEXT_NEWLINE : 0));
768 if (BE (pstr->valid_len, 0))
770 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
771 pstr->wcs[wcs_idx] = WEOF;
772 if (pstr->mbs_allocated)
773 memset (pstr->mbs, 255, pstr->valid_len);
775 pstr->valid_raw_len = pstr->valid_len;
778 #endif /* RE_ENABLE_I18N */
780 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
781 pstr->valid_raw_len = 0;
784 pstr->tip_context = (bitset_contain (pstr->word_char, c)
786 : ((IS_NEWLINE (c) && pstr->newline_anchor)
787 ? CONTEXT_NEWLINE : 0));
790 if (!BE (pstr->mbs_allocated, 0))
793 pstr->raw_mbs_idx = idx;
795 pstr->stop -= offset;
797 /* Then build the buffers. */
798 #ifdef RE_ENABLE_I18N
799 if (pstr->mb_cur_max > 1)
803 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
804 if (BE (ret != REG_NOERROR, 0))
808 build_wcs_buffer (pstr);
811 #endif /* RE_ENABLE_I18N */
812 if (BE (pstr->mbs_allocated, 0))
815 build_upper_buffer (pstr);
816 else if (pstr->trans != NULL)
817 re_string_translate_buffer (pstr);
820 pstr->valid_len = pstr->len;
827 internal_function __attribute ((pure))
828 re_string_peek_byte_case (const re_string_t *pstr, int idx)
832 /* Handle the common (easiest) cases first. */
833 if (BE (!pstr->mbs_allocated, 1))
834 return re_string_peek_byte (pstr, idx);
836 #ifdef RE_ENABLE_I18N
837 if (pstr->mb_cur_max > 1
838 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
839 return re_string_peek_byte (pstr, idx);
842 off = pstr->cur_idx + idx;
843 #ifdef RE_ENABLE_I18N
844 if (pstr->offsets_needed)
845 off = pstr->offsets[off];
848 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
850 #ifdef RE_ENABLE_I18N
851 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
852 this function returns CAPITAL LETTER I instead of first byte of
853 DOTLESS SMALL LETTER I. The latter would confuse the parser,
854 since peek_byte_case doesn't advance cur_idx in any way. */
855 if (pstr->offsets_needed && !isascii (ch))
856 return re_string_peek_byte (pstr, idx);
863 internal_function __attribute ((pure))
864 re_string_fetch_byte_case (re_string_t *pstr)
866 if (BE (!pstr->mbs_allocated, 1))
867 return re_string_fetch_byte (pstr);
869 #ifdef RE_ENABLE_I18N
870 if (pstr->offsets_needed)
874 /* For tr_TR.UTF-8 [[:islower:]] there is
875 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
876 in that case the whole multi-byte character and return
877 the original letter. On the other side, with
878 [[: DOTLESS SMALL LETTER I return [[:I, as doing
879 anything else would complicate things too much. */
881 if (!re_string_first_byte (pstr, pstr->cur_idx))
882 return re_string_fetch_byte (pstr);
884 off = pstr->offsets[pstr->cur_idx];
885 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
888 return re_string_fetch_byte (pstr);
890 re_string_skip_bytes (pstr,
891 re_string_char_size_at (pstr, pstr->cur_idx));
896 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
901 re_string_destruct (re_string_t *pstr)
903 #ifdef RE_ENABLE_I18N
905 re_free (pstr->offsets);
906 #endif /* RE_ENABLE_I18N */
907 if (pstr->mbs_allocated)
911 /* Return the context at IDX in INPUT. */
915 re_string_context_at (const re_string_t *input, int idx, int eflags)
919 /* In this case, we use the value stored in input->tip_context,
920 since we can't know the character in input->mbs[-1] here. */
921 return input->tip_context;
922 if (BE (idx == input->len, 0))
923 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
924 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
925 #ifdef RE_ENABLE_I18N
926 if (input->mb_cur_max > 1)
930 while(input->wcs[wc_idx] == WEOF)
933 /* It must not happen. */
934 assert (wc_idx >= 0);
938 return input->tip_context;
940 wc = input->wcs[wc_idx];
941 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
943 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
944 ? CONTEXT_NEWLINE : 0);
949 c = re_string_byte_at (input, idx);
950 if (bitset_contain (input->word_char, c))
952 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
956 /* Functions for set operation. */
959 internal_function __attribute_warn_unused_result__
960 re_node_set_alloc (re_node_set *set, int size)
964 set->elems = re_malloc (int, size);
965 if (BE (set->elems == NULL, 0))
971 internal_function __attribute_warn_unused_result__
972 re_node_set_init_1 (re_node_set *set, int elem)
976 set->elems = re_malloc (int, 1);
977 if (BE (set->elems == NULL, 0))
979 set->alloc = set->nelem = 0;
982 set->elems[0] = elem;
987 internal_function __attribute_warn_unused_result__
988 re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
991 set->elems = re_malloc (int, 2);
992 if (BE (set->elems == NULL, 0))
997 set->elems[0] = elem1;
1004 set->elems[0] = elem1;
1005 set->elems[1] = elem2;
1009 set->elems[0] = elem2;
1010 set->elems[1] = elem1;
1016 static reg_errcode_t
1017 internal_function __attribute_warn_unused_result__
1018 re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1020 dest->nelem = src->nelem;
1023 dest->alloc = dest->nelem;
1024 dest->elems = re_malloc (int, dest->alloc);
1025 if (BE (dest->elems == NULL, 0))
1027 dest->alloc = dest->nelem = 0;
1030 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1033 re_node_set_init_empty (dest);
1037 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1038 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1039 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
1041 static reg_errcode_t
1042 internal_function __attribute_warn_unused_result__
1043 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1044 const re_node_set *src2)
1046 int i1, i2, is, id, delta, sbase;
1047 if (src1->nelem == 0 || src2->nelem == 0)
1050 /* We need dest->nelem + 2 * elems_in_intersection; this is a
1051 conservative estimate. */
1052 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1054 int new_alloc = src1->nelem + src2->nelem + dest->alloc;
1055 int *new_elems = re_realloc (dest->elems, int, new_alloc);
1056 if (BE (new_elems == NULL, 0))
1058 dest->elems = new_elems;
1059 dest->alloc = new_alloc;
1062 /* Find the items in the intersection of SRC1 and SRC2, and copy
1063 into the top of DEST those that are not already in DEST itself. */
1064 sbase = dest->nelem + src1->nelem + src2->nelem;
1065 i1 = src1->nelem - 1;
1066 i2 = src2->nelem - 1;
1067 id = dest->nelem - 1;
1070 if (src1->elems[i1] == src2->elems[i2])
1072 /* Try to find the item in DEST. Maybe we could binary search? */
1073 while (id >= 0 && dest->elems[id] > src1->elems[i1])
1076 if (id < 0 || dest->elems[id] != src1->elems[i1])
1077 dest->elems[--sbase] = src1->elems[i1];
1079 if (--i1 < 0 || --i2 < 0)
1083 /* Lower the highest of the two items. */
1084 else if (src1->elems[i1] < src2->elems[i2])
1096 id = dest->nelem - 1;
1097 is = dest->nelem + src1->nelem + src2->nelem - 1;
1098 delta = is - sbase + 1;
1100 /* Now copy. When DELTA becomes zero, the remaining
1101 DEST elements are already in place; this is more or
1102 less the same loop that is in re_node_set_merge. */
1103 dest->nelem += delta;
1104 if (delta > 0 && id >= 0)
1107 if (dest->elems[is] > dest->elems[id])
1109 /* Copy from the top. */
1110 dest->elems[id + delta--] = dest->elems[is--];
1116 /* Slide from the bottom. */
1117 dest->elems[id + delta] = dest->elems[id];
1123 /* Copy remaining SRC elements. */
1124 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
1129 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1130 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1132 static reg_errcode_t
1133 internal_function __attribute_warn_unused_result__
1134 re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1135 const re_node_set *src2)
1138 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1140 dest->alloc = src1->nelem + src2->nelem;
1141 dest->elems = re_malloc (int, dest->alloc);
1142 if (BE (dest->elems == NULL, 0))
1147 if (src1 != NULL && src1->nelem > 0)
1148 return re_node_set_init_copy (dest, src1);
1149 else if (src2 != NULL && src2->nelem > 0)
1150 return re_node_set_init_copy (dest, src2);
1152 re_node_set_init_empty (dest);
1155 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1157 if (src1->elems[i1] > src2->elems[i2])
1159 dest->elems[id++] = src2->elems[i2++];
1162 if (src1->elems[i1] == src2->elems[i2])
1164 dest->elems[id++] = src1->elems[i1++];
1166 if (i1 < src1->nelem)
1168 memcpy (dest->elems + id, src1->elems + i1,
1169 (src1->nelem - i1) * sizeof (int));
1170 id += src1->nelem - i1;
1172 else if (i2 < src2->nelem)
1174 memcpy (dest->elems + id, src2->elems + i2,
1175 (src2->nelem - i2) * sizeof (int));
1176 id += src2->nelem - i2;
1182 /* Calculate the union set of the sets DEST and SRC. And store it to
1183 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1185 static reg_errcode_t
1186 internal_function __attribute_warn_unused_result__
1187 re_node_set_merge (re_node_set *dest, const re_node_set *src)
1189 int is, id, sbase, delta;
1190 if (src == NULL || src->nelem == 0)
1192 if (dest->alloc < 2 * src->nelem + dest->nelem)
1194 int new_alloc = 2 * (src->nelem + dest->alloc);
1195 int *new_buffer = re_realloc (dest->elems, int, new_alloc);
1196 if (BE (new_buffer == NULL, 0))
1198 dest->elems = new_buffer;
1199 dest->alloc = new_alloc;
1202 if (BE (dest->nelem == 0, 0))
1204 dest->nelem = src->nelem;
1205 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1209 /* Copy into the top of DEST the items of SRC that are not
1210 found in DEST. Maybe we could binary search in DEST? */
1211 for (sbase = dest->nelem + 2 * src->nelem,
1212 is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
1214 if (dest->elems[id] == src->elems[is])
1216 else if (dest->elems[id] < src->elems[is])
1217 dest->elems[--sbase] = src->elems[is--];
1218 else /* if (dest->elems[id] > src->elems[is]) */
1224 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1226 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
1229 id = dest->nelem - 1;
1230 is = dest->nelem + 2 * src->nelem - 1;
1231 delta = is - sbase + 1;
1235 /* Now copy. When DELTA becomes zero, the remaining
1236 DEST elements are already in place. */
1237 dest->nelem += delta;
1240 if (dest->elems[is] > dest->elems[id])
1242 /* Copy from the top. */
1243 dest->elems[id + delta--] = dest->elems[is--];
1249 /* Slide from the bottom. */
1250 dest->elems[id + delta] = dest->elems[id];
1253 /* Copy remaining SRC elements. */
1254 memcpy (dest->elems, dest->elems + sbase,
1255 delta * sizeof (int));
1264 /* Insert the new element ELEM to the re_node_set* SET.
1265 SET should not already have ELEM.
1266 return -1 if an error is occured, return 1 otherwise. */
1269 internal_function __attribute_warn_unused_result__
1270 re_node_set_insert (re_node_set *set, int elem)
1273 /* In case the set is empty. */
1274 if (set->alloc == 0)
1276 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
1282 if (BE (set->nelem, 0) == 0)
1284 /* We already guaranteed above that set->alloc != 0. */
1285 set->elems[0] = elem;
1290 /* Realloc if we need. */
1291 if (set->alloc == set->nelem)
1294 set->alloc = set->alloc * 2;
1295 new_elems = re_realloc (set->elems, int, set->alloc);
1296 if (BE (new_elems == NULL, 0))
1298 set->elems = new_elems;
1301 /* Move the elements which follows the new element. Test the
1302 first element separately to skip a check in the inner loop. */
1303 if (elem < set->elems[0])
1306 for (idx = set->nelem; idx > 0; idx--)
1307 set->elems[idx] = set->elems[idx - 1];
1311 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1312 set->elems[idx] = set->elems[idx - 1];
1315 /* Insert the new element. */
1316 set->elems[idx] = elem;
1321 /* Insert the new element ELEM to the re_node_set* SET.
1322 SET should not already have any element greater than or equal to ELEM.
1323 Return -1 if an error is occured, return 1 otherwise. */
1326 internal_function __attribute_warn_unused_result__
1327 re_node_set_insert_last (re_node_set *set, int elem)
1329 /* Realloc if we need. */
1330 if (set->alloc == set->nelem)
1333 set->alloc = (set->alloc + 1) * 2;
1334 new_elems = re_realloc (set->elems, int, set->alloc);
1335 if (BE (new_elems == NULL, 0))
1337 set->elems = new_elems;
1340 /* Insert the new element. */
1341 set->elems[set->nelem++] = elem;
1345 /* Compare two node sets SET1 and SET2.
1346 return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
1349 internal_function __attribute ((pure))
1350 re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1353 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1355 for (i = set1->nelem ; --i >= 0 ; )
1356 if (set1->elems[i] != set2->elems[i])
1361 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1364 internal_function __attribute ((pure))
1365 re_node_set_contains (const re_node_set *set, int elem)
1367 unsigned int idx, right, mid;
1368 if (set->nelem <= 0)
1371 /* Binary search the element. */
1373 right = set->nelem - 1;
1376 mid = (idx + right) / 2;
1377 if (set->elems[mid] < elem)
1382 return set->elems[idx] == elem ? idx + 1 : 0;
1387 re_node_set_remove_at (re_node_set *set, int idx)
1389 if (idx < 0 || idx >= set->nelem)
1392 for (; idx < set->nelem; idx++)
1393 set->elems[idx] = set->elems[idx + 1];
1397 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1398 Or return -1, if an error will be occured. */
1402 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1404 int type = token.type;
1405 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1407 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1408 int *new_nexts, *new_indices;
1409 re_node_set *new_edests, *new_eclosures;
1410 re_token_t *new_nodes;
1412 /* Avoid overflows in realloc. */
1413 const size_t max_object_size = MAX (sizeof (re_token_t),
1414 MAX (sizeof (re_node_set),
1416 if (BE (SIZE_MAX / max_object_size < new_nodes_alloc, 0))
1419 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1420 if (BE (new_nodes == NULL, 0))
1422 dfa->nodes = new_nodes;
1423 new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
1424 new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
1425 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1426 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1427 if (BE (new_nexts == NULL || new_indices == NULL
1428 || new_edests == NULL || new_eclosures == NULL, 0))
1430 dfa->nexts = new_nexts;
1431 dfa->org_indices = new_indices;
1432 dfa->edests = new_edests;
1433 dfa->eclosures = new_eclosures;
1434 dfa->nodes_alloc = new_nodes_alloc;
1436 dfa->nodes[dfa->nodes_len] = token;
1437 dfa->nodes[dfa->nodes_len].constraint = 0;
1438 #ifdef RE_ENABLE_I18N
1439 dfa->nodes[dfa->nodes_len].accept_mb =
1440 (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1442 dfa->nexts[dfa->nodes_len] = -1;
1443 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1444 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1445 return dfa->nodes_len++;
1448 static inline unsigned int
1450 calc_state_hash (const re_node_set *nodes, unsigned int context)
1452 unsigned int hash = nodes->nelem + context;
1454 for (i = 0 ; i < nodes->nelem ; i++)
1455 hash += nodes->elems[i];
1459 /* Search for the state whose node_set is equivalent to NODES.
1460 Return the pointer to the state, if we found it in the DFA.
1461 Otherwise create the new one and return it. In case of an error
1462 return NULL and set the error code in ERR.
1463 Note: - We assume NULL as the invalid state, then it is possible that
1464 return value is NULL and ERR is REG_NOERROR.
1465 - We never return non-NULL value in case of any errors, it is for
1468 static re_dfastate_t *
1469 internal_function __attribute_warn_unused_result__
1470 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1471 const re_node_set *nodes)
1474 re_dfastate_t *new_state;
1475 struct re_state_table_entry *spot;
1477 if (BE (nodes->nelem == 0, 0))
1482 hash = calc_state_hash (nodes, 0);
1483 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1485 for (i = 0 ; i < spot->num ; i++)
1487 re_dfastate_t *state = spot->array[i];
1488 if (hash != state->hash)
1490 if (re_node_set_compare (&state->nodes, nodes))
1494 /* There are no appropriate state in the dfa, create the new one. */
1495 new_state = create_ci_newstate (dfa, nodes, hash);
1496 if (BE (new_state == NULL, 0))
1502 /* Search for the state whose node_set is equivalent to NODES and
1503 whose context is equivalent to CONTEXT.
1504 Return the pointer to the state, if we found it in the DFA.
1505 Otherwise create the new one and return it. In case of an error
1506 return NULL and set the error code in ERR.
1507 Note: - We assume NULL as the invalid state, then it is possible that
1508 return value is NULL and ERR is REG_NOERROR.
1509 - We never return non-NULL value in case of any errors, it is for
1512 static re_dfastate_t *
1513 internal_function __attribute_warn_unused_result__
1514 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1515 const re_node_set *nodes, unsigned int context)
1518 re_dfastate_t *new_state;
1519 struct re_state_table_entry *spot;
1521 if (nodes->nelem == 0)
1526 hash = calc_state_hash (nodes, context);
1527 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1529 for (i = 0 ; i < spot->num ; i++)
1531 re_dfastate_t *state = spot->array[i];
1532 if (state->hash == hash
1533 && state->context == context
1534 && re_node_set_compare (state->entrance_nodes, nodes))
1537 /* There are no appropriate state in `dfa', create the new one. */
1538 new_state = create_cd_newstate (dfa, nodes, context, hash);
1539 if (BE (new_state == NULL, 0))
1545 /* Finish initialization of the new state NEWSTATE, and using its hash value
1546 HASH put in the appropriate bucket of DFA's state table. Return value
1547 indicates the error code if failed. */
1549 static reg_errcode_t
1550 __attribute_warn_unused_result__
1551 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1554 struct re_state_table_entry *spot;
1558 newstate->hash = hash;
1559 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1560 if (BE (err != REG_NOERROR, 0))
1562 for (i = 0; i < newstate->nodes.nelem; i++)
1564 int elem = newstate->nodes.elems[i];
1565 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1566 if (re_node_set_insert_last (&newstate->non_eps_nodes, elem) < 0)
1570 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1571 if (BE (spot->alloc <= spot->num, 0))
1573 int new_alloc = 2 * spot->num + 2;
1574 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1576 if (BE (new_array == NULL, 0))
1578 spot->array = new_array;
1579 spot->alloc = new_alloc;
1581 spot->array[spot->num++] = newstate;
1586 free_state (re_dfastate_t *state)
1588 re_node_set_free (&state->non_eps_nodes);
1589 re_node_set_free (&state->inveclosure);
1590 if (state->entrance_nodes != &state->nodes)
1592 re_node_set_free (state->entrance_nodes);
1593 re_free (state->entrance_nodes);
1595 re_node_set_free (&state->nodes);
1596 re_free (state->word_trtable);
1597 re_free (state->trtable);
1601 /* Create the new state which is independ of contexts.
1602 Return the new state if succeeded, otherwise return NULL. */
1604 static re_dfastate_t *
1605 internal_function __attribute_warn_unused_result__
1606 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1611 re_dfastate_t *newstate;
1613 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1614 if (BE (newstate == NULL, 0))
1616 err = re_node_set_init_copy (&newstate->nodes, nodes);
1617 if (BE (err != REG_NOERROR, 0))
1623 newstate->entrance_nodes = &newstate->nodes;
1624 for (i = 0 ; i < nodes->nelem ; i++)
1626 re_token_t *node = dfa->nodes + nodes->elems[i];
1627 re_token_type_t type = node->type;
1628 if (type == CHARACTER && !node->constraint)
1630 #ifdef RE_ENABLE_I18N
1631 newstate->accept_mb |= node->accept_mb;
1632 #endif /* RE_ENABLE_I18N */
1634 /* If the state has the halt node, the state is a halt state. */
1635 if (type == END_OF_RE)
1637 else if (type == OP_BACK_REF)
1638 newstate->has_backref = 1;
1639 else if (type == ANCHOR || node->constraint)
1640 newstate->has_constraint = 1;
1642 err = register_state (dfa, newstate, hash);
1643 if (BE (err != REG_NOERROR, 0))
1645 free_state (newstate);
1651 /* Create the new state which is depend on the context CONTEXT.
1652 Return the new state if succeeded, otherwise return NULL. */
1654 static re_dfastate_t *
1655 internal_function __attribute_warn_unused_result__
1656 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1657 unsigned int context, unsigned int hash)
1659 int i, nctx_nodes = 0;
1661 re_dfastate_t *newstate;
1663 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1664 if (BE (newstate == NULL, 0))
1666 err = re_node_set_init_copy (&newstate->nodes, nodes);
1667 if (BE (err != REG_NOERROR, 0))
1673 newstate->context = context;
1674 newstate->entrance_nodes = &newstate->nodes;
1676 for (i = 0 ; i < nodes->nelem ; i++)
1678 re_token_t *node = dfa->nodes + nodes->elems[i];
1679 re_token_type_t type = node->type;
1680 unsigned int constraint = node->constraint;
1682 if (type == CHARACTER && !constraint)
1684 #ifdef RE_ENABLE_I18N
1685 newstate->accept_mb |= node->accept_mb;
1686 #endif /* RE_ENABLE_I18N */
1688 /* If the state has the halt node, the state is a halt state. */
1689 if (type == END_OF_RE)
1691 else if (type == OP_BACK_REF)
1692 newstate->has_backref = 1;
1696 if (newstate->entrance_nodes == &newstate->nodes)
1698 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1699 if (BE (newstate->entrance_nodes == NULL, 0))
1701 free_state (newstate);
1704 if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1708 newstate->has_constraint = 1;
1711 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1713 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1718 err = register_state (dfa, newstate, hash);
1719 if (BE (err != REG_NOERROR, 0))
1721 free_state (newstate);