1 /* -*- buffer-read-only: t -*- vi: set ro: */
2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
3 /* Extended regular expression matching and search library.
4 Copyright (C) 2002-2011 Free Software Foundation, Inc.
5 This file is part of the GNU C Library.
6 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation,
20 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
24 static void re_string_construct_common (const char *str, Idx len,
26 RE_TRANSLATE_TYPE trans, bool icase,
27 const re_dfa_t *dfa) internal_function;
28 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
29 const re_node_set *nodes,
30 re_hashval_t hash) internal_function;
31 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
32 const re_node_set *nodes,
34 re_hashval_t hash) internal_function;
36 /* Functions for string operation. */
38 /* This function allocate the buffers. It is necessary to call
39 re_string_reconstruct before using the object. */
42 internal_function __attribute_warn_unused_result__
43 re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
44 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
49 /* Ensure at least one character fits into the buffers. */
50 if (init_len < dfa->mb_cur_max)
51 init_len = dfa->mb_cur_max;
52 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
53 re_string_construct_common (str, len, pstr, trans, icase, dfa);
55 ret = re_string_realloc_buffers (pstr, init_buf_len);
56 if (BE (ret != REG_NOERROR, 0))
59 pstr->word_char = dfa->word_char;
60 pstr->word_ops_used = dfa->word_ops_used;
61 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
62 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
63 pstr->valid_raw_len = pstr->valid_len;
67 /* This function allocate the buffers, and initialize them. */
70 internal_function __attribute_warn_unused_result__
71 re_string_construct (re_string_t *pstr, const char *str, Idx len,
72 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
75 memset (pstr, '\0', sizeof (re_string_t));
76 re_string_construct_common (str, len, pstr, trans, icase, dfa);
80 ret = re_string_realloc_buffers (pstr, len + 1);
81 if (BE (ret != REG_NOERROR, 0))
84 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
89 if (dfa->mb_cur_max > 1)
93 ret = build_wcs_upper_buffer (pstr);
94 if (BE (ret != REG_NOERROR, 0))
96 if (pstr->valid_raw_len >= len)
98 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
100 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
101 if (BE (ret != REG_NOERROR, 0))
106 #endif /* RE_ENABLE_I18N */
107 build_upper_buffer (pstr);
111 #ifdef RE_ENABLE_I18N
112 if (dfa->mb_cur_max > 1)
113 build_wcs_buffer (pstr);
115 #endif /* RE_ENABLE_I18N */
118 re_string_translate_buffer (pstr);
121 pstr->valid_len = pstr->bufs_len;
122 pstr->valid_raw_len = pstr->bufs_len;
130 /* Helper functions for re_string_allocate, and re_string_construct. */
133 internal_function __attribute_warn_unused_result__
134 re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
136 #ifdef RE_ENABLE_I18N
137 if (pstr->mb_cur_max > 1)
141 /* Avoid overflow. */
142 size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
143 if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
146 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
147 if (BE (new_wcs == NULL, 0))
150 if (pstr->offsets != NULL)
152 Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
153 if (BE (new_offsets == NULL, 0))
155 pstr->offsets = new_offsets;
158 #endif /* RE_ENABLE_I18N */
159 if (pstr->mbs_allocated)
161 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
163 if (BE (new_mbs == NULL, 0))
167 pstr->bufs_len = new_buf_len;
174 re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
175 RE_TRANSLATE_TYPE trans, bool icase,
178 pstr->raw_mbs = (const unsigned char *) str;
183 pstr->mbs_allocated = (trans != NULL || icase);
184 pstr->mb_cur_max = dfa->mb_cur_max;
185 pstr->is_utf8 = dfa->is_utf8;
186 pstr->map_notascii = dfa->map_notascii;
187 pstr->stop = pstr->len;
188 pstr->raw_stop = pstr->stop;
191 #ifdef RE_ENABLE_I18N
193 /* Build wide character buffer PSTR->WCS.
194 If the byte sequence of the string are:
195 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
196 Then wide character buffer will be:
197 <wc1> , WEOF , <wc2> , WEOF , <wc3>
198 We use WEOF for padding, they indicate that the position isn't
199 a first byte of a multibyte character.
201 Note that this function assumes PSTR->VALID_LEN elements are already
202 built and starts from PSTR->VALID_LEN. */
206 build_wcs_buffer (re_string_t *pstr)
209 unsigned char buf[MB_LEN_MAX];
210 assert (MB_LEN_MAX >= pstr->mb_cur_max);
212 unsigned char buf[64];
215 Idx byte_idx, end_idx, remain_len;
218 /* Build the buffers from pstr->valid_len to either pstr->len or
220 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
221 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
226 remain_len = end_idx - byte_idx;
227 prev_st = pstr->cur_state;
228 /* Apply the translation if we need. */
229 if (BE (pstr->trans != NULL, 0))
233 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
235 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
236 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
238 p = (const char *) buf;
241 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
242 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
243 if (BE (mbclen == (size_t) -2, 0))
245 /* The buffer doesn't have enough space, finish to build. */
246 pstr->cur_state = prev_st;
249 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
251 /* We treat these cases as a singlebyte character. */
253 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
254 if (BE (pstr->trans != NULL, 0))
255 wc = pstr->trans[wc];
256 pstr->cur_state = prev_st;
259 /* Write wide character and padding. */
260 pstr->wcs[byte_idx++] = wc;
261 /* Write paddings. */
262 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
263 pstr->wcs[byte_idx++] = WEOF;
265 pstr->valid_len = byte_idx;
266 pstr->valid_raw_len = byte_idx;
269 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
270 but for REG_ICASE. */
273 internal_function __attribute_warn_unused_result__
274 build_wcs_upper_buffer (re_string_t *pstr)
277 Idx src_idx, byte_idx, end_idx, remain_len;
280 char buf[MB_LEN_MAX];
281 assert (MB_LEN_MAX >= pstr->mb_cur_max);
286 byte_idx = pstr->valid_len;
287 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
289 /* The following optimization assumes that ASCII characters can be
290 mapped to wide characters with a simple cast. */
291 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
293 while (byte_idx < end_idx)
297 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
298 && mbsinit (&pstr->cur_state))
300 /* In case of a singlebyte character. */
302 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
303 /* The next step uses the assumption that wchar_t is encoded
304 ASCII-safe: all ASCII values can be converted like this. */
305 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
310 remain_len = end_idx - byte_idx;
311 prev_st = pstr->cur_state;
312 mbclen = __mbrtowc (&wc,
313 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
314 + byte_idx), remain_len, &pstr->cur_state);
315 if (BE (mbclen < (size_t) -2, 1))
323 mbcdlen = wcrtomb (buf, wcu, &prev_st);
324 if (BE (mbclen == mbcdlen, 1))
325 memcpy (pstr->mbs + byte_idx, buf, mbclen);
333 memcpy (pstr->mbs + byte_idx,
334 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
335 pstr->wcs[byte_idx++] = wcu;
336 /* Write paddings. */
337 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
338 pstr->wcs[byte_idx++] = WEOF;
340 else if (mbclen == (size_t) -1 || mbclen == 0)
342 /* It is an invalid character or '\0'. Just use the byte. */
343 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
344 pstr->mbs[byte_idx] = ch;
345 /* And also cast it to wide char. */
346 pstr->wcs[byte_idx++] = (wchar_t) ch;
347 if (BE (mbclen == (size_t) -1, 0))
348 pstr->cur_state = prev_st;
352 /* The buffer doesn't have enough space, finish to build. */
353 pstr->cur_state = prev_st;
357 pstr->valid_len = byte_idx;
358 pstr->valid_raw_len = byte_idx;
362 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
367 remain_len = end_idx - byte_idx;
368 prev_st = pstr->cur_state;
369 if (BE (pstr->trans != NULL, 0))
373 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
375 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
376 buf[i] = pstr->trans[ch];
378 p = (const char *) buf;
381 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
382 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
383 if (BE (mbclen < (size_t) -2, 1))
391 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
392 if (BE (mbclen == mbcdlen, 1))
393 memcpy (pstr->mbs + byte_idx, buf, mbclen);
394 else if (mbcdlen != (size_t) -1)
398 if (byte_idx + mbcdlen > pstr->bufs_len)
400 pstr->cur_state = prev_st;
404 if (pstr->offsets == NULL)
406 pstr->offsets = re_malloc (Idx, pstr->bufs_len);
408 if (pstr->offsets == NULL)
411 if (!pstr->offsets_needed)
413 for (i = 0; i < (size_t) byte_idx; ++i)
414 pstr->offsets[i] = i;
415 pstr->offsets_needed = 1;
418 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
419 pstr->wcs[byte_idx] = wcu;
420 pstr->offsets[byte_idx] = src_idx;
421 for (i = 1; i < mbcdlen; ++i)
423 pstr->offsets[byte_idx + i]
424 = src_idx + (i < mbclen ? i : mbclen - 1);
425 pstr->wcs[byte_idx + i] = WEOF;
427 pstr->len += mbcdlen - mbclen;
428 if (pstr->raw_stop > src_idx)
429 pstr->stop += mbcdlen - mbclen;
430 end_idx = (pstr->bufs_len > pstr->len)
431 ? pstr->len : pstr->bufs_len;
437 memcpy (pstr->mbs + byte_idx, p, mbclen);
440 memcpy (pstr->mbs + byte_idx, p, mbclen);
442 if (BE (pstr->offsets_needed != 0, 0))
445 for (i = 0; i < mbclen; ++i)
446 pstr->offsets[byte_idx + i] = src_idx + i;
450 pstr->wcs[byte_idx++] = wcu;
451 /* Write paddings. */
452 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
453 pstr->wcs[byte_idx++] = WEOF;
455 else if (mbclen == (size_t) -1 || mbclen == 0)
457 /* It is an invalid character or '\0'. Just use the byte. */
458 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
460 if (BE (pstr->trans != NULL, 0))
461 ch = pstr->trans [ch];
462 pstr->mbs[byte_idx] = ch;
464 if (BE (pstr->offsets_needed != 0, 0))
465 pstr->offsets[byte_idx] = src_idx;
468 /* And also cast it to wide char. */
469 pstr->wcs[byte_idx++] = (wchar_t) ch;
470 if (BE (mbclen == (size_t) -1, 0))
471 pstr->cur_state = prev_st;
475 /* The buffer doesn't have enough space, finish to build. */
476 pstr->cur_state = prev_st;
480 pstr->valid_len = byte_idx;
481 pstr->valid_raw_len = src_idx;
485 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
490 re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
497 /* Skip the characters which are not necessary to check. */
498 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
499 rawbuf_idx < new_raw_idx;)
503 remain_len = pstr->len - rawbuf_idx;
504 prev_st = pstr->cur_state;
505 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
506 remain_len, &pstr->cur_state);
507 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
509 /* We treat these cases as a single byte character. */
510 if (mbclen == 0 || remain_len == 0)
513 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
515 pstr->cur_state = prev_st;
519 /* Then proceed the next character. */
520 rawbuf_idx += mbclen;
525 #endif /* RE_ENABLE_I18N */
527 /* Build the buffer PSTR->MBS, and apply the translation if we need.
528 This function is used in case of REG_ICASE. */
532 build_upper_buffer (re_string_t *pstr)
534 Idx char_idx, end_idx;
535 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
537 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
539 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
540 if (BE (pstr->trans != NULL, 0))
541 ch = pstr->trans[ch];
543 pstr->mbs[char_idx] = toupper (ch);
545 pstr->mbs[char_idx] = ch;
547 pstr->valid_len = char_idx;
548 pstr->valid_raw_len = char_idx;
551 /* Apply TRANS to the buffer in PSTR. */
555 re_string_translate_buffer (re_string_t *pstr)
557 Idx buf_idx, end_idx;
558 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
560 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
562 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
563 pstr->mbs[buf_idx] = pstr->trans[ch];
566 pstr->valid_len = buf_idx;
567 pstr->valid_raw_len = buf_idx;
570 /* This function re-construct the buffers.
571 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
572 convert to upper case in case of REG_ICASE, apply translation. */
575 internal_function __attribute_warn_unused_result__
576 re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
580 if (BE (pstr->raw_mbs_idx <= idx, 0))
581 offset = idx - pstr->raw_mbs_idx;
585 #ifdef RE_ENABLE_I18N
586 if (pstr->mb_cur_max > 1)
587 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
588 #endif /* RE_ENABLE_I18N */
589 pstr->len = pstr->raw_len;
590 pstr->stop = pstr->raw_stop;
592 pstr->raw_mbs_idx = 0;
593 pstr->valid_raw_len = 0;
594 pstr->offsets_needed = 0;
595 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
596 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
597 if (!pstr->mbs_allocated)
598 pstr->mbs = (unsigned char *) pstr->raw_mbs;
602 if (BE (offset != 0, 1))
604 /* Should the already checked characters be kept? */
605 if (BE (offset < pstr->valid_raw_len, 1))
607 /* Yes, move them to the front of the buffer. */
608 #ifdef RE_ENABLE_I18N
609 if (BE (pstr->offsets_needed, 0))
611 Idx low = 0, high = pstr->valid_len, mid;
614 mid = (high + low) / 2;
615 if (pstr->offsets[mid] > offset)
617 else if (pstr->offsets[mid] < offset)
623 if (pstr->offsets[mid] < offset)
625 pstr->tip_context = re_string_context_at (pstr, mid - 1,
627 /* This can be quite complicated, so handle specially
628 only the common and easy case where the character with
629 different length representation of lower and upper
630 case is present at or after offset. */
631 if (pstr->valid_len > offset
632 && mid == offset && pstr->offsets[mid] == offset)
634 memmove (pstr->wcs, pstr->wcs + offset,
635 (pstr->valid_len - offset) * sizeof (wint_t));
636 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
637 pstr->valid_len -= offset;
638 pstr->valid_raw_len -= offset;
639 for (low = 0; low < pstr->valid_len; low++)
640 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
644 /* Otherwise, just find out how long the partial multibyte
645 character at offset is and fill it with WEOF/255. */
646 pstr->len = pstr->raw_len - idx + offset;
647 pstr->stop = pstr->raw_stop - idx + offset;
648 pstr->offsets_needed = 0;
649 while (mid > 0 && pstr->offsets[mid - 1] == offset)
651 while (mid < pstr->valid_len)
652 if (pstr->wcs[mid] != WEOF)
656 if (mid == pstr->valid_len)
660 pstr->valid_len = pstr->offsets[mid] - offset;
663 for (low = 0; low < pstr->valid_len; ++low)
664 pstr->wcs[low] = WEOF;
665 memset (pstr->mbs, 255, pstr->valid_len);
668 pstr->valid_raw_len = pstr->valid_len;
674 pstr->tip_context = re_string_context_at (pstr, offset - 1,
676 #ifdef RE_ENABLE_I18N
677 if (pstr->mb_cur_max > 1)
678 memmove (pstr->wcs, pstr->wcs + offset,
679 (pstr->valid_len - offset) * sizeof (wint_t));
680 #endif /* RE_ENABLE_I18N */
681 if (BE (pstr->mbs_allocated, 0))
682 memmove (pstr->mbs, pstr->mbs + offset,
683 pstr->valid_len - offset);
684 pstr->valid_len -= offset;
685 pstr->valid_raw_len -= offset;
687 assert (pstr->valid_len > 0);
693 #ifdef RE_ENABLE_I18N
694 /* No, skip all characters until IDX. */
695 Idx prev_valid_len = pstr->valid_len;
697 if (BE (pstr->offsets_needed, 0))
699 pstr->len = pstr->raw_len - idx + offset;
700 pstr->stop = pstr->raw_stop - idx + offset;
701 pstr->offsets_needed = 0;
705 #ifdef RE_ENABLE_I18N
706 if (pstr->mb_cur_max > 1)
713 const unsigned char *raw, *p, *end;
715 /* Special case UTF-8. Multi-byte chars start with any
716 byte other than 0x80 - 0xbf. */
717 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
718 end = raw + (offset - pstr->mb_cur_max);
719 if (end < pstr->raw_mbs)
721 p = raw + offset - 1;
723 /* We know the wchar_t encoding is UCS4, so for the simple
724 case, ASCII characters, skip the conversion step. */
725 if (isascii (*p) && BE (pstr->trans == NULL, 1))
727 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
728 /* pstr->valid_len = 0; */
733 for (; p >= end; --p)
734 if ((*p & 0xc0) != 0x80)
738 Idx mlen = raw + pstr->len - p;
741 #if 0 /* dead code: buf is set but never used */
742 unsigned char buf[6];
743 if (BE (pstr->trans != NULL, 0))
745 int i = mlen < 6 ? mlen : 6;
747 buf[i] = pstr->trans[p[i]];
750 /* XXX Don't use mbrtowc, we know which conversion
751 to use (UTF-8 -> UCS4). */
752 memset (&cur_state, 0, sizeof (cur_state));
753 mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
755 if (raw + offset - p <= mbclen
756 && mbclen < (size_t) -2)
758 memset (&pstr->cur_state, '\0',
760 pstr->valid_len = mbclen - (raw + offset - p);
768 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
771 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
773 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
774 && IS_WIDE_WORD_CHAR (wc))
776 : ((IS_WIDE_NEWLINE (wc)
777 && pstr->newline_anchor)
778 ? CONTEXT_NEWLINE : 0));
779 if (BE (pstr->valid_len, 0))
781 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
782 pstr->wcs[wcs_idx] = WEOF;
783 if (pstr->mbs_allocated)
784 memset (pstr->mbs, 255, pstr->valid_len);
786 pstr->valid_raw_len = pstr->valid_len;
789 #endif /* RE_ENABLE_I18N */
791 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
792 pstr->valid_raw_len = 0;
795 pstr->tip_context = (bitset_contain (pstr->word_char, c)
797 : ((IS_NEWLINE (c) && pstr->newline_anchor)
798 ? CONTEXT_NEWLINE : 0));
801 if (!BE (pstr->mbs_allocated, 0))
804 pstr->raw_mbs_idx = idx;
806 pstr->stop -= offset;
808 /* Then build the buffers. */
809 #ifdef RE_ENABLE_I18N
810 if (pstr->mb_cur_max > 1)
814 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
815 if (BE (ret != REG_NOERROR, 0))
819 build_wcs_buffer (pstr);
822 #endif /* RE_ENABLE_I18N */
823 if (BE (pstr->mbs_allocated, 0))
826 build_upper_buffer (pstr);
827 else if (pstr->trans != NULL)
828 re_string_translate_buffer (pstr);
831 pstr->valid_len = pstr->len;
838 internal_function __attribute ((pure))
839 re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
844 /* Handle the common (easiest) cases first. */
845 if (BE (!pstr->mbs_allocated, 1))
846 return re_string_peek_byte (pstr, idx);
848 #ifdef RE_ENABLE_I18N
849 if (pstr->mb_cur_max > 1
850 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
851 return re_string_peek_byte (pstr, idx);
854 off = pstr->cur_idx + idx;
855 #ifdef RE_ENABLE_I18N
856 if (pstr->offsets_needed)
857 off = pstr->offsets[off];
860 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
862 #ifdef RE_ENABLE_I18N
863 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
864 this function returns CAPITAL LETTER I instead of first byte of
865 DOTLESS SMALL LETTER I. The latter would confuse the parser,
866 since peek_byte_case doesn't advance cur_idx in any way. */
867 if (pstr->offsets_needed && !isascii (ch))
868 return re_string_peek_byte (pstr, idx);
875 internal_function __attribute ((pure))
876 re_string_fetch_byte_case (re_string_t *pstr)
878 if (BE (!pstr->mbs_allocated, 1))
879 return re_string_fetch_byte (pstr);
881 #ifdef RE_ENABLE_I18N
882 if (pstr->offsets_needed)
887 /* For tr_TR.UTF-8 [[:islower:]] there is
888 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
889 in that case the whole multi-byte character and return
890 the original letter. On the other side, with
891 [[: DOTLESS SMALL LETTER I return [[:I, as doing
892 anything else would complicate things too much. */
894 if (!re_string_first_byte (pstr, pstr->cur_idx))
895 return re_string_fetch_byte (pstr);
897 off = pstr->offsets[pstr->cur_idx];
898 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
901 return re_string_fetch_byte (pstr);
903 re_string_skip_bytes (pstr,
904 re_string_char_size_at (pstr, pstr->cur_idx));
909 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
914 re_string_destruct (re_string_t *pstr)
916 #ifdef RE_ENABLE_I18N
918 re_free (pstr->offsets);
919 #endif /* RE_ENABLE_I18N */
920 if (pstr->mbs_allocated)
924 /* Return the context at IDX in INPUT. */
928 re_string_context_at (const re_string_t *input, Idx idx, int eflags)
931 if (BE (! REG_VALID_INDEX (idx), 0))
932 /* In this case, we use the value stored in input->tip_context,
933 since we can't know the character in input->mbs[-1] here. */
934 return input->tip_context;
935 if (BE (idx == input->len, 0))
936 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
937 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
938 #ifdef RE_ENABLE_I18N
939 if (input->mb_cur_max > 1)
943 while(input->wcs[wc_idx] == WEOF)
946 /* It must not happen. */
947 assert (REG_VALID_INDEX (wc_idx));
950 if (! REG_VALID_INDEX (wc_idx))
951 return input->tip_context;
953 wc = input->wcs[wc_idx];
954 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
956 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
957 ? CONTEXT_NEWLINE : 0);
962 c = re_string_byte_at (input, idx);
963 if (bitset_contain (input->word_char, c))
965 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
969 /* Functions for set operation. */
972 internal_function __attribute_warn_unused_result__
973 re_node_set_alloc (re_node_set *set, Idx size)
977 set->elems = re_malloc (Idx, size);
978 if (BE (set->elems == NULL, 0))
984 internal_function __attribute_warn_unused_result__
985 re_node_set_init_1 (re_node_set *set, Idx elem)
989 set->elems = re_malloc (Idx, 1);
990 if (BE (set->elems == NULL, 0))
992 set->alloc = set->nelem = 0;
995 set->elems[0] = elem;
1000 internal_function __attribute_warn_unused_result__
1001 re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
1004 set->elems = re_malloc (Idx, 2);
1005 if (BE (set->elems == NULL, 0))
1010 set->elems[0] = elem1;
1017 set->elems[0] = elem1;
1018 set->elems[1] = elem2;
1022 set->elems[0] = elem2;
1023 set->elems[1] = elem1;
1029 static reg_errcode_t
1030 internal_function __attribute_warn_unused_result__
1031 re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1033 dest->nelem = src->nelem;
1036 dest->alloc = dest->nelem;
1037 dest->elems = re_malloc (Idx, dest->alloc);
1038 if (BE (dest->elems == NULL, 0))
1040 dest->alloc = dest->nelem = 0;
1043 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1046 re_node_set_init_empty (dest);
1050 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1051 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1052 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
1054 static reg_errcode_t
1055 internal_function __attribute_warn_unused_result__
1056 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1057 const re_node_set *src2)
1059 Idx i1, i2, is, id, delta, sbase;
1060 if (src1->nelem == 0 || src2->nelem == 0)
1063 /* We need dest->nelem + 2 * elems_in_intersection; this is a
1064 conservative estimate. */
1065 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1067 Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
1068 Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
1069 if (BE (new_elems == NULL, 0))
1071 dest->elems = new_elems;
1072 dest->alloc = new_alloc;
1075 /* Find the items in the intersection of SRC1 and SRC2, and copy
1076 into the top of DEST those that are not already in DEST itself. */
1077 sbase = dest->nelem + src1->nelem + src2->nelem;
1078 i1 = src1->nelem - 1;
1079 i2 = src2->nelem - 1;
1080 id = dest->nelem - 1;
1083 if (src1->elems[i1] == src2->elems[i2])
1085 /* Try to find the item in DEST. Maybe we could binary search? */
1086 while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
1089 if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
1090 dest->elems[--sbase] = src1->elems[i1];
1092 if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
1096 /* Lower the highest of the two items. */
1097 else if (src1->elems[i1] < src2->elems[i2])
1099 if (! REG_VALID_INDEX (--i2))
1104 if (! REG_VALID_INDEX (--i1))
1109 id = dest->nelem - 1;
1110 is = dest->nelem + src1->nelem + src2->nelem - 1;
1111 delta = is - sbase + 1;
1113 /* Now copy. When DELTA becomes zero, the remaining
1114 DEST elements are already in place; this is more or
1115 less the same loop that is in re_node_set_merge. */
1116 dest->nelem += delta;
1117 if (delta > 0 && REG_VALID_INDEX (id))
1120 if (dest->elems[is] > dest->elems[id])
1122 /* Copy from the top. */
1123 dest->elems[id + delta--] = dest->elems[is--];
1129 /* Slide from the bottom. */
1130 dest->elems[id + delta] = dest->elems[id];
1131 if (! REG_VALID_INDEX (--id))
1136 /* Copy remaining SRC elements. */
1137 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
1142 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1143 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1145 static reg_errcode_t
1146 internal_function __attribute_warn_unused_result__
1147 re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1148 const re_node_set *src2)
1151 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1153 dest->alloc = src1->nelem + src2->nelem;
1154 dest->elems = re_malloc (Idx, dest->alloc);
1155 if (BE (dest->elems == NULL, 0))
1160 if (src1 != NULL && src1->nelem > 0)
1161 return re_node_set_init_copy (dest, src1);
1162 else if (src2 != NULL && src2->nelem > 0)
1163 return re_node_set_init_copy (dest, src2);
1165 re_node_set_init_empty (dest);
1168 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1170 if (src1->elems[i1] > src2->elems[i2])
1172 dest->elems[id++] = src2->elems[i2++];
1175 if (src1->elems[i1] == src2->elems[i2])
1177 dest->elems[id++] = src1->elems[i1++];
1179 if (i1 < src1->nelem)
1181 memcpy (dest->elems + id, src1->elems + i1,
1182 (src1->nelem - i1) * sizeof (Idx));
1183 id += src1->nelem - i1;
1185 else if (i2 < src2->nelem)
1187 memcpy (dest->elems + id, src2->elems + i2,
1188 (src2->nelem - i2) * sizeof (Idx));
1189 id += src2->nelem - i2;
1195 /* Calculate the union set of the sets DEST and SRC. And store it to
1196 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1198 static reg_errcode_t
1199 internal_function __attribute_warn_unused_result__
1200 re_node_set_merge (re_node_set *dest, const re_node_set *src)
1202 Idx is, id, sbase, delta;
1203 if (src == NULL || src->nelem == 0)
1205 if (dest->alloc < 2 * src->nelem + dest->nelem)
1207 Idx new_alloc = 2 * (src->nelem + dest->alloc);
1208 Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
1209 if (BE (new_buffer == NULL, 0))
1211 dest->elems = new_buffer;
1212 dest->alloc = new_alloc;
1215 if (BE (dest->nelem == 0, 0))
1217 dest->nelem = src->nelem;
1218 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1222 /* Copy into the top of DEST the items of SRC that are not
1223 found in DEST. Maybe we could binary search in DEST? */
1224 for (sbase = dest->nelem + 2 * src->nelem,
1225 is = src->nelem - 1, id = dest->nelem - 1;
1226 REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
1228 if (dest->elems[id] == src->elems[is])
1230 else if (dest->elems[id] < src->elems[is])
1231 dest->elems[--sbase] = src->elems[is--];
1232 else /* if (dest->elems[id] > src->elems[is]) */
1236 if (REG_VALID_INDEX (is))
1238 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1240 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
1243 id = dest->nelem - 1;
1244 is = dest->nelem + 2 * src->nelem - 1;
1245 delta = is - sbase + 1;
1249 /* Now copy. When DELTA becomes zero, the remaining
1250 DEST elements are already in place. */
1251 dest->nelem += delta;
1254 if (dest->elems[is] > dest->elems[id])
1256 /* Copy from the top. */
1257 dest->elems[id + delta--] = dest->elems[is--];
1263 /* Slide from the bottom. */
1264 dest->elems[id + delta] = dest->elems[id];
1265 if (! REG_VALID_INDEX (--id))
1267 /* Copy remaining SRC elements. */
1268 memcpy (dest->elems, dest->elems + sbase,
1269 delta * sizeof (Idx));
1278 /* Insert the new element ELEM to the re_node_set* SET.
1279 SET should not already have ELEM.
1280 Return true if successful. */
1283 internal_function __attribute_warn_unused_result__
1284 re_node_set_insert (re_node_set *set, Idx elem)
1287 /* In case the set is empty. */
1288 if (set->alloc == 0)
1289 return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
1291 if (BE (set->nelem, 0) == 0)
1293 /* We already guaranteed above that set->alloc != 0. */
1294 set->elems[0] = elem;
1299 /* Realloc if we need. */
1300 if (set->alloc == set->nelem)
1303 set->alloc = set->alloc * 2;
1304 new_elems = re_realloc (set->elems, Idx, set->alloc);
1305 if (BE (new_elems == NULL, 0))
1307 set->elems = new_elems;
1310 /* Move the elements which follows the new element. Test the
1311 first element separately to skip a check in the inner loop. */
1312 if (elem < set->elems[0])
1315 for (idx = set->nelem; idx > 0; idx--)
1316 set->elems[idx] = set->elems[idx - 1];
1320 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1321 set->elems[idx] = set->elems[idx - 1];
1324 /* Insert the new element. */
1325 set->elems[idx] = elem;
1330 /* Insert the new element ELEM to the re_node_set* SET.
1331 SET should not already have any element greater than or equal to ELEM.
1332 Return true if successful. */
1335 internal_function __attribute_warn_unused_result__
1336 re_node_set_insert_last (re_node_set *set, Idx elem)
1338 /* Realloc if we need. */
1339 if (set->alloc == set->nelem)
1342 set->alloc = (set->alloc + 1) * 2;
1343 new_elems = re_realloc (set->elems, Idx, set->alloc);
1344 if (BE (new_elems == NULL, 0))
1346 set->elems = new_elems;
1349 /* Insert the new element. */
1350 set->elems[set->nelem++] = elem;
1354 /* Compare two node sets SET1 and SET2.
1355 Return true if SET1 and SET2 are equivalent. */
1358 internal_function __attribute ((pure))
1359 re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1362 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1364 for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
1365 if (set1->elems[i] != set2->elems[i])
1370 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1373 internal_function __attribute ((pure))
1374 re_node_set_contains (const re_node_set *set, Idx elem)
1376 __re_size_t idx, right, mid;
1377 if (! REG_VALID_NONZERO_INDEX (set->nelem))
1380 /* Binary search the element. */
1382 right = set->nelem - 1;
1385 mid = (idx + right) / 2;
1386 if (set->elems[mid] < elem)
1391 return set->elems[idx] == elem ? idx + 1 : 0;
1396 re_node_set_remove_at (re_node_set *set, Idx idx)
1398 verify (! TYPE_SIGNED (Idx));
1401 if (idx >= set->nelem)
1404 for (; idx < set->nelem; idx++)
1405 set->elems[idx] = set->elems[idx + 1];
1409 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1410 Or return REG_MISSING if an error occurred. */
1414 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1416 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1418 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1419 Idx *new_nexts, *new_indices;
1420 re_node_set *new_edests, *new_eclosures;
1421 re_token_t *new_nodes;
1422 size_t max_object_size =
1423 MAX (sizeof (re_token_t),
1424 MAX (sizeof (re_node_set),
1427 /* Avoid overflows. */
1428 if (BE (SIZE_MAX / 2 / max_object_size < dfa->nodes_alloc, 0))
1431 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1432 if (BE (new_nodes == NULL, 0))
1434 dfa->nodes = new_nodes;
1435 new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1436 new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1437 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1438 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1439 if (BE (new_nexts == NULL || new_indices == NULL
1440 || new_edests == NULL || new_eclosures == NULL, 0))
1442 dfa->nexts = new_nexts;
1443 dfa->org_indices = new_indices;
1444 dfa->edests = new_edests;
1445 dfa->eclosures = new_eclosures;
1446 dfa->nodes_alloc = new_nodes_alloc;
1448 dfa->nodes[dfa->nodes_len] = token;
1449 dfa->nodes[dfa->nodes_len].constraint = 0;
1450 #ifdef RE_ENABLE_I18N
1452 int type = token.type;
1453 dfa->nodes[dfa->nodes_len].accept_mb =
1454 (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1457 dfa->nexts[dfa->nodes_len] = REG_MISSING;
1458 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1459 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1460 return dfa->nodes_len++;
1463 static inline re_hashval_t
1465 calc_state_hash (const re_node_set *nodes, unsigned int context)
1467 re_hashval_t hash = nodes->nelem + context;
1469 for (i = 0 ; i < nodes->nelem ; i++)
1470 hash += nodes->elems[i];
1474 /* Search for the state whose node_set is equivalent to NODES.
1475 Return the pointer to the state, if we found it in the DFA.
1476 Otherwise create the new one and return it. In case of an error
1477 return NULL and set the error code in ERR.
1478 Note: - We assume NULL as the invalid state, then it is possible that
1479 return value is NULL and ERR is REG_NOERROR.
1480 - We never return non-NULL value in case of any errors, it is for
1483 static re_dfastate_t *
1484 internal_function __attribute_warn_unused_result__
1485 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1486 const re_node_set *nodes)
1489 re_dfastate_t *new_state;
1490 struct re_state_table_entry *spot;
1493 /* Suppress bogus uninitialized-variable warnings. */
1496 if (BE (nodes->nelem == 0, 0))
1501 hash = calc_state_hash (nodes, 0);
1502 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1504 for (i = 0 ; i < spot->num ; i++)
1506 re_dfastate_t *state = spot->array[i];
1507 if (hash != state->hash)
1509 if (re_node_set_compare (&state->nodes, nodes))
1513 /* There are no appropriate state in the dfa, create the new one. */
1514 new_state = create_ci_newstate (dfa, nodes, hash);
1515 if (BE (new_state == NULL, 0))
1521 /* Search for the state whose node_set is equivalent to NODES and
1522 whose context is equivalent to CONTEXT.
1523 Return the pointer to the state, if we found it in the DFA.
1524 Otherwise create the new one and return it. In case of an error
1525 return NULL and set the error code in ERR.
1526 Note: - We assume NULL as the invalid state, then it is possible that
1527 return value is NULL and ERR is REG_NOERROR.
1528 - We never return non-NULL value in case of any errors, it is for
1531 static re_dfastate_t *
1532 internal_function __attribute_warn_unused_result__
1533 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1534 const re_node_set *nodes, unsigned int context)
1537 re_dfastate_t *new_state;
1538 struct re_state_table_entry *spot;
1541 /* Suppress bogus uninitialized-variable warnings. */
1544 if (nodes->nelem == 0)
1549 hash = calc_state_hash (nodes, context);
1550 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1552 for (i = 0 ; i < spot->num ; i++)
1554 re_dfastate_t *state = spot->array[i];
1555 if (state->hash == hash
1556 && state->context == context
1557 && re_node_set_compare (state->entrance_nodes, nodes))
1560 /* There are no appropriate state in `dfa', create the new one. */
1561 new_state = create_cd_newstate (dfa, nodes, context, hash);
1562 if (BE (new_state == NULL, 0))
1568 /* Finish initialization of the new state NEWSTATE, and using its hash value
1569 HASH put in the appropriate bucket of DFA's state table. Return value
1570 indicates the error code if failed. */
1572 static reg_errcode_t
1573 __attribute_warn_unused_result__
1574 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1577 struct re_state_table_entry *spot;
1581 newstate->hash = hash;
1582 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1583 if (BE (err != REG_NOERROR, 0))
1585 for (i = 0; i < newstate->nodes.nelem; i++)
1587 Idx elem = newstate->nodes.elems[i];
1588 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1589 if (BE (! re_node_set_insert_last (&newstate->non_eps_nodes, elem), 0))
1593 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1594 if (BE (spot->alloc <= spot->num, 0))
1596 Idx new_alloc = 2 * spot->num + 2;
1597 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1599 if (BE (new_array == NULL, 0))
1601 spot->array = new_array;
1602 spot->alloc = new_alloc;
1604 spot->array[spot->num++] = newstate;
1609 free_state (re_dfastate_t *state)
1611 re_node_set_free (&state->non_eps_nodes);
1612 re_node_set_free (&state->inveclosure);
1613 if (state->entrance_nodes != &state->nodes)
1615 re_node_set_free (state->entrance_nodes);
1616 re_free (state->entrance_nodes);
1618 re_node_set_free (&state->nodes);
1619 re_free (state->word_trtable);
1620 re_free (state->trtable);
1624 /* Create the new state which is independ of contexts.
1625 Return the new state if succeeded, otherwise return NULL. */
1627 static re_dfastate_t *
1628 internal_function __attribute_warn_unused_result__
1629 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1634 re_dfastate_t *newstate;
1636 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1637 if (BE (newstate == NULL, 0))
1639 err = re_node_set_init_copy (&newstate->nodes, nodes);
1640 if (BE (err != REG_NOERROR, 0))
1646 newstate->entrance_nodes = &newstate->nodes;
1647 for (i = 0 ; i < nodes->nelem ; i++)
1649 re_token_t *node = dfa->nodes + nodes->elems[i];
1650 re_token_type_t type = node->type;
1651 if (type == CHARACTER && !node->constraint)
1653 #ifdef RE_ENABLE_I18N
1654 newstate->accept_mb |= node->accept_mb;
1655 #endif /* RE_ENABLE_I18N */
1657 /* If the state has the halt node, the state is a halt state. */
1658 if (type == END_OF_RE)
1660 else if (type == OP_BACK_REF)
1661 newstate->has_backref = 1;
1662 else if (type == ANCHOR || node->constraint)
1663 newstate->has_constraint = 1;
1665 err = register_state (dfa, newstate, hash);
1666 if (BE (err != REG_NOERROR, 0))
1668 free_state (newstate);
1674 /* Create the new state which is depend on the context CONTEXT.
1675 Return the new state if succeeded, otherwise return NULL. */
1677 static re_dfastate_t *
1678 internal_function __attribute_warn_unused_result__
1679 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1680 unsigned int context, re_hashval_t hash)
1682 Idx i, nctx_nodes = 0;
1684 re_dfastate_t *newstate;
1686 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1687 if (BE (newstate == NULL, 0))
1689 err = re_node_set_init_copy (&newstate->nodes, nodes);
1690 if (BE (err != REG_NOERROR, 0))
1696 newstate->context = context;
1697 newstate->entrance_nodes = &newstate->nodes;
1699 for (i = 0 ; i < nodes->nelem ; i++)
1701 re_token_t *node = dfa->nodes + nodes->elems[i];
1702 re_token_type_t type = node->type;
1703 unsigned int constraint = node->constraint;
1705 if (type == CHARACTER && !constraint)
1707 #ifdef RE_ENABLE_I18N
1708 newstate->accept_mb |= node->accept_mb;
1709 #endif /* RE_ENABLE_I18N */
1711 /* If the state has the halt node, the state is a halt state. */
1712 if (type == END_OF_RE)
1714 else if (type == OP_BACK_REF)
1715 newstate->has_backref = 1;
1719 if (newstate->entrance_nodes == &newstate->nodes)
1721 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1722 if (BE (newstate->entrance_nodes == NULL, 0))
1724 free_state (newstate);
1727 if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1731 newstate->has_constraint = 1;
1734 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1736 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1741 err = register_state (dfa, newstate, hash);
1742 if (BE (err != REG_NOERROR, 0))
1744 free_state (newstate);