1 /* -*- buffer-read-only: t -*- vi: set ro: */
2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
3 /* Extended regular expression matching and search library.
4 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
5 Software Foundation, Inc.
6 This file is part of the GNU C Library.
7 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License along
20 with this program; if not, write to the Free Software Foundation,
21 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
25 static void re_string_construct_common (const char *str, Idx len,
27 RE_TRANSLATE_TYPE trans, bool icase,
28 const re_dfa_t *dfa) internal_function;
29 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
30 const re_node_set *nodes,
31 re_hashval_t hash) internal_function;
32 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
33 const re_node_set *nodes,
35 re_hashval_t hash) internal_function;
37 /* Functions for string operation. */
39 /* This function allocates the buffers. It is necessary to call
40 re_string_reconstruct before using the object. */
43 internal_function __attribute_warn_unused_result__
44 re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
45 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
50 /* Ensure at least one character fits into the buffers. */
51 if (init_len < dfa->mb_cur_max)
52 init_len = dfa->mb_cur_max;
53 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
54 re_string_construct_common (str, len, pstr, trans, icase, dfa);
56 ret = re_string_realloc_buffers (pstr, init_buf_len);
57 if (BE (ret != REG_NOERROR, 0))
60 pstr->word_char = dfa->word_char;
61 pstr->word_ops_used = dfa->word_ops_used;
62 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
63 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
64 pstr->valid_raw_len = pstr->valid_len;
68 /* This function allocates the buffers and initializes them. */
71 internal_function __attribute_warn_unused_result__
72 re_string_construct (re_string_t *pstr, const char *str, Idx len,
73 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
76 memset (pstr, '\0', sizeof (re_string_t));
77 re_string_construct_common (str, len, pstr, trans, icase, dfa);
81 ret = re_string_realloc_buffers (pstr, len + 1);
82 if (BE (ret != REG_NOERROR, 0))
85 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
90 if (dfa->mb_cur_max > 1)
94 ret = build_wcs_upper_buffer (pstr);
95 if (BE (ret != REG_NOERROR, 0))
97 if (pstr->valid_raw_len >= len)
99 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
101 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
102 if (BE (ret != REG_NOERROR, 0))
107 #endif /* RE_ENABLE_I18N */
108 build_upper_buffer (pstr);
112 #ifdef RE_ENABLE_I18N
113 if (dfa->mb_cur_max > 1)
114 build_wcs_buffer (pstr);
116 #endif /* RE_ENABLE_I18N */
119 re_string_translate_buffer (pstr);
122 pstr->valid_len = pstr->bufs_len;
123 pstr->valid_raw_len = pstr->bufs_len;
131 /* Helper functions for re_string_allocate and re_string_construct. */
134 internal_function __attribute_warn_unused_result__
135 re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
137 #ifdef RE_ENABLE_I18N
138 if (pstr->mb_cur_max > 1)
142 /* Avoid overflow. */
143 size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
144 if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
147 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
148 if (BE (new_wcs == NULL, 0))
151 if (pstr->offsets != NULL)
153 Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
154 if (BE (new_offsets == NULL, 0))
156 pstr->offsets = new_offsets;
159 #endif /* RE_ENABLE_I18N */
160 if (pstr->mbs_allocated)
162 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
164 if (BE (new_mbs == NULL, 0))
168 pstr->bufs_len = new_buf_len;
175 re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
176 RE_TRANSLATE_TYPE trans, bool icase,
179 pstr->raw_mbs = (const unsigned char *) str;
184 pstr->mbs_allocated = (trans != NULL || icase);
185 pstr->mb_cur_max = dfa->mb_cur_max;
186 pstr->is_utf8 = dfa->is_utf8;
187 pstr->map_notascii = dfa->map_notascii;
188 pstr->stop = pstr->len;
189 pstr->raw_stop = pstr->stop;
192 #ifdef RE_ENABLE_I18N
194 /* Build the wide character buffer PSTR->WCS.
195 If the byte sequence of the string is:
196 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
197 then the wide character buffer will be:
198 <wc1> , WEOF , <wc2> , WEOF , <wc3>
199 WEOF is used as padding; it marks a position that is not
200 the first byte of a multibyte character.
202 Note that this function assumes PSTR->VALID_LEN elements are already
203 built and starts from PSTR->VALID_LEN. */
207 build_wcs_buffer (re_string_t *pstr)
210 unsigned char buf[MB_LEN_MAX];
211 assert (MB_LEN_MAX >= pstr->mb_cur_max);
213 unsigned char buf[64];
216 Idx byte_idx, end_idx, remain_len;
219 /* Build the buffers from pstr->valid_len to either pstr->len or pstr->bufs_len, whichever is smaller. */
221 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
222 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
227 remain_len = end_idx - byte_idx;
228 prev_st = pstr->cur_state;
229 /* Apply the translation if needed. */
230 if (BE (pstr->trans != NULL, 0))
234 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
236 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
237 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
239 p = (const char *) buf;
242 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
243 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
244 if (BE (mbclen == (size_t) -2, 0))
246 /* The buffer doesn't have enough space; stop building here. */
247 pstr->cur_state = prev_st;
250 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
252 /* We treat these cases as a singlebyte character. */
254 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
255 if (BE (pstr->trans != NULL, 0))
256 wc = pstr->trans[wc];
257 pstr->cur_state = prev_st;
260 /* Write wide character and padding. */
261 pstr->wcs[byte_idx++] = wc;
262 /* Write paddings. */
263 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
264 pstr->wcs[byte_idx++] = WEOF;
266 pstr->valid_len = byte_idx;
267 pstr->valid_raw_len = byte_idx;
270 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
271 but for REG_ICASE. */
274 internal_function __attribute_warn_unused_result__
275 build_wcs_upper_buffer (re_string_t *pstr)
278 Idx src_idx, byte_idx, end_idx, remain_len;
281 char buf[MB_LEN_MAX];
282 assert (MB_LEN_MAX >= pstr->mb_cur_max);
287 byte_idx = pstr->valid_len;
288 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
290 /* The following optimization assumes that ASCII characters can be
291 mapped to wide characters with a simple cast. */
292 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
294 while (byte_idx < end_idx)
298 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
299 && mbsinit (&pstr->cur_state))
301 /* In case of a singlebyte character. */
303 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
304 /* The next step uses the assumption that wchar_t is encoded
305 ASCII-safe: all ASCII values can be converted like this. */
306 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
311 remain_len = end_idx - byte_idx;
312 prev_st = pstr->cur_state;
313 mbclen = __mbrtowc (&wc,
314 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
315 + byte_idx), remain_len, &pstr->cur_state);
316 if (BE (mbclen < (size_t) -2, 1))
324 mbcdlen = wcrtomb (buf, wcu, &prev_st);
325 if (BE (mbclen == mbcdlen, 1))
326 memcpy (pstr->mbs + byte_idx, buf, mbclen);
334 memcpy (pstr->mbs + byte_idx,
335 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
336 pstr->wcs[byte_idx++] = wcu;
337 /* Write paddings. */
338 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
339 pstr->wcs[byte_idx++] = WEOF;
341 else if (mbclen == (size_t) -1 || mbclen == 0)
343 /* It is an invalid character or '\0'. Just use the byte. */
344 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
345 pstr->mbs[byte_idx] = ch;
346 /* And also cast it to wide char. */
347 pstr->wcs[byte_idx++] = (wchar_t) ch;
348 if (BE (mbclen == (size_t) -1, 0))
349 pstr->cur_state = prev_st;
353 /* The buffer doesn't have enough space; stop building here. */
354 pstr->cur_state = prev_st;
358 pstr->valid_len = byte_idx;
359 pstr->valid_raw_len = byte_idx;
363 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
368 remain_len = end_idx - byte_idx;
369 prev_st = pstr->cur_state;
370 if (BE (pstr->trans != NULL, 0))
374 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
376 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
377 buf[i] = pstr->trans[ch];
379 p = (const char *) buf;
382 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
383 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
384 if (BE (mbclen < (size_t) -2, 1))
392 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
393 if (BE (mbclen == mbcdlen, 1))
394 memcpy (pstr->mbs + byte_idx, buf, mbclen);
395 else if (mbcdlen != (size_t) -1)
399 if (byte_idx + mbcdlen > pstr->bufs_len)
401 pstr->cur_state = prev_st;
405 if (pstr->offsets == NULL)
407 pstr->offsets = re_malloc (Idx, pstr->bufs_len);
409 if (pstr->offsets == NULL)
412 if (!pstr->offsets_needed)
414 for (i = 0; i < (size_t) byte_idx; ++i)
415 pstr->offsets[i] = i;
416 pstr->offsets_needed = 1;
419 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
420 pstr->wcs[byte_idx] = wcu;
421 pstr->offsets[byte_idx] = src_idx;
422 for (i = 1; i < mbcdlen; ++i)
424 pstr->offsets[byte_idx + i]
425 = src_idx + (i < mbclen ? i : mbclen - 1);
426 pstr->wcs[byte_idx + i] = WEOF;
428 pstr->len += mbcdlen - mbclen;
429 if (pstr->raw_stop > src_idx)
430 pstr->stop += mbcdlen - mbclen;
431 end_idx = (pstr->bufs_len > pstr->len)
432 ? pstr->len : pstr->bufs_len;
438 memcpy (pstr->mbs + byte_idx, p, mbclen);
441 memcpy (pstr->mbs + byte_idx, p, mbclen);
443 if (BE (pstr->offsets_needed != 0, 0))
446 for (i = 0; i < mbclen; ++i)
447 pstr->offsets[byte_idx + i] = src_idx + i;
451 pstr->wcs[byte_idx++] = wcu;
452 /* Write paddings. */
453 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
454 pstr->wcs[byte_idx++] = WEOF;
456 else if (mbclen == (size_t) -1 || mbclen == 0)
458 /* It is an invalid character or '\0'. Just use the byte. */
459 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
461 if (BE (pstr->trans != NULL, 0))
462 ch = pstr->trans [ch];
463 pstr->mbs[byte_idx] = ch;
465 if (BE (pstr->offsets_needed != 0, 0))
466 pstr->offsets[byte_idx] = src_idx;
469 /* And also cast it to wide char. */
470 pstr->wcs[byte_idx++] = (wchar_t) ch;
471 if (BE (mbclen == (size_t) -1, 0))
472 pstr->cur_state = prev_st;
476 /* The buffer doesn't have enough space; stop building here. */
477 pstr->cur_state = prev_st;
481 pstr->valid_len = byte_idx;
482 pstr->valid_raw_len = src_idx;
486 /* Skip characters until the index is no longer less than NEW_RAW_IDX. */
491 re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
498 /* Skip the characters which are not necessary to check. */
499 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
500 rawbuf_idx < new_raw_idx;)
504 remain_len = pstr->len - rawbuf_idx;
505 prev_st = pstr->cur_state;
506 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
507 remain_len, &pstr->cur_state);
508 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
510 /* We treat these cases as a single byte character. */
511 if (mbclen == 0 || remain_len == 0)
514 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
516 pstr->cur_state = prev_st;
520 /* Then proceed to the next character. */
521 rawbuf_idx += mbclen;
526 #endif /* RE_ENABLE_I18N */
528 /* Build the buffer PSTR->MBS, applying the translation if needed.
529 This function is used in case of REG_ICASE. */
533 build_upper_buffer (re_string_t *pstr)
535 Idx char_idx, end_idx;
536 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
538 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
540 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
541 if (BE (pstr->trans != NULL, 0))
542 ch = pstr->trans[ch];
544 pstr->mbs[char_idx] = toupper (ch);
546 pstr->mbs[char_idx] = ch;
548 pstr->valid_len = char_idx;
549 pstr->valid_raw_len = char_idx;
552 /* Apply TRANS to the buffer in PSTR. */
556 re_string_translate_buffer (re_string_t *pstr)
558 Idx buf_idx, end_idx;
559 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
561 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
563 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
564 pstr->mbs[buf_idx] = pstr->trans[ch];
567 pstr->valid_len = buf_idx;
568 pstr->valid_raw_len = buf_idx;
571 /* This function reconstructs the buffers.
572 Concretely, it converts to wide characters when pstr->mb_cur_max > 1,
573 converts to upper case for REG_ICASE, and applies the translation. */
576 internal_function __attribute_warn_unused_result__
577 re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
581 if (BE (pstr->raw_mbs_idx <= idx, 0))
582 offset = idx - pstr->raw_mbs_idx;
586 #ifdef RE_ENABLE_I18N
587 if (pstr->mb_cur_max > 1)
588 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
589 #endif /* RE_ENABLE_I18N */
590 pstr->len = pstr->raw_len;
591 pstr->stop = pstr->raw_stop;
593 pstr->raw_mbs_idx = 0;
594 pstr->valid_raw_len = 0;
595 pstr->offsets_needed = 0;
596 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
597 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
598 if (!pstr->mbs_allocated)
599 pstr->mbs = (unsigned char *) pstr->raw_mbs;
603 if (BE (offset != 0, 1))
605 /* Should the already checked characters be kept? */
606 if (BE (offset < pstr->valid_raw_len, 1))
608 /* Yes, move them to the front of the buffer. */
609 #ifdef RE_ENABLE_I18N
610 if (BE (pstr->offsets_needed, 0))
612 Idx low = 0, high = pstr->valid_len, mid;
615 mid = (high + low) / 2;
616 if (pstr->offsets[mid] > offset)
618 else if (pstr->offsets[mid] < offset)
624 if (pstr->offsets[mid] < offset)
626 pstr->tip_context = re_string_context_at (pstr, mid - 1,
628 /* This can be quite complicated, so handle specially
629 only the common and easy case where the character with
630 different length representation of lower and upper
631 case is present at or after offset. */
632 if (pstr->valid_len > offset
633 && mid == offset && pstr->offsets[mid] == offset)
635 memmove (pstr->wcs, pstr->wcs + offset,
636 (pstr->valid_len - offset) * sizeof (wint_t));
637 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
638 pstr->valid_len -= offset;
639 pstr->valid_raw_len -= offset;
640 for (low = 0; low < pstr->valid_len; low++)
641 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
645 /* Otherwise, just find out how long the partial multibyte
646 character at offset is and fill it with WEOF/255. */
647 pstr->len = pstr->raw_len - idx + offset;
648 pstr->stop = pstr->raw_stop - idx + offset;
649 pstr->offsets_needed = 0;
650 while (mid > 0 && pstr->offsets[mid - 1] == offset)
652 while (mid < pstr->valid_len)
653 if (pstr->wcs[mid] != WEOF)
657 if (mid == pstr->valid_len)
661 pstr->valid_len = pstr->offsets[mid] - offset;
664 for (low = 0; low < pstr->valid_len; ++low)
665 pstr->wcs[low] = WEOF;
666 memset (pstr->mbs, 255, pstr->valid_len);
669 pstr->valid_raw_len = pstr->valid_len;
675 pstr->tip_context = re_string_context_at (pstr, offset - 1,
677 #ifdef RE_ENABLE_I18N
678 if (pstr->mb_cur_max > 1)
679 memmove (pstr->wcs, pstr->wcs + offset,
680 (pstr->valid_len - offset) * sizeof (wint_t));
681 #endif /* RE_ENABLE_I18N */
682 if (BE (pstr->mbs_allocated, 0))
683 memmove (pstr->mbs, pstr->mbs + offset,
684 pstr->valid_len - offset);
685 pstr->valid_len -= offset;
686 pstr->valid_raw_len -= offset;
688 assert (pstr->valid_len > 0);
694 #ifdef RE_ENABLE_I18N
695 /* No, skip all characters until IDX. */
696 Idx prev_valid_len = pstr->valid_len;
698 if (BE (pstr->offsets_needed, 0))
700 pstr->len = pstr->raw_len - idx + offset;
701 pstr->stop = pstr->raw_stop - idx + offset;
702 pstr->offsets_needed = 0;
706 #ifdef RE_ENABLE_I18N
707 if (pstr->mb_cur_max > 1)
714 const unsigned char *raw, *p, *end;
716 /* Special case UTF-8. Multi-byte chars start with any
717 byte other than 0x80 - 0xbf. */
718 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
719 end = raw + (offset - pstr->mb_cur_max);
720 if (end < pstr->raw_mbs)
722 p = raw + offset - 1;
724 /* We know the wchar_t encoding is UCS4, so for the simple
725 case, ASCII characters, skip the conversion step. */
726 if (isascii (*p) && BE (pstr->trans == NULL, 1))
728 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
729 /* pstr->valid_len = 0; */
734 for (; p >= end; --p)
735 if ((*p & 0xc0) != 0x80)
739 Idx mlen = raw + pstr->len - p;
742 #if 0 /* dead code: buf is set but never used */
743 unsigned char buf[6];
744 if (BE (pstr->trans != NULL, 0))
746 int i = mlen < 6 ? mlen : 6;
748 buf[i] = pstr->trans[p[i]];
751 /* XXX Don't use mbrtowc, we know which conversion
752 to use (UTF-8 -> UCS4). */
753 memset (&cur_state, 0, sizeof (cur_state));
754 mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
756 if (raw + offset - p <= mbclen
757 && mbclen < (size_t) -2)
759 memset (&pstr->cur_state, '\0',
761 pstr->valid_len = mbclen - (raw + offset - p);
769 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
772 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
774 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
775 && IS_WIDE_WORD_CHAR (wc))
777 : ((IS_WIDE_NEWLINE (wc)
778 && pstr->newline_anchor)
779 ? CONTEXT_NEWLINE : 0));
780 if (BE (pstr->valid_len, 0))
782 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
783 pstr->wcs[wcs_idx] = WEOF;
784 if (pstr->mbs_allocated)
785 memset (pstr->mbs, 255, pstr->valid_len);
787 pstr->valid_raw_len = pstr->valid_len;
790 #endif /* RE_ENABLE_I18N */
792 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
793 pstr->valid_raw_len = 0;
796 pstr->tip_context = (bitset_contain (pstr->word_char, c)
798 : ((IS_NEWLINE (c) && pstr->newline_anchor)
799 ? CONTEXT_NEWLINE : 0));
802 if (!BE (pstr->mbs_allocated, 0))
805 pstr->raw_mbs_idx = idx;
807 pstr->stop -= offset;
809 /* Then build the buffers. */
810 #ifdef RE_ENABLE_I18N
811 if (pstr->mb_cur_max > 1)
815 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
816 if (BE (ret != REG_NOERROR, 0))
820 build_wcs_buffer (pstr);
823 #endif /* RE_ENABLE_I18N */
824 if (BE (pstr->mbs_allocated, 0))
827 build_upper_buffer (pstr);
828 else if (pstr->trans != NULL)
829 re_string_translate_buffer (pstr);
832 pstr->valid_len = pstr->len;
839 internal_function __attribute ((pure))
840 re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
845 /* Handle the common (easiest) cases first. */
846 if (BE (!pstr->mbs_allocated, 1))
847 return re_string_peek_byte (pstr, idx);
849 #ifdef RE_ENABLE_I18N
850 if (pstr->mb_cur_max > 1
851 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
852 return re_string_peek_byte (pstr, idx);
855 off = pstr->cur_idx + idx;
856 #ifdef RE_ENABLE_I18N
857 if (pstr->offsets_needed)
858 off = pstr->offsets[off];
861 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
863 #ifdef RE_ENABLE_I18N
864 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
865 this function returns CAPITAL LETTER I instead of first byte of
866 DOTLESS SMALL LETTER I. The latter would confuse the parser,
867 since peek_byte_case doesn't advance cur_idx in any way. */
868 if (pstr->offsets_needed && !isascii (ch))
869 return re_string_peek_byte (pstr, idx);
876 internal_function __attribute ((pure))
877 re_string_fetch_byte_case (re_string_t *pstr)
879 if (BE (!pstr->mbs_allocated, 1))
880 return re_string_fetch_byte (pstr);
882 #ifdef RE_ENABLE_I18N
883 if (pstr->offsets_needed)
888 /* For tr_TR.UTF-8 [[:islower:]] there is
889 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
890 in that case the whole multi-byte character and return
891 the original letter. On the other side, with
892 [[: DOTLESS SMALL LETTER I return [[:I, as doing
893 anything else would complicate things too much. */
895 if (!re_string_first_byte (pstr, pstr->cur_idx))
896 return re_string_fetch_byte (pstr);
898 off = pstr->offsets[pstr->cur_idx];
899 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
902 return re_string_fetch_byte (pstr);
904 re_string_skip_bytes (pstr,
905 re_string_char_size_at (pstr, pstr->cur_idx));
910 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
915 re_string_destruct (re_string_t *pstr)
917 #ifdef RE_ENABLE_I18N
919 re_free (pstr->offsets);
920 #endif /* RE_ENABLE_I18N */
921 if (pstr->mbs_allocated)
925 /* Return the context at IDX in INPUT. */
929 re_string_context_at (const re_string_t *input, Idx idx, int eflags)
932 if (BE (! REG_VALID_INDEX (idx), 0))
933 /* In this case, we use the value stored in input->tip_context,
934 since we can't know the character in input->mbs[-1] here. */
935 return input->tip_context;
936 if (BE (idx == input->len, 0))
937 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
938 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
939 #ifdef RE_ENABLE_I18N
940 if (input->mb_cur_max > 1)
944 while(input->wcs[wc_idx] == WEOF)
947 /* It must not happen. */
948 assert (REG_VALID_INDEX (wc_idx));
951 if (! REG_VALID_INDEX (wc_idx))
952 return input->tip_context;
954 wc = input->wcs[wc_idx];
955 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
957 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
958 ? CONTEXT_NEWLINE : 0);
963 c = re_string_byte_at (input, idx);
964 if (bitset_contain (input->word_char, c))
966 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
970 /* Functions for set operation. */
973 internal_function __attribute_warn_unused_result__
974 re_node_set_alloc (re_node_set *set, Idx size)
978 set->elems = re_malloc (Idx, size);
979 if (BE (set->elems == NULL, 0))
985 internal_function __attribute_warn_unused_result__
986 re_node_set_init_1 (re_node_set *set, Idx elem)
990 set->elems = re_malloc (Idx, 1);
991 if (BE (set->elems == NULL, 0))
993 set->alloc = set->nelem = 0;
996 set->elems[0] = elem;
1000 static reg_errcode_t
1001 internal_function __attribute_warn_unused_result__
1002 re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
1005 set->elems = re_malloc (Idx, 2);
1006 if (BE (set->elems == NULL, 0))
1011 set->elems[0] = elem1;
1018 set->elems[0] = elem1;
1019 set->elems[1] = elem2;
1023 set->elems[0] = elem2;
1024 set->elems[1] = elem1;
1030 static reg_errcode_t
1031 internal_function __attribute_warn_unused_result__
1032 re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1034 dest->nelem = src->nelem;
1037 dest->alloc = dest->nelem;
1038 dest->elems = re_malloc (Idx, dest->alloc);
1039 if (BE (dest->elems == NULL, 0))
1041 dest->alloc = dest->nelem = 0;
1044 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1047 re_node_set_init_empty (dest);
1051 /* Calculate the intersection of the sets SRC1 and SRC2 and merge it into
1052 DEST. The return value is an error code, or REG_NOERROR on success.
1053 Note: we assume dest->elems is NULL when dest->alloc is 0. */
1055 static reg_errcode_t
1056 internal_function __attribute_warn_unused_result__
1057 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1058 const re_node_set *src2)
1060 Idx i1, i2, is, id, delta, sbase;
1061 if (src1->nelem == 0 || src2->nelem == 0)
1064 /* We need dest->nelem + 2 * elems_in_intersection; this is a
1065 conservative estimate. */
1066 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1068 Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
1069 Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
1070 if (BE (new_elems == NULL, 0))
1072 dest->elems = new_elems;
1073 dest->alloc = new_alloc;
1076 /* Find the items in the intersection of SRC1 and SRC2, and copy
1077 into the top of DEST those that are not already in DEST itself. */
1078 sbase = dest->nelem + src1->nelem + src2->nelem;
1079 i1 = src1->nelem - 1;
1080 i2 = src2->nelem - 1;
1081 id = dest->nelem - 1;
1084 if (src1->elems[i1] == src2->elems[i2])
1086 /* Try to find the item in DEST. Maybe we could binary search? */
1087 while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
1090 if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
1091 dest->elems[--sbase] = src1->elems[i1];
1093 if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
1097 /* Lower the highest of the two items. */
1098 else if (src1->elems[i1] < src2->elems[i2])
1100 if (! REG_VALID_INDEX (--i2))
1105 if (! REG_VALID_INDEX (--i1))
1110 id = dest->nelem - 1;
1111 is = dest->nelem + src1->nelem + src2->nelem - 1;
1112 delta = is - sbase + 1;
1114 /* Now copy. When DELTA becomes zero, the remaining
1115 DEST elements are already in place; this is more or
1116 less the same loop that is in re_node_set_merge. */
1117 dest->nelem += delta;
1118 if (delta > 0 && REG_VALID_INDEX (id))
1121 if (dest->elems[is] > dest->elems[id])
1123 /* Copy from the top. */
1124 dest->elems[id + delta--] = dest->elems[is--];
1130 /* Slide from the bottom. */
1131 dest->elems[id + delta] = dest->elems[id];
1132 if (! REG_VALID_INDEX (--id))
1137 /* Copy remaining SRC elements. */
1138 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
1143 /* Calculate the union of the sets SRC1 and SRC2 and store it in
1144 DEST. The return value is an error code, or REG_NOERROR on success. */
1146 static reg_errcode_t
1147 internal_function __attribute_warn_unused_result__
1148 re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1149 const re_node_set *src2)
1152 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1154 dest->alloc = src1->nelem + src2->nelem;
1155 dest->elems = re_malloc (Idx, dest->alloc);
1156 if (BE (dest->elems == NULL, 0))
1161 if (src1 != NULL && src1->nelem > 0)
1162 return re_node_set_init_copy (dest, src1);
1163 else if (src2 != NULL && src2->nelem > 0)
1164 return re_node_set_init_copy (dest, src2);
1166 re_node_set_init_empty (dest);
1169 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1171 if (src1->elems[i1] > src2->elems[i2])
1173 dest->elems[id++] = src2->elems[i2++];
1176 if (src1->elems[i1] == src2->elems[i2])
1178 dest->elems[id++] = src1->elems[i1++];
1180 if (i1 < src1->nelem)
1182 memcpy (dest->elems + id, src1->elems + i1,
1183 (src1->nelem - i1) * sizeof (Idx));
1184 id += src1->nelem - i1;
1186 else if (i2 < src2->nelem)
1188 memcpy (dest->elems + id, src2->elems + i2,
1189 (src2->nelem - i2) * sizeof (Idx));
1190 id += src2->nelem - i2;
1196 /* Calculate the union of the sets DEST and SRC and store it in
1197 DEST. The return value is an error code, or REG_NOERROR on success. */
1199 static reg_errcode_t
1200 internal_function __attribute_warn_unused_result__
1201 re_node_set_merge (re_node_set *dest, const re_node_set *src)
1203 Idx is, id, sbase, delta;
1204 if (src == NULL || src->nelem == 0)
1206 if (dest->alloc < 2 * src->nelem + dest->nelem)
1208 Idx new_alloc = 2 * (src->nelem + dest->alloc);
1209 Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
1210 if (BE (new_buffer == NULL, 0))
1212 dest->elems = new_buffer;
1213 dest->alloc = new_alloc;
1216 if (BE (dest->nelem == 0, 0))
1218 dest->nelem = src->nelem;
1219 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1223 /* Copy into the top of DEST the items of SRC that are not
1224 found in DEST. Maybe we could binary search in DEST? */
1225 for (sbase = dest->nelem + 2 * src->nelem,
1226 is = src->nelem - 1, id = dest->nelem - 1;
1227 REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
1229 if (dest->elems[id] == src->elems[is])
1231 else if (dest->elems[id] < src->elems[is])
1232 dest->elems[--sbase] = src->elems[is--];
1233 else /* if (dest->elems[id] > src->elems[is]) */
1237 if (REG_VALID_INDEX (is))
1239 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1241 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
1244 id = dest->nelem - 1;
1245 is = dest->nelem + 2 * src->nelem - 1;
1246 delta = is - sbase + 1;
1250 /* Now copy. When DELTA becomes zero, the remaining
1251 DEST elements are already in place. */
1252 dest->nelem += delta;
1255 if (dest->elems[is] > dest->elems[id])
1257 /* Copy from the top. */
1258 dest->elems[id + delta--] = dest->elems[is--];
1264 /* Slide from the bottom. */
1265 dest->elems[id + delta] = dest->elems[id];
1266 if (! REG_VALID_INDEX (--id))
1268 /* Copy remaining SRC elements. */
1269 memcpy (dest->elems, dest->elems + sbase,
1270 delta * sizeof (Idx));
1279 /* Insert the new element ELEM into the re_node_set* SET.
1280 SET should not already contain ELEM.
1281 Return true if successful. */
1284 internal_function __attribute_warn_unused_result__
1285 re_node_set_insert (re_node_set *set, Idx elem)
1288 /* In case the set is empty. */
1289 if (set->alloc == 0)
1290 return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
1292 if (BE (set->nelem, 0) == 0)
1294 /* We already guaranteed above that set->alloc != 0. */
1295 set->elems[0] = elem;
1300 /* Reallocate if needed. */
1301 if (set->alloc == set->nelem)
1304 set->alloc = set->alloc * 2;
1305 new_elems = re_realloc (set->elems, Idx, set->alloc);
1306 if (BE (new_elems == NULL, 0))
1308 set->elems = new_elems;
1311 /* Move the elements that follow the new element. Test the
1312 first element separately to skip a check in the inner loop. */
1313 if (elem < set->elems[0])
1316 for (idx = set->nelem; idx > 0; idx--)
1317 set->elems[idx] = set->elems[idx - 1];
1321 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1322 set->elems[idx] = set->elems[idx - 1];
1325 /* Insert the new element. */
1326 set->elems[idx] = elem;
1331 /* Insert the new element ELEM into the re_node_set* SET.
1332 SET should not already contain any element greater than or equal to ELEM.
1333 Return true if successful. */
1336 internal_function __attribute_warn_unused_result__
1337 re_node_set_insert_last (re_node_set *set, Idx elem)
1339 /* Reallocate if needed. */
1340 if (set->alloc == set->nelem)
1343 set->alloc = (set->alloc + 1) * 2;
1344 new_elems = re_realloc (set->elems, Idx, set->alloc);
1345 if (BE (new_elems == NULL, 0))
1347 set->elems = new_elems;
1350 /* Insert the new element. */
1351 set->elems[set->nelem++] = elem;
1355 /* Compare two node sets SET1 and SET2.
1356 Return true if SET1 and SET2 are equivalent. */
1359 internal_function __attribute ((pure))
1360 re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1363 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1365 for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
1366 if (set1->elems[i] != set2->elems[i])
1371 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1374 internal_function __attribute ((pure))
1375 re_node_set_contains (const re_node_set *set, Idx elem)
1377 __re_size_t idx, right, mid;
1378 if (! REG_VALID_NONZERO_INDEX (set->nelem))
1381 /* Binary search the element. */
1383 right = set->nelem - 1;
1386 mid = (idx + right) / 2;
1387 if (set->elems[mid] < elem)
1392 return set->elems[idx] == elem ? idx + 1 : 0;
1397 re_node_set_remove_at (re_node_set *set, Idx idx)
1399 verify (! TYPE_SIGNED (Idx));
1402 if (idx >= set->nelem)
1405 for (; idx < set->nelem; idx++)
1406 set->elems[idx] = set->elems[idx + 1];
1410 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1411 Or return REG_MISSING if an error occurred. */
1415 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1417 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1419 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1420 Idx *new_nexts, *new_indices;
1421 re_node_set *new_edests, *new_eclosures;
1422 re_token_t *new_nodes;
1423 size_t max_object_size =
1424 MAX (sizeof (re_token_t),
1425 MAX (sizeof (re_node_set),
1428 /* Avoid overflows. */
1429 if (BE (SIZE_MAX / 2 / max_object_size < dfa->nodes_alloc, 0))
1432 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1433 if (BE (new_nodes == NULL, 0))
1435 dfa->nodes = new_nodes;
1436 new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1437 new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1438 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1439 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1440 if (BE (new_nexts == NULL || new_indices == NULL
1441 || new_edests == NULL || new_eclosures == NULL, 0))
1443 dfa->nexts = new_nexts;
1444 dfa->org_indices = new_indices;
1445 dfa->edests = new_edests;
1446 dfa->eclosures = new_eclosures;
1447 dfa->nodes_alloc = new_nodes_alloc;
1449 dfa->nodes[dfa->nodes_len] = token;
1450 dfa->nodes[dfa->nodes_len].constraint = 0;
1451 #ifdef RE_ENABLE_I18N
1453 int type = token.type;
1454 dfa->nodes[dfa->nodes_len].accept_mb =
1455 (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1458 dfa->nexts[dfa->nodes_len] = REG_MISSING;
1459 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1460 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1461 return dfa->nodes_len++;
1464 static inline re_hashval_t
1466 calc_state_hash (const re_node_set *nodes, unsigned int context)
1468 re_hashval_t hash = nodes->nelem + context;
1470 for (i = 0 ; i < nodes->nelem ; i++)
1471 hash += nodes->elems[i];
1475 /* Search for the state whose node_set is equivalent to NODES.
1476 Return the pointer to the state, if we found it in the DFA.
1477 Otherwise create the new one and return it. In case of an error
1478 return NULL and set the error code in ERR.
1479 Note: - We assume NULL as the invalid state, then it is possible that
1480 return value is NULL and ERR is REG_NOERROR.
1481 - We never return non-NULL value in case of any errors, it is for
1484 static re_dfastate_t *
1485 internal_function __attribute_warn_unused_result__
1486 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1487 const re_node_set *nodes)
1490 re_dfastate_t *new_state;
1491 struct re_state_table_entry *spot;
1494 /* Suppress bogus uninitialized-variable warnings. */
1497 if (BE (nodes->nelem == 0, 0))
1502 hash = calc_state_hash (nodes, 0);
1503 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1505 for (i = 0 ; i < spot->num ; i++)
1507 re_dfastate_t *state = spot->array[i];
1508 if (hash != state->hash)
1510 if (re_node_set_compare (&state->nodes, nodes))
1514 /* There are no appropriate state in the dfa, create the new one. */
1515 new_state = create_ci_newstate (dfa, nodes, hash);
1516 if (BE (new_state == NULL, 0))
1522 /* Search for the state whose node_set is equivalent to NODES and
1523 whose context is equivalent to CONTEXT.
1524 Return the pointer to the state, if we found it in the DFA.
1525 Otherwise create the new one and return it. In case of an error
1526 return NULL and set the error code in ERR.
1527 Note: - We assume NULL as the invalid state, then it is possible that
1528 return value is NULL and ERR is REG_NOERROR.
1529 - We never return non-NULL value in case of any errors, it is for
1532 static re_dfastate_t *
1533 internal_function __attribute_warn_unused_result__
1534 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1535 const re_node_set *nodes, unsigned int context)
1538 re_dfastate_t *new_state;
1539 struct re_state_table_entry *spot;
1542 /* Suppress bogus uninitialized-variable warnings. */
1545 if (nodes->nelem == 0)
1550 hash = calc_state_hash (nodes, context);
1551 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1553 for (i = 0 ; i < spot->num ; i++)
1555 re_dfastate_t *state = spot->array[i];
1556 if (state->hash == hash
1557 && state->context == context
1558 && re_node_set_compare (state->entrance_nodes, nodes))
1561 /* There are no appropriate state in `dfa', create the new one. */
1562 new_state = create_cd_newstate (dfa, nodes, context, hash);
1563 if (BE (new_state == NULL, 0))
1569 /* Finish initialization of the new state NEWSTATE, and using its hash value
1570 HASH put in the appropriate bucket of DFA's state table. Return value
1571 indicates the error code if failed. */
1573 static reg_errcode_t
1574 __attribute_warn_unused_result__
1575 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1578 struct re_state_table_entry *spot;
1582 newstate->hash = hash;
1583 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1584 if (BE (err != REG_NOERROR, 0))
1586 for (i = 0; i < newstate->nodes.nelem; i++)
1588 Idx elem = newstate->nodes.elems[i];
1589 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1590 if (BE (! re_node_set_insert_last (&newstate->non_eps_nodes, elem), 0))
1594 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1595 if (BE (spot->alloc <= spot->num, 0))
1597 Idx new_alloc = 2 * spot->num + 2;
1598 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1600 if (BE (new_array == NULL, 0))
1602 spot->array = new_array;
1603 spot->alloc = new_alloc;
1605 spot->array[spot->num++] = newstate;
1610 free_state (re_dfastate_t *state)
1612 re_node_set_free (&state->non_eps_nodes);
1613 re_node_set_free (&state->inveclosure);
1614 if (state->entrance_nodes != &state->nodes)
1616 re_node_set_free (state->entrance_nodes);
1617 re_free (state->entrance_nodes);
1619 re_node_set_free (&state->nodes);
1620 re_free (state->word_trtable);
1621 re_free (state->trtable);
1625 /* Create the new state which is independ of contexts.
1626 Return the new state if succeeded, otherwise return NULL. */
1628 static re_dfastate_t *
1629 internal_function __attribute_warn_unused_result__
1630 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1635 re_dfastate_t *newstate;
1637 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1638 if (BE (newstate == NULL, 0))
1640 err = re_node_set_init_copy (&newstate->nodes, nodes);
1641 if (BE (err != REG_NOERROR, 0))
1647 newstate->entrance_nodes = &newstate->nodes;
1648 for (i = 0 ; i < nodes->nelem ; i++)
1650 re_token_t *node = dfa->nodes + nodes->elems[i];
1651 re_token_type_t type = node->type;
1652 if (type == CHARACTER && !node->constraint)
1654 #ifdef RE_ENABLE_I18N
1655 newstate->accept_mb |= node->accept_mb;
1656 #endif /* RE_ENABLE_I18N */
1658 /* If the state has the halt node, the state is a halt state. */
1659 if (type == END_OF_RE)
1661 else if (type == OP_BACK_REF)
1662 newstate->has_backref = 1;
1663 else if (type == ANCHOR || node->constraint)
1664 newstate->has_constraint = 1;
1666 err = register_state (dfa, newstate, hash);
1667 if (BE (err != REG_NOERROR, 0))
1669 free_state (newstate);
1675 /* Create the new state which is depend on the context CONTEXT.
1676 Return the new state if succeeded, otherwise return NULL. */
1678 static re_dfastate_t *
1679 internal_function __attribute_warn_unused_result__
1680 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1681 unsigned int context, re_hashval_t hash)
1683 Idx i, nctx_nodes = 0;
1685 re_dfastate_t *newstate;
1687 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1688 if (BE (newstate == NULL, 0))
1690 err = re_node_set_init_copy (&newstate->nodes, nodes);
1691 if (BE (err != REG_NOERROR, 0))
1697 newstate->context = context;
1698 newstate->entrance_nodes = &newstate->nodes;
1700 for (i = 0 ; i < nodes->nelem ; i++)
1702 re_token_t *node = dfa->nodes + nodes->elems[i];
1703 re_token_type_t type = node->type;
1704 unsigned int constraint = node->constraint;
1706 if (type == CHARACTER && !constraint)
1708 #ifdef RE_ENABLE_I18N
1709 newstate->accept_mb |= node->accept_mb;
1710 #endif /* RE_ENABLE_I18N */
1712 /* If the state has the halt node, the state is a halt state. */
1713 if (type == END_OF_RE)
1715 else if (type == OP_BACK_REF)
1716 newstate->has_backref = 1;
1720 if (newstate->entrance_nodes == &newstate->nodes)
1722 newstate->entrance_nodes = re_malloc (re_node_set, 1);
1723 if (BE (newstate->entrance_nodes == NULL, 0))
1725 free_state (newstate);
1728 if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1732 newstate->has_constraint = 1;
1735 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1737 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1742 err = register_state (dfa, newstate, hash);
1743 if (BE (err != REG_NOERROR, 0))
1745 free_state (newstate);