Merge branch 'vendor/TRE'
[dragonfly.git] / contrib / tre / lib / tre-match-utils.h
1 /*
2   tre-match-utils.h - TRE matcher helper definitions
3
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6
7 */
8
/* Views the identifier `string' that is in scope at the point of use as a
   pointer to a read-only tre_str_source.  GET_NEXT_WCHAR() goes through this
   macro to reach the user-supplied get_next_char callback and its context. */
#define str_source ((const tre_str_source *)(string))
10
11 #ifdef TRE_WCHAR
12
13 #ifdef TRE_MULTIBYTE
14
15 /* Wide character and multibyte support. */
16
/* GET_NEXT_WCHAR() -- build with wide character and multibyte support.

   Moves the current character into `prev_c' and loads the following input
   character into `next_c', according to the input kind held in `type':

     STR_BYTE  one byte from `str_byte' (read as unsigned char);
     STR_WIDE  one element from `str_wide';
     STR_MBS   one multibyte sequence decoded with tre_mbrtowc();
     STR_USER  whatever the `get_next_char' callback of `str_source' yields
               (its return value is stored in `str_user_end').

   For STR_BYTE and STR_WIDE, `next_c' becomes NUL once `len' is non-negative
   and `pos' has reached it.  For STR_MBS and STR_USER, `pos' is advanced by
   `pos_add_next', i.e. by the width recorded for the previous character.

   NOTE: this macro contains a `return REG_NOMATCH;' that leaves the
   *enclosing function* when tre_mbrtowc() reports (size_t)-1 or (size_t)-2.
   It also reads and writes the locals named above, which must be in scope
   wherever the macro is expanded. */
#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    if (type == STR_BYTE)                                                     \
      {                                                                       \
        pos++;                                                                \
        next_c = (len >= 0 && pos >= len)                                     \
                 ? '\0' : (unsigned char)(*str_byte++);                       \
      }                                                                       \
    else if (type == STR_WIDE)                                                \
      {                                                                       \
        pos++;                                                                \
        next_c = (len >= 0 && pos >= len) ? L'\0' : *str_wide++;              \
      }                                                                       \
    else if (type == STR_MBS)                                                 \
      {                                                                       \
        pos += pos_add_next;                                                  \
        if (str_byte == NULL)                                                 \
          next_c = L'\0';                                                     \
        else                                                                  \
          {                                                                   \
            /* Bytes offered to the decoder: what is left of a string */      \
            /* with known length, otherwise a fixed window of 32. */          \
            const int gnw_avail = (len >= 0) ? len - pos : 32;                \
            if (gnw_avail <= 0)                                               \
              {                                                               \
                next_c = L'\0';                                               \
                pos_add_next = 1;                                             \
              }                                                               \
            else                                                              \
              {                                                               \
                const size_t gnw_used =                                       \
                  tre_mbrtowc(&next_c, str_byte, (size_t)gnw_avail,           \
                              &mbstate);                                      \
                if (gnw_used == (size_t)-1 || gnw_used == (size_t)-2)         \
                  return REG_NOMATCH;                                         \
                if (gnw_used == 0 && len >= 0)                                \
                  {                                                           \
                    /* Zero result inside a counted string: step over */      \
                    /* exactly one byte. */                                   \
                    pos_add_next = 1;                                         \
                    next_c = 0;                                               \
                    str_byte++;                                               \
                  }                                                           \
                else                                                          \
                  {                                                           \
                    pos_add_next = gnw_used;                                  \
                    str_byte += gnw_used;                                     \
                  }                                                           \
              }                                                               \
          }                                                                   \
      }                                                                       \
    else if (type == STR_USER)                                                \
      {                                                                       \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
      }                                                                       \
  } while(/*CONSTCOND*/0)
80
81 #else /* !TRE_MULTIBYTE */
82
83 /* Wide character support, no multibyte support. */
84
/* GET_NEXT_WCHAR() -- build with wide character support but no multibyte
   support.

   Moves the current character into `prev_c' and loads the following input
   character into `next_c', according to the input kind held in `type':

     STR_BYTE  one byte from `str_byte' (read as unsigned char);
     STR_WIDE  one element from `str_wide';
     STR_USER  whatever the `get_next_char' callback of `str_source' yields
               (its return value is stored in `str_user_end').

   For STR_BYTE and STR_WIDE, `next_c' becomes NUL once `len' is non-negative
   and `pos' has reached it.  For STR_USER, `pos' is advanced by
   `pos_add_next'.  The locals named above must be in scope wherever the
   macro is expanded. */
#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    if (type == STR_BYTE)                                                     \
      {                                                                       \
        pos++;                                                                \
        next_c = (len >= 0 && pos >= len)                                     \
                 ? '\0' : (unsigned char)(*str_byte++);                       \
      }                                                                       \
    else if (type == STR_WIDE)                                                \
      {                                                                       \
        pos++;                                                                \
        next_c = (len >= 0 && pos >= len) ? L'\0' : *str_wide++;              \
      }                                                                       \
    else if (type == STR_USER)                                                \
      {                                                                       \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
      }                                                                       \
  } while(/*CONSTCOND*/0)
111
112 #endif /* !TRE_MULTIBYTE */
113
114 #else /* !TRE_WCHAR */
115
116 /* No wide character or multibyte support. */
117
/* GET_NEXT_WCHAR() -- build without wide character or multibyte support.

   Moves the current character into `prev_c' and loads the following input
   character into `next_c', according to the input kind held in `type':

     STR_BYTE  one byte from `str_byte' (read as unsigned char); `next_c'
               becomes NUL once `len' is non-negative and `pos' has
               reached it;
     STR_USER  whatever the `get_next_char' callback of `str_source' yields
               (its return value is stored in `str_user_end'); `pos' is
               advanced by `pos_add_next'.

   The locals named above must be in scope wherever the macro is expanded. */
#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    if (type == STR_BYTE)                                                     \
      {                                                                       \
        pos++;                                                                \
        next_c = (len >= 0 && pos >= len)                                     \
                 ? '\0' : (unsigned char)(*str_byte++);                       \
      }                                                                       \
    else if (type == STR_USER)                                                \
      {                                                                       \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
      }                                                                       \
  } while(/*CONSTCOND*/0)
136
137 #endif /* !TRE_WCHAR */
138
139
140
/* Nonzero (1) when `c' counts as a word character: anything tre_isalnum()
   accepts, plus the underscore.  The argument may be evaluated twice, so it
   must not have side effects. */
#define IS_WORD_CHAR(c)  (tre_isalnum(c) || (c) == L'_')
142
/* CHECK_ASSERTIONS(assertions) evaluates to nonzero when at least one of
   the assertion flags set in `assertions' is NOT satisfied at the current
   input position, and to zero when every requested assertion holds.

   Per flag, the assertion is treated as failed when:
     ASSERT_AT_BOL     not at offset 0 (or `reg_notbol' is set), and the
                       previous character is not a newline that counts
                       (`reg_newline');
     ASSERT_AT_EOL     the next character is not NUL (or `reg_noteol' is
                       set), and it is not a newline that counts;
     ASSERT_AT_BOW     the previous character is a word character, or the
                       next one is not;
     ASSERT_AT_EOW     the previous character is not a word character, or
                       the next one is;
     ASSERT_AT_WB      not at either end of the input and both neighbours
                       have the same word-character status;
     ASSERT_AT_WB_NEG  at either end of the input, or the neighbours differ
                       in word-character status.

   The macro reads `pos', `prev_c', `next_c', `reg_notbol', `reg_noteol' and
   `reg_newline' from the expansion site.  `assertions' is evaluated several
   times, so it must be free of side effects; it is parenthesised at every
   use so that compound expressions (e.g. `a | b') are masked as a whole. */
#define CHECK_ASSERTIONS(assertions)                                          \
  ((((assertions) & ASSERT_AT_BOL)                                            \
    && (pos > 0 || reg_notbol)                                                \
    && (prev_c != L'\n' || !reg_newline))                                     \
   || (((assertions) & ASSERT_AT_EOL)                                         \
       && (next_c != L'\0' || reg_noteol)                                     \
       && (next_c != L'\n' || !reg_newline))                                  \
   || (((assertions) & ASSERT_AT_BOW)                                         \
       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))                    \
   || (((assertions) & ASSERT_AT_EOW)                                         \
       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))                    \
   || (((assertions) & ASSERT_AT_WB)                                          \
       && (pos != 0 && next_c != L'\0'                                        \
           && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c)))                  \
   || (((assertions) & ASSERT_AT_WB_NEG)                                      \
       && (pos == 0 || next_c == L'\0'                                        \
           || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
160
/* CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) evaluates to nonzero when the
   character class constraints attached to transition `trans_i' reject
   `prev_c' (read from the expansion site), and to zero otherwise.

   - With ASSERT_CHAR_CLASS set, the check fails if `prev_c' is not in
     `trans_i->u.class'.  When `tnfa->cflags' has REG_ICASE, it fails only
     if neither the lower-case nor the upper-case form is in the class.
   - With ASSERT_CHAR_CLASS_NEG set, the check fails if
     tre_neg_char_classes_match() reports that `prev_c' belongs to one of
     `trans_i->neg_classes'.

   `eflags' is accepted for the callers' sake but is not used.  `trans_i'
   and `tnfa' are evaluated more than once and must be free of side effects;
   both are parenthesised so that any pointer-valued expression may be
   passed. */
#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)                             \
  ((((trans_i)->assertions & ASSERT_CHAR_CLASS)                               \
       && !((tnfa)->cflags & REG_ICASE)                                       \
       && !tre_isctype((tre_cint_t)prev_c, (trans_i)->u.class))               \
    || (((trans_i)->assertions & ASSERT_CHAR_CLASS)                           \
        && ((tnfa)->cflags & REG_ICASE)                                       \
        && !tre_isctype(tre_tolower((tre_cint_t)prev_c),                      \
                        (trans_i)->u.class)                                   \
        && !tre_isctype(tre_toupper((tre_cint_t)prev_c),                      \
                        (trans_i)->u.class))                                  \
    || (((trans_i)->assertions & ASSERT_CHAR_CLASS_NEG)                       \
        && tre_neg_char_classes_match((trans_i)->neg_classes,                 \
                                      (tre_cint_t)prev_c,                     \
                                      (tnfa)->cflags & REG_ICASE)))
172
173
174
175
176 /* Returns 1 if `t1' wins `t2', 0 otherwise. */
177 inline static int
178 tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
179               int *t1, int *t2)
180 {
181   int i;
182   for (i = 0; i < num_tags; i++)
183     {
184       if (tag_directions[i] == TRE_TAG_MINIMIZE)
185         {
186           if (t1[i] < t2[i])
187             return 1;
188           if (t1[i] > t2[i])
189             return 0;
190         }
191       else
192         {
193           if (t1[i] > t2[i])
194             return 1;
195           if (t1[i] < t2[i])
196             return 0;
197         }
198     }
199   /*  assert(0);*/
200   return 0;
201 }
202
203 inline static int
204 tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
205 {
206   DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase));
207   while (*classes != (tre_ctype_t)0)
208     if ((!icase && tre_isctype(wc, *classes))
209         || (icase && (tre_isctype(tre_toupper(wc), *classes)
210                       || tre_isctype(tre_tolower(wc), *classes))))
211       return 1; /* Match. */
212     else
213       classes++;
214   return 0; /* No match. */
215 }