Import TRE regex library v0.8.0 to vendor branch
[dragonfly.git] / contrib / tre / lib / regexec.c
CommitLineData
5f2eab64
JM
1/*
2 tre_regexec.c - TRE POSIX compatible matching functions (and more).
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7*/
8
9#ifdef HAVE_CONFIG_H
10#include <config.h>
11#endif /* HAVE_CONFIG_H */
12
13#ifdef TRE_USE_ALLOCA
14/* AIX requires this to be the first thing in the file. */
15#ifndef __GNUC__
16# if HAVE_ALLOCA_H
17# include <alloca.h>
18# else
19# ifdef _AIX
20 #pragma alloca
21# else
22# ifndef alloca /* predefined by HP cc +Olibcalls */
23char *alloca ();
24# endif
25# endif
26# endif
27#endif
28#endif /* TRE_USE_ALLOCA */
29
30#include <assert.h>
31#include <stdlib.h>
32#include <string.h>
33#ifdef HAVE_WCHAR_H
34#include <wchar.h>
35#endif /* HAVE_WCHAR_H */
36#ifdef HAVE_WCTYPE_H
37#include <wctype.h>
38#endif /* HAVE_WCTYPE_H */
39#ifndef TRE_WCHAR
40#include <ctype.h>
41#endif /* !TRE_WCHAR */
42#ifdef HAVE_MALLOC_H
43#include <malloc.h>
44#endif /* HAVE_MALLOC_H */
45#include <limits.h>
46
47#include "tre-internal.h"
48#include "tre.h"
49#include "xmalloc.h"
50
51
52/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
53 endpoint values. */
54void
55tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
56 const tre_tnfa_t *tnfa, int *tags, int match_eo)
57{
58 tre_submatch_data_t *submatch_data;
59 unsigned int i, j;
60 int *parents;
61
62 i = 0;
63 if (match_eo >= 0 && !(cflags & REG_NOSUB))
64 {
65 /* Construct submatch offsets from the tags. */
66 DPRINT(("end tag = t%d = %d\n", tnfa->end_tag, match_eo));
67 submatch_data = tnfa->submatch_data;
68 while (i < tnfa->num_submatches && i < nmatch)
69 {
70 if (submatch_data[i].so_tag == tnfa->end_tag)
71 pmatch[i].rm_so = match_eo;
72 else
73 pmatch[i].rm_so = tags[submatch_data[i].so_tag];
74
75 if (submatch_data[i].eo_tag == tnfa->end_tag)
76 pmatch[i].rm_eo = match_eo;
77 else
78 pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
79
80 /* If either of the endpoints were not used, this submatch
81 was not part of the match. */
82 if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
83 pmatch[i].rm_so = pmatch[i].rm_eo = -1;
84
85 DPRINT(("pmatch[%d] = {t%d = %d, t%d = %d}\n", i,
86 submatch_data[i].so_tag, pmatch[i].rm_so,
87 submatch_data[i].eo_tag, pmatch[i].rm_eo));
88 i++;
89 }
90 /* Reset all submatches that are not within all of their parent
91 submatches. */
92 i = 0;
93 while (i < tnfa->num_submatches && i < nmatch)
94 {
95 if (pmatch[i].rm_eo == -1)
96 assert(pmatch[i].rm_so == -1);
97 assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
98
99 parents = submatch_data[i].parents;
100 if (parents != NULL)
101 for (j = 0; parents[j] >= 0; j++)
102 {
103 DPRINT(("pmatch[%d] parent %d\n", i, parents[j]));
104 if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
105 || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
106 pmatch[i].rm_so = pmatch[i].rm_eo = -1;
107 }
108 i++;
109 }
110 }
111
112 while (i < nmatch)
113 {
114 pmatch[i].rm_so = -1;
115 pmatch[i].rm_eo = -1;
116 i++;
117 }
118}
119
120
121/*
122 Wrapper functions for POSIX compatible regexp matching.
123*/
124
125int
126tre_have_backrefs(const regex_t *preg)
127{
128 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
129 return tnfa->have_backrefs;
130}
131
132int
133tre_have_approx(const regex_t *preg)
134{
135 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
136 return tnfa->have_approx;
137}
138
139static int
140tre_match(const tre_tnfa_t *tnfa, const void *string, size_t len,
141 tre_str_type_t type, size_t nmatch, regmatch_t pmatch[],
142 int eflags)
143{
144 reg_errcode_t status;
145 int *tags = NULL, eo;
146 if (tnfa->num_tags > 0 && nmatch > 0)
147 {
148#ifdef TRE_USE_ALLOCA
149 tags = alloca(sizeof(*tags) * tnfa->num_tags);
150#else /* !TRE_USE_ALLOCA */
151 tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
152#endif /* !TRE_USE_ALLOCA */
153 if (tags == NULL)
154 return REG_ESPACE;
155 }
156
157 /* Dispatch to the appropriate matcher. */
158 if (tnfa->have_backrefs || eflags & REG_BACKTRACKING_MATCHER)
159 {
160 /* The regex has back references, use the backtracking matcher. */
161 if (type == STR_USER)
162 {
163 const tre_str_source *source = string;
164 if (source->rewind == NULL || source->compare == NULL)
165 /* The backtracking matcher requires rewind and compare
166 capabilities from the input stream. */
167 return REG_BADPAT;
168 }
169 status = tre_tnfa_run_backtrack(tnfa, string, (int)len, type,
170 tags, eflags, &eo);
171 }
172#ifdef TRE_APPROX
173 else if (tnfa->have_approx || eflags & REG_APPROX_MATCHER)
174 {
175 /* The regex uses approximate matching, use the approximate matcher. */
176 regamatch_t match;
177 regaparams_t params;
178 tre_regaparams_default(&params);
179 params.max_err = 0;
180 params.max_cost = 0;
181 status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags,
182 &match, params, eflags, &eo);
183 }
184#endif /* TRE_APPROX */
185 else
186 {
187 /* Exact matching, no back references, use the parallel matcher. */
188 status = tre_tnfa_run_parallel(tnfa, string, (int)len, type,
189 tags, eflags, &eo);
190 }
191
192 if (status == REG_OK)
193 /* A match was found, so fill the submatch registers. */
194 tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
195#ifndef TRE_USE_ALLOCA
196 if (tags)
197 xfree(tags);
198#endif /* !TRE_USE_ALLOCA */
199 return status;
200}
201
202int
203tre_regnexec(const regex_t *preg, const char *str, size_t len,
204 size_t nmatch, regmatch_t pmatch[], int eflags)
205{
206 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
207 tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS;
208
209 return tre_match(tnfa, str, len, type, nmatch, pmatch, eflags);
210}
211
212int
213tre_regexec(const regex_t *preg, const char *str,
214 size_t nmatch, regmatch_t pmatch[], int eflags)
215{
216 return tre_regnexec(preg, str, (unsigned)-1, nmatch, pmatch, eflags);
217}
218
219
220#ifdef TRE_WCHAR
221
222int
223tre_regwnexec(const regex_t *preg, const wchar_t *str, size_t len,
224 size_t nmatch, regmatch_t pmatch[], int eflags)
225{
226 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
227 return tre_match(tnfa, str, len, STR_WIDE, nmatch, pmatch, eflags);
228}
229
230int
231tre_regwexec(const regex_t *preg, const wchar_t *str,
232 size_t nmatch, regmatch_t pmatch[], int eflags)
233{
234 return tre_regwnexec(preg, str, (unsigned)-1, nmatch, pmatch, eflags);
235}
236
237#endif /* TRE_WCHAR */
238
239int
240tre_reguexec(const regex_t *preg, const tre_str_source *str,
241 size_t nmatch, regmatch_t pmatch[], int eflags)
242{
243 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
244 return tre_match(tnfa, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags);
245}
246
247
248#ifdef TRE_APPROX
249
250/*
251 Wrapper functions for approximate regexp matching.
252*/
253
254static int
255tre_match_approx(const tre_tnfa_t *tnfa, const void *string, size_t len,
256 tre_str_type_t type, regamatch_t *match, regaparams_t params,
257 int eflags)
258{
259 reg_errcode_t status;
260 int *tags = NULL, eo;
261
262 /* If the regexp does not use approximate matching features, the
263 maximum cost is zero, and the approximate matcher isn't forced,
264 use the exact matcher instead. */
265 if (params.max_cost == 0 && !tnfa->have_approx
266 && !(eflags & REG_APPROX_MATCHER))
267 return tre_match(tnfa, string, len, type, match->nmatch, match->pmatch,
268 eflags);
269
270 /* Back references are not supported by the approximate matcher. */
271 if (tnfa->have_backrefs)
272 return REG_BADPAT;
273
274 if (tnfa->num_tags > 0 && match->nmatch > 0)
275 {
276#if TRE_USE_ALLOCA
277 tags = alloca(sizeof(*tags) * tnfa->num_tags);
278#else /* !TRE_USE_ALLOCA */
279 tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
280#endif /* !TRE_USE_ALLOCA */
281 if (tags == NULL)
282 return REG_ESPACE;
283 }
284 status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags,
285 match, params, eflags, &eo);
286 if (status == REG_OK)
287 tre_fill_pmatch(match->nmatch, match->pmatch, tnfa->cflags, tnfa, tags, eo);
288#ifndef TRE_USE_ALLOCA
289 if (tags)
290 xfree(tags);
291#endif /* !TRE_USE_ALLOCA */
292 return status;
293}
294
295int
296tre_reganexec(const regex_t *preg, const char *str, size_t len,
297 regamatch_t *match, regaparams_t params, int eflags)
298{
299 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
300 tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS;
301
302 return tre_match_approx(tnfa, str, len, type, match, params, eflags);
303}
304
305int
306tre_regaexec(const regex_t *preg, const char *str,
307 regamatch_t *match, regaparams_t params, int eflags)
308{
309 return tre_reganexec(preg, str, (unsigned)-1, match, params, eflags);
310}
311
312#ifdef TRE_WCHAR
313
314int
315tre_regawnexec(const regex_t *preg, const wchar_t *str, size_t len,
316 regamatch_t *match, regaparams_t params, int eflags)
317{
318 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
319 return tre_match_approx(tnfa, str, len, STR_WIDE,
320 match, params, eflags);
321}
322
323int
324tre_regawexec(const regex_t *preg, const wchar_t *str,
325 regamatch_t *match, regaparams_t params, int eflags)
326{
327 return tre_regawnexec(preg, str, (unsigned)-1, match, params, eflags);
328}
329
330#endif /* TRE_WCHAR */
331
332void
333tre_regaparams_default(regaparams_t *params)
334{
335 memset(params, 0, sizeof(*params));
336 params->cost_ins = 1;
337 params->cost_del = 1;
338 params->cost_subst = 1;
339 params->max_cost = INT_MAX;
340 params->max_ins = INT_MAX;
341 params->max_del = INT_MAX;
342 params->max_subst = INT_MAX;
343 params->max_err = INT_MAX;
344}
345
346#endif /* TRE_APPROX */
347
348/* EOF */