gdb - Local mods (compile)
[dragonfly.git] / contrib / tre / lib / tre-match-parallel.c
CommitLineData
5f2eab64
JM
1/*
2 tre-match-parallel.c - TRE parallel regex matching engine
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7*/
8
9/*
10 This algorithm searches for matches basically by reading characters
11 in the searched string one by one, starting at the beginning. All
12 matching paths in the TNFA are traversed in parallel. When two or
13 more paths reach the same state, exactly one is chosen according to
14 tag ordering rules; if returning submatches is not required it does
15 not matter which path is chosen.
16
17 The worst case time required for finding the leftmost and longest
18 match, or determining that there is no match, is always linearly
19 dependent on the length of the text being searched.
20
21 This algorithm cannot handle TNFAs with back referencing nodes.
22 See `tre-match-backtrack.c'.
23*/
24
25
26#ifdef HAVE_CONFIG_H
27#include <config.h>
28#endif /* HAVE_CONFIG_H */
29
d5f8dde1
JM
30/* Unset TRE_USE_ALLOCA to avoid using the stack to hold all the state
31 info while running */
32#undef TRE_USE_ALLOCA
33
5f2eab64
JM
34#ifdef TRE_USE_ALLOCA
35/* AIX requires this to be the first thing in the file. */
36#ifndef __GNUC__
37# if HAVE_ALLOCA_H
38# include <alloca.h>
39# else
40# ifdef _AIX
41 #pragma alloca
42# else
43# ifndef alloca /* predefined by HP cc +Olibcalls */
44char *alloca ();
45# endif
46# endif
47# endif
48#endif
49#endif /* TRE_USE_ALLOCA */
50
51#include <assert.h>
52#include <stdlib.h>
53#include <string.h>
54#ifdef HAVE_WCHAR_H
55#include <wchar.h>
56#endif /* HAVE_WCHAR_H */
57#ifdef HAVE_WCTYPE_H
58#include <wctype.h>
59#endif /* HAVE_WCTYPE_H */
60#ifndef TRE_WCHAR
61#include <ctype.h>
62#endif /* !TRE_WCHAR */
63#ifdef HAVE_MALLOC_H
64#include <malloc.h>
65#endif /* HAVE_MALLOC_H */
66
67#include "tre-internal.h"
68#include "tre-match-utils.h"
69#include "tre.h"
70#include "xmalloc.h"
71
72
73
74typedef struct {
75 tre_tnfa_transition_t *state;
d5f8dde1 76 tre_tag_t *tags;
5f2eab64
JM
77} tre_tnfa_reach_t;
78
79typedef struct {
80 int pos;
d5f8dde1 81 tre_tag_t **tags;
5f2eab64
JM
82} tre_reach_pos_t;
83
84
85#ifdef TRE_DEBUG
86static void
d5f8dde1 87tre_print_reach1(tre_tnfa_transition_t *state, tre_tag_t *tags, int num_tags)
5f2eab64 88{
d5f8dde1
JM
89 DPRINT((" %p", (void *)state));
90 if (num_tags > 0)
91 {
92 DPRINT(("/"));
93 tre_print_tags(tags, num_tags);
94 }
95}
5f2eab64 96
d5f8dde1
JM
97static void
98tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_reach_t *reach, int num_tags)
99{
5f2eab64
JM
100 while (reach->state != NULL)
101 {
d5f8dde1 102 tre_print_reach1(reach->state, reach->tags, num_tags);
5f2eab64
JM
103 reach++;
104 }
105 DPRINT(("\n"));
106
107}
108#endif /* TRE_DEBUG */
109
110reg_errcode_t
111tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
d5f8dde1 112 tre_str_type_t type, tre_tag_t *match_tags, int eflags,
5f2eab64
JM
113 int *match_end_ofs)
114{
115 /* State variables required by GET_NEXT_WCHAR. */
116 tre_char_t prev_c = 0, next_c = 0;
117 const char *str_byte = string;
118 int pos = -1;
119 unsigned int pos_add_next = 1;
120#ifdef TRE_WCHAR
121 const wchar_t *str_wide = string;
122#ifdef TRE_MBSTATE
123 mbstate_t mbstate;
124#endif /* TRE_MBSTATE */
125#endif /* TRE_WCHAR */
126 int reg_notbol = eflags & REG_NOTBOL;
127 int reg_noteol = eflags & REG_NOTEOL;
128 int reg_newline = tnfa->cflags & REG_NEWLINE;
5f2eab64
JM
129
130 char *buf;
131 tre_tnfa_transition_t *trans_i;
132 tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
133 tre_reach_pos_t *reach_pos;
134 int *tag_i;
135 int num_tags, i;
136
137 int match_eo = -1; /* end offset of match (-1 if no match found yet) */
d5f8dde1
JM
138#ifdef TRE_DEBUG
139 int once;
140#endif /* TRE_DEBUG */
141 tre_tag_t *tmp_tags = NULL;
142 tre_tag_t *tmp_iptr;
143 int tbytes;
144 int touch = 1;
5f2eab64
JM
145
146#ifdef TRE_MBSTATE
147 memset(&mbstate, '\0', sizeof(mbstate));
148#endif /* TRE_MBSTATE */
149
150 DPRINT(("tre_tnfa_run_parallel, input type %d\n", type));
151
152 if (!match_tags)
153 num_tags = 0;
154 else
155 num_tags = tnfa->num_tags;
156
157 /* Allocate memory for temporary data required for matching. This needs to
158 be done for every matching operation to be thread safe. This allocates
159 everything in a single large block from the stack frame using alloca()
160 or with malloc() if alloca is unavailable. */
161 {
d5f8dde1 162 int rbytes, pbytes, total_bytes;
5f2eab64
JM
163 char *tmp_buf;
164 /* Compute the length of the block we need. */
165 tbytes = sizeof(*tmp_tags) * num_tags;
166 rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
167 pbytes = sizeof(*reach_pos) * tnfa->num_states;
5f2eab64
JM
168 total_bytes =
169 (sizeof(long) - 1) * 4 /* for alignment paddings */
d5f8dde1 170 + (rbytes + tbytes * tnfa->num_states) * 2 + tbytes + pbytes;
5f2eab64 171
d5f8dde1 172 DPRINT(("tre_tnfa_run_parallel, allocate %d bytes\n", total_bytes));
5f2eab64
JM
173 /* Allocate the memory. */
174#ifdef TRE_USE_ALLOCA
175 buf = alloca(total_bytes);
176#else /* !TRE_USE_ALLOCA */
177 buf = xmalloc((unsigned)total_bytes);
178#endif /* !TRE_USE_ALLOCA */
179 if (buf == NULL)
180 return REG_ESPACE;
181 memset(buf, 0, (size_t)total_bytes);
182
183 /* Get the various pointers within tmp_buf (properly aligned). */
184 tmp_tags = (void *)buf;
185 tmp_buf = buf + tbytes;
186 tmp_buf += ALIGN(tmp_buf, long);
187 reach_next = (void *)tmp_buf;
188 tmp_buf += rbytes;
189 tmp_buf += ALIGN(tmp_buf, long);
190 reach = (void *)tmp_buf;
191 tmp_buf += rbytes;
192 tmp_buf += ALIGN(tmp_buf, long);
193 reach_pos = (void *)tmp_buf;
194 tmp_buf += pbytes;
195 tmp_buf += ALIGN(tmp_buf, long);
196 for (i = 0; i < tnfa->num_states; i++)
197 {
198 reach[i].tags = (void *)tmp_buf;
d5f8dde1 199 tmp_buf += tbytes;
5f2eab64 200 reach_next[i].tags = (void *)tmp_buf;
d5f8dde1 201 tmp_buf += tbytes;
5f2eab64
JM
202 }
203 }
204
205 for (i = 0; i < tnfa->num_states; i++)
206 reach_pos[i].pos = -1;
207
208 /* If only one character can start a match, find it first. */
d5f8dde1 209 if (tnfa->first_char >= 0 && str_byte)
5f2eab64
JM
210 {
211 const char *orig_str = str_byte;
212 int first = tnfa->first_char;
d5f8dde1 213 int found_high_bit = 0;
5f2eab64 214
d5f8dde1
JM
215
216 if (type == STR_BYTE)
217 {
218 if (len >= 0)
219 str_byte = memchr(orig_str, first, (size_t)len);
220 else
221 str_byte = strchr(orig_str, first);
222 }
223 else if (type == STR_MBS)
224 {
225 /*
226 * If the match character is ASCII, try to match the character
227 * directly, but if a high bit character is found, we stop there.
228 */
229 if (first < 0x80)
230 {
231 if (len >= 0)
232 {
233 int i;
234 for (i = 0; ; str_byte++, i++)
235 {
236 if (i >= len)
237 {
238 str_byte = NULL;
239 break;
240 }
241 if (*str_byte == first)
242 break;
243 if (*str_byte & 0x80)
244 {
245 found_high_bit = 1;
246 break;
247 }
248 }
249 }
250 else
251 {
252 for (; ; str_byte++)
253 {
254 if (!*str_byte)
255 {
256 str_byte = NULL;
257 break;
258 }
259 if (*str_byte == first)
260 break;
261 if (*str_byte & 0x80)
262 {
263 found_high_bit = 1;
264 break;
265 }
266 }
267 }
268 }
269 else
270 {
271 if (len >= 0)
272 {
273 int i;
274 for (i = 0; ; str_byte++, i++)
275 {
276 if (i >= len)
277 {
278 str_byte = NULL;
279 break;
280 }
281 if (*str_byte & 0x80)
282 {
283 found_high_bit = 1;
284 break;
285 }
286 }
287 }
288 else
289 {
290 for (; ; str_byte++)
291 {
292 if (!*str_byte)
293 {
294 str_byte = NULL;
295 break;
296 }
297 if (*str_byte & 0x80)
298 {
299 found_high_bit = 1;
300 break;
301 }
302 }
303 }
304 }
305 }
5f2eab64
JM
306 if (str_byte == NULL)
307 {
308#ifndef TRE_USE_ALLOCA
309 if (buf)
310 xfree(buf);
311#endif /* !TRE_USE_ALLOCA */
312 return REG_NOMATCH;
313 }
314 DPRINT(("skipped %lu chars\n", (unsigned long)(str_byte - orig_str)));
d5f8dde1 315 if (!found_high_bit)
5f2eab64 316 {
d5f8dde1
JM
317 if (str_byte >= orig_str + 1)
318 prev_c = (unsigned char)*(str_byte - 1);
319 next_c = (unsigned char)*str_byte;
320 pos = str_byte - orig_str;
321 if (len < 0 || pos < len)
322 str_byte++;
5f2eab64
JM
323 }
324 else
325 {
d5f8dde1
JM
326 if (str_byte == orig_str)
327 goto no_first_optimization;
328 /*
329 * Back up one character, fix up the position, then call
330 * GET_NEXT_WCHAR() to process the multibyte character.
331 */
332 /* no need to set prev_c, since GET_NEXT_WCHAR will overwrite */
333 next_c = (unsigned char)*(str_byte - 1);
334 pos = (str_byte - 1) - orig_str;
335 GET_NEXT_WCHAR();
5f2eab64
JM
336 }
337 }
d5f8dde1
JM
338 else
339 {
340no_first_optimization:
341 GET_NEXT_WCHAR();
342 pos = 0;
343 }
5f2eab64
JM
344
345 DPRINT(("length: %d\n", len));
346 DPRINT(("pos:chr/code | states and tags\n"));
347 DPRINT(("-------------+------------------------------------------------\n"));
348
349 reach_next_i = reach_next;
350 while (/*CONSTCOND*/1)
351 {
352 /* If no match found yet, add the initial states to `reach_next'. */
353 if (match_eo < 0)
354 {
355 DPRINT((" init >"));
356 trans_i = tnfa->initial;
357 while (trans_i->state != NULL)
358 {
359 if (reach_pos[trans_i->state_id].pos < pos)
360 {
361 if (trans_i->assertions
362 && CHECK_ASSERTIONS(trans_i->assertions))
363 {
364 DPRINT(("assertion failed\n"));
365 trans_i++;
366 continue;
367 }
368
369 DPRINT((" %p", (void *)trans_i->state));
370 reach_next_i->state = trans_i->state;
d5f8dde1 371 memset(reach_next_i->tags, 0, tbytes);
5f2eab64
JM
372 tag_i = trans_i->tags;
373 if (tag_i)
d5f8dde1
JM
374 {
375 while (*tag_i >= 0)
376 {
377 if (*tag_i < num_tags)
378 tre_tag_set(reach_next_i->tags, *tag_i, pos, touch);
379 tag_i++;
380 }
381 touch++;
382 }
5f2eab64
JM
383 if (reach_next_i->state == tnfa->final)
384 {
385 DPRINT((" found empty match\n"));
386 match_eo = pos;
d5f8dde1 387 memcpy(match_tags, reach_next_i->tags, tbytes);
5f2eab64
JM
388 }
389 reach_pos[trans_i->state_id].pos = pos;
390 reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
391 reach_next_i++;
392 }
393 trans_i++;
394 }
395 DPRINT(("\n"));
396 reach_next_i->state = NULL;
397 }
398 else
399 {
400 if (num_tags == 0 || reach_next_i == reach_next)
401