gdb - Local mods (compile)
[dragonfly.git] / contrib / tre / lib / tre-compile.c
CommitLineData
5f2eab64
JM
1/*
2 tre-compile.c - TRE regex compiler
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7*/
8
9/*
10 TODO:
11 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
12 function calls.
13*/
14
15
16#ifdef HAVE_CONFIG_H
17#include <config.h>
18#endif /* HAVE_CONFIG_H */
19#include <stdio.h>
20#include <assert.h>
21#include <string.h>
d5f8dde1 22#include <limits.h>
5f2eab64
JM
23
24#include "tre-internal.h"
25#include "tre-mem.h"
26#include "tre-stack.h"
27#include "tre-ast.h"
28#include "tre-parse.h"
29#include "tre-compile.h"
30#include "tre.h"
31#include "xmalloc.h"
32
d5f8dde1
JM
33/*
34 The bit_ffs() macro in bitstring.h is flawed. Replace it with a working one.
35*/
36#undef bit_ffs
37#define bit_ffs(name, nbits, value) { \
38 register bitstr_t *_name = name; \
39 register int _byte, _nbits = nbits; \
40 register int _stopbyte = _bit_byte(_nbits), _value = -1; \
41 for (_byte = 0; _byte <= _stopbyte; ++_byte) \
42 if (_name[_byte]) { \
43 _value = _byte << 3; \
44 for (_stopbyte = _name[_byte]; !(_stopbyte&0x1); \
45 ++_value, _stopbyte >>= 1); \
46 break; \
47 } \
48 *(value) = _value; \
49}
50
5f2eab64
JM
51/*
52 Algorithms to setup tags so that submatch addressing can be done.
53*/
54
55
d5f8dde1
JM
56#ifdef TRE_DEBUG
57static const char *tag_dir_str[] = {
58 "minimize",
59 "maximize",
60 "left-maximize"
61};
62
63static const char _indent[] = " ";
64
65static void
66print_indent(int indent)
67{
68 while (indent-- > 0)
69 DPRINT((_indent));
70}
71
72static void print_last_matched_pre(tre_last_matched_pre_t *lm, int indent,
73 int num_tags);
74static void
75print_last_match_branch_pre(tre_last_matched_branch_pre_t *branch, int indent,
76 int num_tags)
77{
78 tre_last_matched_pre_t *u = branch->last_matched;
79 int n_last_matched = 0;
80
81 while (u)
82 {
83 n_last_matched++;
84 u = u->next;
85 }
86
87 print_indent(indent);
88 DPRINT(("BRANCH: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
89 branch->tot_branches, branch->tot_last_matched, branch->tot_tags));
90 print_indent(indent);
91 DPRINT(("..n_last_matched=%d last_matched=%d\n", branch->n_last_matched,
92 n_last_matched));
93 if (branch->n_last_matched != n_last_matched)
94 DPRINT(("*** mismatch between n_last_matched and unions ***\n"));
95 if (branch->cmp_tag > 0)
96 {
97 int i;
98 const char *sep = " tags=";
99 print_indent(indent);
100 DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
101 for (i = 0; i < num_tags; i++)
102 if (bit_test(branch->tags, i))
103 {
104 DPRINT(("%s%d", sep, i));
105 sep = ",";
106 }
107 DPRINT(("\n"));
108 }
109
110 u = branch->last_matched;
111 indent++;
112 while (u)
113 {
114 print_last_matched_pre(u, indent, num_tags);
115 u = u->next;
116 }
117}
118
119static void
120print_last_matched_pre(tre_last_matched_pre_t *lm, int indent, int num_tags)
121{
122 tre_last_matched_branch_pre_t *b = lm->branches;
123 int n_branches = 0;
124
125 while (b)
126 {
127 n_branches++;
128 b = b->next;
129 }
130
131 print_indent(indent);
132 DPRINT(("LAST_MATCHED: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
133 lm->tot_branches, lm->tot_last_matched, lm->tot_tags));
134 print_indent(indent);
135 DPRINT(("..start_tag=%d n_branches=%d branches=%d\n", lm->start_tag,
136 lm->n_branches, n_branches));
137 if (lm->n_branches != n_branches)
138 DPRINT(("*** mismatch between n and branches ***\n"));
139
140 b = lm->branches;
141 indent++;
142 while (b)
143 {
144 print_last_match_branch_pre(b, indent, num_tags);
145 b = b->next;
146 }
147}
148
149static void print_last_matched(tre_last_matched_t *lm, int indent);
150static void
151print_last_match_branch(tre_last_matched_branch_t *branch, int indent)
152{
153 tre_last_matched_t *u;
154 int i;
155
156 print_indent(indent);
157 DPRINT(("BRANCH: n_last_matched=%d\n", branch->n_last_matched));
158 if (branch->cmp_tag > 0)
159 {
160 print_indent(indent);
161 DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
162 if (branch->n_tags > 0)
163 {
164 const char *sep = " tags=";
165 for (i = 0; i < branch->n_tags; i++)
166 {
167 DPRINT(("%s%d", sep, branch->tags[i]));
168 sep = ",";
169 }
170 }
171 DPRINT(("\n"));
172 }
173
174 u = branch->last_matched;
175 indent++;
176 for (i = branch->n_last_matched; i > 0; i--, u++)
177 print_last_matched(u, indent);
178}
179
180static void
181print_last_matched(tre_last_matched_t *lm, int indent)
182{
183 int i;
184 tre_last_matched_branch_t *b;
185
186 print_indent(indent);
187 DPRINT(("LAST_MATCHED: n_branches=%d start_tag=%d\n", lm->n_branches,
188 lm->start_tag));
189
190 b = lm->branches;
191 indent++;
192 for (i = lm->n_branches; i > 0; i--, b++)
193 print_last_match_branch(b, indent);
194}
195#endif /* TRE_DEBUG */
196
197
198/* Merge the tre_last_matched_branch_pre_t of src into dst, creating a new
199 one if needed. If tag_id > 0, add that tag as well (a negative tag_id will
200 create an unset tre_last_matched_branch_pre_t. */
201static reg_errcode_t
202tre_merge_branches(tre_mem_t mem, tre_ast_node_t *dst, tre_ast_node_t *src,
203 int tag_id, int num_tags)
204{
205 tre_last_matched_branch_pre_t *db = dst->last_matched_branch;
206 tre_last_matched_branch_pre_t *sb = (src ? src->last_matched_branch : NULL);
207
208 if (db)
209 {
210 if (sb)
211 {
212 bitstr_t *l = db->tags;
213 bitstr_t *r = sb->tags;
214 int i = bitstr_size(num_tags);
215
216 while(i-- > 0)
217 *l++ |= *r++;
218 /* db and sb are the info from two parallel sub-trees, so the tags
219 must be mutually exclusive, and we can just add their numbers */
220 db->n_tags += sb->n_tags;
221 db->tot_tags += sb->tot_tags;
222 if (db->last_matched)
223 {
224 if (sb->last_matched)
225 {
226 tre_last_matched_pre_t *u = db->last_matched;
227
228 while(u->next)
229 u = u->next;
230 u->next = sb->last_matched;
231 db->n_last_matched += sb->n_last_matched;
232 db->tot_branches += sb->tot_branches;
233 db->tot_last_matched += sb->tot_last_matched;
234 }
235 }
236 else if (sb->last_matched)
237 {
238 db->last_matched = sb->last_matched;
239 db->n_last_matched = sb->n_last_matched;
240 db->tot_branches = sb->tot_branches;
241 db->tot_last_matched = sb->tot_last_matched;
242 }
243 }
244 }
245 else
246 db = sb;
247
248 if (tag_id != 0)
249 {
250 if (!db)
251 {
252 db = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
253 + bitstr_size(num_tags));
254 if (db == NULL)
255 return REG_ESPACE;
256 db->tot_branches = 1;
257 }
258 if (tag_id > 0)
259 {
260 /* tag_id is a new tag, and shouldn't exist in db's tags,
261 so we can always increment n_tags */
262 bit_set(db->tags, tag_id);
263 db->n_tags++;
264 db->tot_tags++;
265 }
266 }
267 dst->last_matched_branch = db;
268 return REG_OK;
269}
270
271
5f2eab64
JM
272/* Inserts a catenation node to the root of the tree given in `node'.
273 As the left child a new tag with number `tag_id' to `node' is added,
274 and the right child is the old root. */
275static reg_errcode_t
276tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
277{
278 tre_catenation_t *c;
279
280 DPRINT(("add_tag_left: tag %d\n", tag_id));
281
282 c = tre_mem_alloc(mem, sizeof(*c));
283 if (c == NULL)
284 return REG_ESPACE;
285 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
286 if (c->left == NULL)
287 return REG_ESPACE;
d5f8dde1 288 c->right = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
5f2eab64
JM
289 if (c->right == NULL)
290 return REG_ESPACE;
291
292 c->right->obj = node->obj;
293 c->right->type = node->type;
d5f8dde1 294 c->right->last_matched_branch = node->last_matched_branch;
5f2eab64
JM
295 c->right->nullable = -1;
296 c->right->submatch_id = -1;
5f2eab64
JM
297 node->obj = c;
298 node->type = CATENATION;
d5f8dde1 299 node->original = c->right;
5f2eab64
JM
300 return REG_OK;
301}
302
303/* Inserts a catenation node to the root of the tree given in `node'.
304 As the right child a new tag with number `tag_id' to `node' is added,
305 and the left child is the old root. */
306static reg_errcode_t
307tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
308{
309 tre_catenation_t *c;
310
311 DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
312
313 c = tre_mem_alloc(mem, sizeof(*c));
314 if (c == NULL)
315 return REG_ESPACE;
316 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
317 if (c->right == NULL)
318 return REG_ESPACE;
d5f8dde1 319 c->left = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
5f2eab64
JM
320 if (c->left == NULL)
321 return REG_ESPACE;
322
323 c->left->obj = node->obj;
324 c->left->type = node->type;
d5f8dde1 325 c->left->last_matched_branch = node->last_matched_branch;
5f2eab64
JM
326 c->left->nullable = -1;
327 c->left->submatch_id = -1;
5f2eab64
JM
328 node->obj = c;
329 node->type = CATENATION;
d5f8dde1 330 node->original = c->left;
5f2eab64
JM
331 return REG_OK;
332}
333
334typedef enum {
335 ADDTAGS_RECURSE,
d5f8dde1 336 ADDTAGS_RECURSE_NOT_TOP_UNION,
5f2eab64
JM
337 ADDTAGS_AFTER_ITERATION,
338 ADDTAGS_AFTER_UNION_LEFT,
339 ADDTAGS_AFTER_UNION_RIGHT,
340 ADDTAGS_AFTER_CAT_LEFT,
341 ADDTAGS_AFTER_CAT_RIGHT,
d5f8dde1
JM
342 ADDTAGS_SET_SUBMATCH_END,
343 ADDTAGS_UNION_RECURSE,
344 ADDTAGS_UNION_RIGHT_RECURSE,
345 ADDTAGS_AFTER_UNION_TOP,
5f2eab64
JM
346} tre_addtags_symbol_t;
347
d5f8dde1
JM
348enum {
349 COPY_LAST_MATCHED_BRANCH,
350 COPY_LAST_MATCHED_BRANCH_NEXT,
351 COPY_LAST_MATCHED,
352 COPY_LAST_MATCHED_NEXT,
353};
5f2eab64 354
5f2eab64 355
d5f8dde1 356#define REGSET_UNSET ((unsigned)-1)
5f2eab64
JM
357
358/* Go through `regset' and set submatch data for submatches that are
359 using this tag. */
360static void
d5f8dde1 361tre_purge_regset(unsigned *regset, tre_tnfa_t *tnfa, int tag)
5f2eab64
JM
362{
363 int i;
364
d5f8dde1 365 for (i = 0; regset[i] != REGSET_UNSET; i++)
5f2eab64
JM
366 {
367 int id = regset[i] / 2;
368 int start = !(regset[i] % 2);
d5f8dde1
JM
369 if (id >= SUBMATCH_ID_INVISIBLE_START)
370 continue;
5f2eab64
JM
371 DPRINT((" Using tag %d for %s offset of "
372 "submatch %d\n", tag,
373 start ? "start" : "end", id));
374 if (start)
375 tnfa->submatch_data[id].so_tag = tag;
376 else
377 tnfa->submatch_data[id].eo_tag = tag;
378 }
379 regset[0] = -1;
380}
381
382
d5f8dde1
JM
383#define REGSET_HAS_STARTS 0x1
384#define REGSET_HAS_ENDS 0x2
385
386
5f2eab64
JM
387/* Adds tags to appropriate locations in the parse tree in `tree', so that
388 subexpressions marked for submatch addressing can be traced. */
389static reg_errcode_t
390tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
391 tre_tnfa_t *tnfa)
392{
393 reg_errcode_t status = REG_OK;
394 tre_addtags_symbol_t symbol;
395 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
396 int bottom = tre_stack_num_objects(stack);
397 /* True for first pass (counting number of needed tags) */
398 int first_pass = (mem == NULL || tnfa == NULL);
d5f8dde1
JM
399 unsigned *regset, *orig_regset;
400 int regset_contains = 0;
5f2eab64
JM
401 int num_tags = 0; /* Total number of tags. */
402 int num_minimals = 0; /* Number of special minimal tags. */
403 int tag = 0; /* The tag that is to be added next. */
404 int next_tag = 1; /* Next tag to use after this one. */
5f2eab64 405 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
d5f8dde1
JM
406 int *reorder_tags = NULL; /* Tag reorder array: a pair for each reorder,
407 * the first is the tag to reorder, the second
408 * is the tag after which the first is reordered */
409 int *rtp; /* Pointer used to fill in reorder_tags and
410 * tag_order */
411 int *to_reorder; /* Transform array converting sequential order to
412 * that specified by reorder_tags */
413 int id;
414
415 tre_tag_direction_t direction = TRE_TAG_LEFT_MAXIMIZE;
5f2eab64
JM
416 if (!first_pass)
417 {
d5f8dde1 418 DPRINT(("Initializing direction to %s\n", tag_dir_str[direction]));
5f2eab64
JM
419 tnfa->end_tag = 0;
420 tnfa->minimal_tags[0] = -1;
421 }
422
d5f8dde1
JM
423 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches
424 + tnfa->num_submatches_invisible + 1) * 2));
5f2eab64 425 if (regset == NULL)
5f2eab64 426 {
d5f8dde1
JM
427 status = REG_ESPACE;
428 goto error_regset;
5f2eab64 429 }
d5f8dde1
JM
430 regset[0] = REGSET_UNSET;
431 orig_regset = regset;
5f2eab64 432
d5f8dde1 433 if (!first_pass)
5f2eab64 434 {
d5f8dde1
JM
435 /* Allocate all memory for reorder_tags, tag_order, to_seq_order and
436 * to_reorder in one batch (assuming all are the same type) */
437 rtp = reorder_tags = xmalloc(sizeof(*reorder_tags) *
438 ((2 * tnfa->num_reorder_tags + 1) +
439 tnfa->num_tags));
440 if (reorder_tags == NULL)
441 {
442 status = REG_ESPACE;
443 goto error_reorder_tags;
444 }
445 to_reorder = reorder_tags + (2 * tnfa->num_reorder_tags + 1);
5f2eab64
JM
446 }
447
448 STACK_PUSH(stack, voidptr, node);
449 STACK_PUSH(stack, int, ADDTAGS_RECURSE);
450
451 while (tre_stack_num_objects(stack) > bottom)
452 {
453 if (status != REG_OK)
454 break;
455
456 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
457 switch (symbol)
458 {
d5f8dde1 459 int top_union;
5f2eab64
JM
460
461 case ADDTAGS_SET_SUBMATCH_END:
462 {
5f2eab64
JM
463 int i;
464
d5f8dde1
JM
465 id = tre_stack_pop_int(stack);
466 node = tre_stack_pop_voidptr(stack);
5f2eab64 467 /* Add end of this submatch to regset. */
d5f8dde1 468 for (i = 0; regset[i] != REGSET_UNSET; i++);
5f2eab64
JM
469 regset[i] = id * 2 + 1;
470 regset[i + 1] = -1;
d5f8dde1 471 regset_contains |= REGSET_HAS_ENDS;
5f2eab64 472
d5f8dde1
JM
473 /* Always put a tag after a minimal iterator. */
474 if (minimal_tag >= 0)
475 {
476 if (first_pass)
477 {
478 node->num_tags++;
479 DPRINT((" ADDTAGS_SET_SUBMATCH_END: node->num_tags = %d\n",
480 node->num_tags));
481 }
482 else
483 {
484 int i;
485 status = tre_merge_branches(mem, node, NULL, tag,
486 tnfa->num_tags);
487 if (status != REG_OK)
488 break;
489 status = tre_add_tag_right(mem, node, tag);
490 if (status != REG_OK)
491 break;
492 tnfa->tag_directions[tag] = TRE_TAG_MINIMIZE;
493 DPRINT(("Setting t%d direction to %s\n", tag,
494 tag_dir_str[tnfa->tag_directions[tag]]));
495 DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
496 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
497 tnfa->minimal_tags[i] = tag;
498 tnfa->minimal_tags[i + 1] = minimal_tag;
499 tnfa->minimal_tags[i + 2] = -1;
500
501 DPRINT((" Minimal end: t%d reordered to "
502 "after t%d\n", tag, minimal_tag));
503 /* Append to tag_order, move "tag" after
504 * "minimal_tag" */
505 *rtp++ = tag;
506 *rtp++ = minimal_tag;
507
508 num_minimals++;
509 tre_purge_regset(regset, tnfa, tag);
510 }
511
512 minimal_tag = -1;
513 DPRINT((" ADDTAGS_SET_SUBMATCH_END num_tags++ tag=%d\n", tag));
514 regset[0] = REGSET_UNSET;
515 regset_contains = 0;
516 tag = next_tag;
517 num_tags++;
518 next_tag++;
519 }
5f2eab64
JM
520 break;
521 }
522
d5f8dde1
JM
523 case ADDTAGS_RECURSE_NOT_TOP_UNION:
524 /* Like ADDTAGS_RECURSE, except that top_union is set to zero,
525 * indicating that if a union is being processed, it is not the
526 * top-most of a series */
527 top_union = 0;
528 goto do_addtags_recurse;
529
5f2eab64 530 case ADDTAGS_RECURSE:
d5f8dde1
JM
531 /* Setting top_union to 1 means that if a union is begin processed,
532 * it is the top-most of a series, and should recurse through the
533 * series to set the left_tag and right_tag values */
534 top_union = 1;
535
536do_addtags_recurse:
5f2eab64
JM
537 node = tre_stack_pop_voidptr(stack);
538
d5f8dde1
JM
539 id = node->submatch_id;
540 if (id >= 0)
5f2eab64 541 {
5f2eab64
JM
542 int i;
543
544
545 /* Add start of this submatch to regset. */
d5f8dde1 546 for (i = 0; regset[i] != REGSET_UNSET; i++);
5f2eab64
JM
547 regset[i] = id * 2;
548 regset[i + 1] = -1;
d5f8dde1 549 regset_contains |= REGSET_HAS_STARTS;
5f2eab64
JM
550
551 /* Add end of this submatch to regset after processing this
552 node. */
d5f8dde1
JM
553 STACK_PUSH(stack, voidptr, node);
554 STACK_PUSHX(stack, int, id);
5f2eab64
JM
555 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
556 }
557
558 switch (node->type)
559 {
560 case LITERAL:
561 {
562 tre_literal_t *lit = node->obj;
563
d5f8dde1 564 if (!IS_SPECIAL(lit) || IS_BACKREF(lit) || IS_EMPTY(lit) || IS_ASSERTION(lit))
5f2eab64 565 {
5f2eab64
JM
566 DPRINT(("Literal %d-%d\n",
567 (int)lit->code_min, (int)lit->code_max));
d5f8dde1 568 if (regset_contains)
5f2eab64
JM
569 {
570 /* Regset is not empty, so add a tag before the
571 literal or backref. */
d5f8dde1 572 if (first_pass)
5f2eab64 573 {
d5f8dde1
JM
574 DPRINT((" ADDTAGS_RECURSE:LITERAL node->num_tags = 1\n"));
575 node->num_tags = 1;
5f2eab64
JM
576 }
577 else
578 {
d5f8dde1
JM
579 status = tre_merge_branches(mem, node, NULL, tag,
580 tnfa->num_tags);
581 if (status != REG_OK)
582 break;
583 status = tre_add_tag_left(mem, node, tag);
584 if (status != REG_OK)
585 break;
586 if (regset_contains == REGSET_HAS_STARTS)
587 tnfa->tag_directions[tag] = TRE_TAG_LEFT_MAXIMIZE;
588 else
589 tnfa->tag_directions[tag] = direction;
590 DPRINT(("Setting t%d direction to %s\n", tag,
591 tag_dir_str[tnfa->tag_directions[tag]]));
592 tre_purge_regset(regset, tnfa, tag);
593
594 if (IS_BACKREF(lit))
595 {
596 int b = lit->code_max;
597 int t = tnfa->submatch_data[b].so_tag;
598 /* Fail if the referenced submatch hasn't been
599 * completed yet */
600 if (tnfa->submatch_data[b].eo_tag < 0)
601 {
602 status = REG_ESUBREG;
603 break;
604 }
605 if (t < tag)
606 {
607 DPRINT((" Backref %d start: "
608 "t%d reordered to before t%d\n",
609 b, tag, t));
610 if(t > 0)
611 t--;
612 /* Append to tag_order, move "tag" after
613 * "t" */
614 *rtp++ = tag;
615 *rtp++ = t;
616 }
617#if TRE_DEBUG
618 else
619 DPRINT((" Backref %d start: "
620 "(t%d already before t%d)\n",
621 b, tag, t));
622#endif /* TRE_DEBUG */
623 }
5f2eab64
JM
624 }
625
d5f8dde1
JM
626 DPRINT((" ADDTAGS_RECURSE:LITERAL num_tags++ tag=%d\n",
627 tag));
628 regset[0] = REGSET_UNSET;
629 regset_contains = 0;
5f2eab64
JM
630 tag = next_tag;
631 num_tags++;
632 next_tag++;
633 }
634 }
635 else
636 {
637 assert(!IS_TAG(lit));
638 }
639 break;
640 }
641 case CATENATION:
642 {
643 tre_catenation_t *cat = node->obj;
644 tre_ast_node_t *left = cat->left;
645 tre_ast_node_t *right = cat->right;
646 int reserved_tag = -1;
647 DPRINT(("Catenation, next_tag = %d\n", next_tag));
648
649
650 /* After processing right child. */
651 STACK_PUSHX(stack, voidptr, node);
652 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
653
654 /* Process right child. */
655 STACK_PUSHX(stack, voidptr, right);
656 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
657
658 /* After processing left child. */
659 STACK_PUSHX(stack, int, next_tag + left->num_tags);
660 DPRINT((" Pushing %d for after left\n",
661 next_tag + left->num_tags));
662 if (left->num_tags > 0 && right->num_tags > 0)
663 {
664 /* Reserve the next tag to the right child. */
d5f8dde1
JM
665 DPRINT((" ADDTAGS_RECURSE:CATENATION num_tags++ "
666 "Reserving next_tag %d to right child\n",
5f2eab64
JM
667 next_tag));
668 reserved_tag = next_tag;
669 next_tag++;
670 }
671 STACK_PUSHX(stack, int, reserved_tag);
672 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
673
674 /* Process left child. */
675 STACK_PUSHX(stack, voidptr, left);
676 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
677
678 }
679 break;
680 case ITERATION:
681 {
682 tre_iteration_t *iter = node->obj;
683 DPRINT(("Iteration\n"));
684
685 if (first_pass)
d5f8dde1
JM
686 STACK_PUSHX(stack, int, regset_contains != 0);
687 STACK_PUSHX(stack, int, tag);
5f2eab64
JM
688 STACK_PUSHX(stack, voidptr, node);
689 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
690
691 STACK_PUSHX(stack, voidptr, iter->arg);
692 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
693
d5f8dde1
JM
694 /* Regset is not empty, so add a tag here (this always happens
695 because iterators always get submatch id, even if in the
696 invisible range) */
697 if (regset_contains)
5f2eab64
JM
698 {
699 if (!first_pass)
700 {
d5f8dde1
JM
701 status = tre_merge_branches(mem, node, NULL, tag,
702 tnfa->num_tags);
703 if (status != REG_OK)
704 break;
5f2eab64 705 status = tre_add_tag_left(mem, node, tag);
d5f8dde1
JM
706 if (status != REG_OK)
707 break;
708 if (regset_contains == REGSET_HAS_STARTS && tag != 0)
709 tnfa->tag_directions[tag] = iter->minimal ?
710 TRE_TAG_MINIMIZE :
711 TRE_TAG_LEFT_MAXIMIZE;
5f2eab64
JM
712 else
713 tnfa->tag_directions[tag] = direction;
d5f8dde1
JM
714 DPRINT(("Setting t%d direction to %s\n", tag,
715 tag_dir_str[tnfa->tag_directions[tag]]));
5f2eab64
JM
716 tre_purge_regset(regset, tnfa, tag);
717 }
718
d5f8dde1
JM
719 DPRINT((" ADDTAGS_RECURSE:ITERATION num_tags++ tag=%d\n",
720 tag));
721 regset[0] = REGSET_UNSET;
722 regset_contains = 0;
5f2eab64
JM
723 tag = next_tag;
724 num_tags++;
725 next_tag++;
726 }
d5f8dde1
JM
727 direction = TRE_TAG_LEFT_MAXIMIZE;
728 DPRINT((" Setting direction to %s\n", tag_dir_str[direction]));
5f2eab64
JM
729 }
730 break;
731 case UNION:
732 {
d5f8dde1
JM
733 tre_union_t *uni;
734 tre_ast_node_t *left;
735 tre_ast_node_t *right;
736 int front_tag = -1;
5f2eab64 737
d5f8dde1
JM
738 DPRINT(("Union\n"));
739
740 if (regset_contains)
5f2eab64 741 {
d5f8dde1
JM
742 DPRINT((" UNION num_tags++ tag=%d\n", tag));
743 front_tag = tag;
744 tag = next_tag;
745 num_tags++;
746 next_tag++;
5f2eab64 747 }
d5f8dde1
JM
748
749 /* For the top union, walk the tree of consecutive unions,
750 * setting the left_tag and right_tag values in increasing
751 * order (left to right priority) */
752 if (top_union &&
753 (node->num_submatches -
754 (node->submatch_id >= 0 &&
755 node->submatch_id < SUBMATCH_ID_INVISIBLE_START)) > 0)
5f2eab64 756 {
d5f8dde1
JM
757 tre_ast_node_t *n;
758 int last = tre_stack_num_objects(stack);
5f2eab64 759
d5f8dde1
JM
760 STACK_PUSH(stack, voidptr, node);
761 STACK_PUSH(stack, int, ADDTAGS_UNION_RECURSE);
762
763 while (tre_stack_num_objects(stack) > last)
764 {
765 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
766 switch (symbol)
767 {
768 case ADDTAGS_UNION_RECURSE:
769 n = tre_stack_pop_voidptr(stack);
770 uni = n->obj;
771 left = uni->left;
772
773 /* Since the top union has num_submatches > 0,
774 * we set all the consecutive union's
775 * make_branches to 1 to force the generation
776 * of end tags for each union branch. */
777 n->make_branches = 1;
778
779 STACK_PUSH(stack, voidptr, n);
780 STACK_PUSH(stack, int,
781 ADDTAGS_UNION_RIGHT_RECURSE);
782
783 if (left->type == UNION)
784 {
785 STACK_PUSH(stack, voidptr, left);
786 STACK_PUSH(stack, int,
787 ADDTAGS_UNION_RECURSE);
788 }
789 else
790 {
791 DPRINT((" ADDTAGS_UNION_RECURSE "
792 "num_tags++ tag=%d\n", tag));
793 uni->left_tag = tag;
794 tag = next_tag;
795 num_tags++;
796 next_tag++;
797 }
798 break;
799
800 case ADDTAGS_UNION_RIGHT_RECURSE:
801 n = tre_stack_pop_voidptr(stack);
802 uni = n->obj;
803 right = uni->right;
804
805 if (right->type == UNION)
806 {
807 STACK_PUSH(stack, voidptr, right);
808 STACK_PUSH(stack, int,
809 ADDTAGS_UNION_RECURSE);
810 }
811 else
812 {
813 DPRINT((" ADDTAGS_UNION_RIGHT_RECURSE "
814 "num_tags++ tag=%d\n", tag));
815 uni->right_tag = tag;
816 tag = next_tag;
817 num_tags++;
818 next_tag++;
819 }
820
821 break;
822
823 default:
824 assert(0);
825 break;
826
827 } /* end switch(symbol) */
828 } /* end while(tre_stack_num_objects(stack) > last */
829 if (!first_pass)
830 {
831 STACK_PUSHX(stack, int, front_tag);
832 STACK_PUSHX(stack, voidptr, node);
833 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_TOP);
834 }
835 } /* end if (top_union && ...) */
836
837 uni = node->obj;
838 left = uni->left;
839 right = uni->right;
5f2eab64
JM
840
841 /* After processing right child. */
5f2eab64 842 STACK_PUSHX(stack, voidptr, regset);
d5f8dde1 843 STACK_PUSHX(stack, int, regset_contains != 0);
5f2eab64 844 STACK_PUSHX(stack, voidptr, node);
5f2eab64
JM
845 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
846
847 /* Process right child. */
848 STACK_PUSHX(stack, voidptr, right);
d5f8dde1 849 STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
5f2eab64
JM
850
851 /* After processing left child. */
852 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
853
854 /* Process left child. */
855 STACK_PUSHX(stack, voidptr, left);
d5f8dde1 856 STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
5f2eab64
JM
857
858 /* Regset is not empty, so add a tag here. */
d5f8dde1 859 if (regset_contains)
5f2eab64
JM
860 {
861 if (!first_pass)
862 {
d5f8dde1
JM
863 status = tre_merge_branches(mem, node, NULL, front_tag,
864 tnfa->num_tags);
865 if (status != REG_OK)
866 break;
867 status = tre_add_tag_left(mem, node, front_tag);
868 if (status != REG_OK)
869 break;
870 if (regset_contains == REGSET_HAS_STARTS)
871 tnfa->tag_directions[front_tag] = TRE_TAG_LEFT_MAXIMIZE;
872 else
873 tnfa->tag_directions[front_tag] = direction;
874 DPRINT(("Setting t%d direction to %s\n", front_tag,
875 tag_dir_str[tnfa->tag_directions[front_tag]]));
876 tre_purge_regset(regset, tnfa, front_tag);
5f2eab64
JM
877 }
878
d5f8dde1
JM
879 regset[0] = REGSET_UNSET;
880 regset_contains = 0;
5f2eab64
JM
881 }
882
883 break;
884 }
d5f8dde1 885 } /* end switch (node->type) */
5f2eab64
JM
886
887 break; /* end case: ADDTAGS_RECURSE */
888
889 case ADDTAGS_AFTER_ITERATION:
890 {
d5f8dde1
JM
891 tre_iteration_t *iter;
892 tre_ast_node_t *orig;
5f2eab64 893 int enter_tag;
d5f8dde1 894
5f2eab64 895 node = tre_stack_pop_voidptr(stack);
d5f8dde1
JM
896 orig = node->original ? node->original : node;
897 iter = (tre_iteration_t *)orig->obj;
898 enter_tag = tre_stack_pop_int(stack);
899 if (iter->minimal)
900 minimal_tag = enter_tag;
901
902 DPRINT(("After iteration\n"));
5f2eab64
JM
903 if (first_pass)
904 {
d5f8dde1
JM
905 node->num_tags = iter->arg->num_tags + tre_stack_pop_int(stack);
906 DPRINT((" ADDTAGS_AFTER_ITERATION: node->num_tags = %d\n",
907 node->num_tags));
5f2eab64
JM
908 }
909 else
910 {
d5f8dde1
JM
911 /* node->last_matched_branch will have the start tag (the tag
912 just *before* the iteration). iter->arg->last_matched_branch
913 will have the tag(s) inside the iteration, the ones that
914 may need to be reset if the iteration doesn't match. So
915 before we merge iter->arg into node, we need to set up
916 a new tre_last_matched_t and tre_last_matched_branch_t,
917 using any of the inside tags as cmp_tag (we choose the first
918 tag found by bit_ffs). If there are no inside tags, we
919 don't bother creating the extra structures. */
920 tre_last_matched_branch_pre_t *b =
921 iter->arg->last_matched_branch;
922
923 if (b && b->n_tags > 0)
924 {
925 tre_last_matched_pre_t *u;
5f2eab64 926
d5f8dde1
JM
927 bit_ffs(b->tags, num_tags, &b->cmp_tag);
928 DPRINT((" ADDTAGS_AFTER_ITERATION: n_tags=%d "
929 "cmp_tag = %d\n", b->n_tags, b->cmp_tag));
930
931 u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t) +
932 sizeof(tre_last_matched_branch_pre_t)
933 + bitstr_size(tnfa->num_tags));
934 if (!u)
935 {
936 status = REG_ESPACE;
937 break;
938 }
939 u->branches = b;
940 u->n_branches = 1;
941 u->start_tag = b->cmp_tag;
942 u->tot_branches = b->tot_branches;
943 u->tot_last_matched = 1 + b->tot_last_matched;
944 u->tot_tags = b->tot_tags;
945
946 b = (tre_last_matched_branch_pre_t *)(u + 1);
947 b->last_matched = u;
948 b->n_last_matched = 1;
949 b->tot_branches = 1 + u->tot_branches;
950 b->tot_last_matched = u->tot_last_matched;
951 b->tot_tags = u->tot_tags;
952
953 iter->arg->last_matched_branch = b;
954 }
955 status = tre_merge_branches(mem, node, iter->arg, 0,
956 tnfa->num_tags);
957 if (status != REG_OK)
958 break;
959
960 if (iter->minimal)
961 {
962 /* Add a union with a left EMPTY literal and the right
963 being iter->arg. This should force the tags inside
964 the minimal iteration to prefer being unset */
965 if (iter->min == 0 && iter->max <= 1)
966 {
967 tre_ast_node_t *u, *e;
968
969 e = tre_ast_new_literal(mem, EMPTY, -1, -1);
970 if (e == NULL)
971 {
972 status = REG_ESPACE;
973 break;
974 }
975 u = tre_ast_new_union(mem, e, iter->arg);
976 if (u == NULL)
977 {
978 status = REG_ESPACE;
979 break;
980 }
981 iter->arg = u;
982 }
983
984 direction = TRE_TAG_MINIMIZE;
985 }
5f2eab64
JM
986 else
987 direction = TRE_TAG_MAXIMIZE;
d5f8dde1 988 DPRINT((" Setting direction to %s\n", tag_dir_str[direction]));
5f2eab64
JM
989 }
990 break;
991 }
992
993 case ADDTAGS_AFTER_CAT_LEFT:
994 {
995 int new_tag = tre_stack_pop_int(stack);
996 next_tag = tre_stack_pop_int(stack);
997 DPRINT(("After cat left, tag = %d, next_tag = %d\n",
998 tag, next_tag));
999 if (new_tag >= 0)
1000 {
1001 DPRINT((" Setting tag to %d\n", new_tag));
1002 tag = new_tag;
1003 }
1004 break;
1005 }
1006
1007 case ADDTAGS_AFTER_CAT_RIGHT:
d5f8dde1
JM
1008 {
1009 tre_catenation_t *cat;
1010
1011 DPRINT(("After cat right\n"));
1012 node = tre_stack_pop_voidptr(stack);
1013 cat = node->obj;
1014 if (first_pass)
1015 {
1016 node->num_tags = cat->left->num_tags + cat->right->num_tags;
1017 DPRINT((" ADDTAGS_AFTER_CAT_RIGHT: node->num_tags = %d\n",
1018 node->num_tags));
1019 }
1020 else
1021 {
1022 status = tre_merge_branches(mem, cat->left, cat->right, 0,
1023 tnfa->num_tags);
1024 if (status != REG_OK)
1025 break;
1026 status = tre_merge_branches(mem, node, cat->left, 0,
1027 tnfa->num_tags);
1028 }
1029 break;
1030 }
5f2eab64
JM
1031
1032 case ADDTAGS_AFTER_UNION_LEFT:
1033 DPRINT(("After union left\n"));
1034 /* Lift the bottom of the `regset' array so that when processing
1035 the right operand the items currently in the array are
1036 invisible. The original bottom was saved at ADDTAGS_UNION and
1037 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
d5f8dde1 1038 while (*regset != REGSET_UNSET)
5f2eab64 1039 regset++;
d5f8dde1 1040 regset_contains = 0;
5f2eab64
JM
1041 break;
1042
1043 case ADDTAGS_AFTER_UNION_RIGHT:
1044 {
d5f8dde1
JM
1045 int added_tags;
1046 tre_ast_node_t *orig;
1047 tre_union_t *uni;
1048 /* Note: node may not be a UNION, but a CATENATION with a left
1049 * tag. So that is why we pass the original node->obj on the
1050 * stack, to get the union's true values. */
1051
5f2eab64
JM
1052 DPRINT(("After union right\n"));
1053 node = tre_stack_pop_voidptr(stack);
d5f8dde1
JM
1054 orig = node->original ? node->original : node;
1055 uni = (tre_union_t *)orig->obj;
5f2eab64
JM
1056 added_tags = tre_stack_pop_int(stack);
1057 if (first_pass)
1058 {
d5f8dde1
JM
1059 node->num_tags = uni->left->num_tags + uni->right->num_tags
1060 + added_tags;
1061 if (uni->left_tag > 0)
1062 node->num_tags++;
1063 if (uni->right_tag > 0)
1064 node->num_tags++;
1065 DPRINT((" ADDTAGS_AFTER_UNION_RIGHT: node->num_tags = %d\n",
1066 node->num_tags));
5f2eab64
JM
1067 }
1068 regset = tre_stack_pop_voidptr(stack);
5f2eab64
JM
1069
1070 /* Add tags after both children, the left child gets a smaller
1071 tag than the right child. This guarantees that we prefer
1072 the left child over the right child. */
1073 /* XXX - This is not always necessary (if the children have
1074 tags which must be seen for every match of that child). */
d5f8dde1 1075 if (!first_pass && node->make_branches)
5f2eab64 1076 {
d5f8dde1
JM
1077 tre_last_matched_branch_pre_t *lb =
1078 uni->left->last_matched_branch;
1079 tre_last_matched_branch_pre_t *rb =
1080 uni->right->last_matched_branch;
1081 tre_last_matched_pre_t *lu =
1082 uni->left->last_matched_in_progress;
1083 tre_last_matched_pre_t *ru =
1084 uni->right->last_matched_in_progress;
1085 tre_last_matched_pre_t *u;
1086 /* We don't need to call tre_merge_branches because these
1087 * tags don't participate in submatch ranges, so don't need
1088 * to be recorded. But we do set the cmp_tag entry of the
1089 * tre_last_matched_branch_pre_t, so we might call
1090 * tre_merge_branches if we need to create an empty
1091 * tre_last_matched_branch_pre_t. */
1092 if (uni->left_tag > 0)
1093 {
1094 DPRINT(("Setting t%d direction to maximize\n",
1095 uni->left_tag));
1096 status = tre_add_tag_right(mem, uni->left, uni->left_tag);
1097 if (status != REG_OK)
1098 break;
1099 tnfa->tag_directions[uni->left_tag] = TRE_TAG_MAXIMIZE;
1100 if (!lb)
1101 {
1102 status = tre_merge_branches(mem, uni->left, NULL, -1,
1103 tnfa->num_tags);
1104 if (status != REG_OK)
1105 break;
1106 lb = uni->left->last_matched_branch;
1107 }
1108 lb->cmp_tag = uni->left_tag;
1109 }
1110 if (uni->right_tag > 0)
5f2eab64 1111 {
d5f8dde1
JM
1112 DPRINT(("Setting t%d direction to maximize\n",
1113 uni->right_tag));
1114 status = tre_add_tag_right(mem, uni->right, uni->right_tag);
1115 if (status != REG_OK)
1116 break;
1117 tnfa->tag_directions[uni->right_tag] = TRE_TAG_MAXIMIZE;
1118 if (!rb)
1119 {
1120 status = tre_merge_branches(mem, uni->right, NULL, -1,
1121 tnfa->num_tags);
1122 if (status != REG_OK)
1123 break;
1124 rb = uni->right->last_matched_branch;
1125 }
1126 rb->cmp_tag = uni->right_tag;
5f2eab64 1127 }
d5f8dde1
JM
1128 /* Now merge the tre_last_matched_branch_pre_t into a
1129 tre_last_matched_pre_t */
1130 if (lu == NULL)
1131 {
1132 if (ru == NULL)
1133 {
1134 /* Create a new tre_last_matched_pre_t */
1135 u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t));
1136 if (!u)
1137 {
1138 status = REG_ESPACE;
1139 break;
1140 }
1141 u->tot_last_matched = 1;
1142
1143 if (lb)
1144 {
1145 u->branches = lb;
1146 u->n_branches = 1;
1147 u->tot_branches += lb->tot_branches;
1148 u->tot_last_matched += lb->tot_last_matched;
1149 u->tot_tags += lb->tot_tags;
1150 if (rb)
1151 {
1152 lb->next = rb;
1153 u->n_branches++;
1154 u->tot_branches += rb->tot_branches;
1155 u->tot_last_matched += rb->tot_last_matched;
1156 u->tot_tags += rb->tot_tags;
1157 }
1158 }
1159 else if (rb)
1160 {
1161 u->branches = rb;
1162 u->n_branches = 1;
1163 u->tot_branches += rb->tot_branches;
1164 u->tot_last_matched += rb->tot_last_matched;
1165 u->tot_tags += rb->tot_tags;
1166 }
1167 }
1168 else
1169 {
1170 /* Use ru, and add lb */
1171 u = ru;
1172 if (lb)
1173 {
1174 lb->next = u->branches;
1175 u->branches = lb;
1176 u->n_branches++;
1177 u->tot_branches += lb->tot_branches;
1178 u->tot_last_matched += lb->tot_last_matched;
1179 u->tot_tags += lb->tot_tags;
1180 }
1181 }
1182 }
1183 else if (ru == NULL)
1184 {
1185 /* Use lu, and add rb */
1186 u = lu;
1187 if (rb)
1188 {
1189 rb->next = u->branches;
1190 u->branches = rb;
1191 u->n_branches++;
1192 u->tot_branches += rb->tot_branches;
1193 u->tot_last_matched += rb->tot_last_matched;
1194 u->tot_tags += rb->tot_tags;
1195 }
1196 }
1197 else
1198 {
1199 /* Merge lu and ru into lu */
1200 if (lu->branches)
1201 {
1202 if (ru->branches)
1203 {
1204 tre_last_matched_branch_pre_t *b = lu->branches;
1205 while (b->next) b = b->next;
1206 b->next = ru->branches;
1207 lu->n_branches += ru->n_branches;
1208 }
1209 }
1210 else if (ru->branches)
1211 {
1212 lu->branches = ru->branches;
1213 lu->n_branches = ru->n_branches;
1214 }
1215 lu->tot_branches += ru->tot_branches;
1216 lu->tot_last_matched += ru->tot_last_matched - 1;
1217 lu->tot_tags += ru->tot_tags;
1218 u = lu;
1219 }
1220 node->last_matched_in_progress = u;
5f2eab64
JM
1221 }
1222 direction = TRE_TAG_MAXIMIZE;
1223 break;
1224 }
1225
d5f8dde1
JM
1226 case ADDTAGS_AFTER_UNION_TOP: /* only called when not first_pass */
1227 {
1228 tre_last_matched_branch_pre_t *b;
1229 tre_last_matched_pre_t *u;
1230 int start_tag;
1231
1232 DPRINT(("After union top\n"));
1233 node = tre_stack_pop_voidptr(stack);
1234 start_tag = tre_stack_pop_int(stack);
1235 b = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
1236 + bitstr_size(tnfa->num_tags));
1237 if (!b)
1238 {
1239 status = REG_ESPACE;
1240 break;
1241 }
1242
1243 u = node->last_matched_in_progress;
1244 u->start_tag = start_tag;
1245 b->tot_branches = 1 + u->tot_branches;
1246 b->tot_last_matched = u->tot_last_matched;
1247 b->tot_tags = u->tot_tags;
1248 b->last_matched = u;
1249 b->n_last_matched = 1;
1250 node->last_matched_branch = b;
1251 node->last_matched_in_progress = NULL;
1252 break;
1253 }
1254
5f2eab64
JM
1255 default:
1256 assert(0);
1257 break;
1258
1259 } /* end switch(symbol) */
1260 } /* end while(tre_stack_num_objects(stack) > bottom) */
1261
d5f8dde1
JM
1262 if (status != REG_OK)
1263 {
1264 DPRINT(("Error during %s pass\n", first_pass ? "first" : "second"));
1265 goto error_post_compile;
1266 }
1267
5f2eab64 1268 if (!first_pass)
d5f8dde1
JM
1269 {
1270 int i;
1271 if (num_tags != tnfa->num_tags)
1272 {
1273 DPRINT(("num_tags(%d) != tnfa->num_tags(%d)\n", num_tags,
1274 tnfa->num_tags));
1275 status = REG_BADPAT;
1276 goto error_post_compile;
1277 }
1278
1279 tre_purge_regset(regset, tnfa, tag);
1280 DPRINT(("Setting t%d to %s\n", num_tags,
1281 tag_dir_str[direction]));
1282 tnfa->tag_directions[num_tags] = direction;
1283
1284 if (rtp > reorder_tags + 2 * tnfa->num_reorder_tags)
1285 {
1286 DPRINT(("Processed %d reorder tags instead of %d\n",
1287 (int)(rtp - reorder_tags) / 2, tnfa->num_reorder_tags));
1288 status = REG_BADPAT;
1289 goto error_post_compile;
1290 }
1291 *rtp = -1;
1292#if TRE_DEBUG
1293 if (reorder_tags[0] >= 0)
1294 {
1295 DPRINT(("reorder_tags:\n"));
1296 for (rtp = reorder_tags; *rtp >= 0;)
1297 {
1298 DPRINT(("%d after ", *rtp++));
1299 DPRINT(("%d\n", *rtp++));
1300 }
1301 }
1302 else
1303 DPRINT(("No reorder_tags\n"));
1304#endif /* TRE_DEBUG */
1305
1306 /* Initialize to_reorder */
1307 for (i = 0; i < num_tags; i++)
1308 to_reorder[i] = i;
1309 /* Use to_seq_order to convert reorder_tags values, and use those to
1310 * reorder to_reorder */
1311 for (rtp = reorder_tags; *rtp >= 0;)
1312 {
1313 int j, high, low;
1314 int ti = *rtp++;
1315
1316 /* Skip reordering the final tag */
1317 if (ti >= num_tags)
1318 {
1319 DPRINT(("Skipping reorder of %d\n", ti));
1320 rtp++;
1321 continue;
1322 }
1323 /* The number of the tag to reorder */
1324 high = to_reorder[ti];
1325 /* Reorder after this tag */
1326 low = to_reorder[*rtp++];
1327
1328 DPRINT(("ti=%d high=%d low=%d\n", ti, high, low));
1329 if (low > high)
1330 {
1331 DPRINT(("Tag %d already before %d\n", high, low));
1332 continue;
1333 }
1334 for (j = 0; j < num_tags; j++)
1335 if (to_reorder[j] > low && to_reorder[j] < high)
1336 to_reorder[j]++;
1337 to_reorder[ti] = low + 1;
1338#ifdef TRE_DEBUG
1339 DPRINT(("to_reorder=("));
1340 for (j = 0; j < num_tags; j++)
1341 {
1342 DPRINT(("%d", to_reorder[j]));
1343 if (j < num_tags - 1)
1344 DPRINT((","));
1345 }
1346 DPRINT((")\n"));
1347#endif /* TRE_DEBUG */
1348 }
1349 /* Determine if reordering in really necessary */
1350 {
1351 int need_reorder = 0;
1352 for (i = 0; i < num_tags; i++)
1353 if(to_reorder[i] != i)
1354 {
1355 need_reorder = 1;
1356 break;
1357 }
1358 /* If need_reorder is not set, free reorder_tags, and set to NULL,
1359 * indicating no reordering is needed */
1360 if (!need_reorder)
1361 {
1362 DPRINT(("Don't need to reorder\n"));
1363 xfree(reorder_tags);
1364 reorder_tags = NULL;
1365 }
1366 }
1367 }
5f2eab64 1368
d5f8dde1 1369 if (reorder_tags)
5f2eab64
JM
1370 {
1371 int i;
d5f8dde1
JM
1372 tre_tag_direction_t *new_tag_directions;
1373#if TRE_DEBUG
1374 DPRINT(("to_reorder:"));
1375 for (i = 0; i < num_tags; i++)
1376 DPRINT((" %d->%d", i, to_reorder[i]));
1377 DPRINT(("\n"));
1378#endif /* TRE_DEBUG */
1379
1380 DPRINT(("Reordering submatch_data\n"));
1381 for (i = 0; i < tnfa->num_submatches; i++)
1382 {
1383#if TRE_DEBUG
1384 int so = tnfa->submatch_data[i].so_tag;
1385 int eo = tnfa->submatch_data[i].eo_tag;
1386#endif /* TRE_DEBUG */
1387 tnfa->submatch_data[i].so_tag =
1388 to_reorder[tnfa->submatch_data[i].so_tag];
1389 tnfa->submatch_data[i].eo_tag =
1390 tnfa->submatch_data[i].eo_tag < num_tags ?
1391 to_reorder[tnfa->submatch_data[i].eo_tag] :
1392 tnfa->submatch_data[i].eo_tag;
1393 DPRINT(("pmatch[%d]: {%d, %d}->{%d, %d}\n", i, so, eo,
1394 tnfa->submatch_data[i].so_tag,
1395 tnfa->submatch_data[i].eo_tag));
1396 }
1397
1398 DPRINT(("Reordering tag_directions\n"));
1399 /* We only allocate num_tags directions and reorder them. The
1400 * num_tags-th direction (end tag) is left unchanged. */
1401 new_tag_directions = xmalloc(sizeof(*new_tag_directions) * num_tags);
1402 if (new_tag_directions == NULL)
1403 {
1404 status = REG_ESPACE;
1405 goto error_post_compile;
1406 }
1407 for (i = 0; i < num_tags; i++)
1408 {
1409 new_tag_directions[to_reorder[i]] = tnfa->tag_directions[i];
1410 }
1411#if TRE_DEBUG
1412 for (i = 0; i < num_tags; i++)
1413 {
1414 DPRINT(("t%d %s->%s\n", i,
1415 tag_dir_str[tnfa->tag_directions[i]],
1416 tag_dir_str[new_tag_directions[i]]));
1417 }
1418 DPRINT(("t%d %s->%s\n", num_tags,
1419 tag_dir_str[tnfa->tag_directions[num_tags]],
1420 tag_dir_str[tnfa->tag_directions[num_tags]]));
1421#endif /* TRE_DEBUG */
1422 memcpy(tnfa->tag_directions, new_tag_directions, sizeof(*new_tag_directions) * num_tags);
1423 xfree(new_tag_directions);
1424
1425 DPRINT(("Reordering minimal_tags\n"));
1426 for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
1427 tnfa->minimal_tags[i] = tnfa->minimal_tags[i] < num_tags ?
1428 to_reorder[tnfa->minimal_tags[i]] :
1429 tnfa->minimal_tags[i];
1430
1431 DPRINT(("Reordering AST tags\n"));
1432 STACK_PUSH(stack, voidptr, tree);
1433 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1434 {
1435 node = tre_stack_pop_voidptr(stack);
1436
1437 switch (node->type)
1438 {
1439 case LITERAL:
1440 {
1441 tre_literal_t *lit = (tre_literal_t *)node->obj;
1442 if (IS_TAG(lit))
1443 lit->code_max = to_reorder[lit->code_max];
1444 break;
1445 }
1446
1447 case UNION:
1448 {
1449 tre_union_t *uni = (tre_union_t *)node->obj;
1450 STACK_PUSHX(stack, voidptr, uni->right);
1451 STACK_PUSHX(stack, voidptr, uni->left);
1452 break;
1453 }
1454
1455 case CATENATION:
1456 {
1457 tre_catenation_t *cat = (tre_catenation_t *)node->obj;
1458 STACK_PUSHX(stack, voidptr, cat->right);
1459 STACK_PUSHX(stack, voidptr, cat->left);
1460 break;
1461 }
1462
1463 case ITERATION:
1464 {
1465 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
1466 STACK_PUSHX(stack, voidptr, iter->arg);
1467 break;
1468 }
1469
1470 default:
1471 assert(0);
1472 break;
1473 }
1474 }
1475 if (status != REG_OK)
1476 {
1477 DPRINT(("Error while reordering tags\n"));
1478 goto error_post_compile;
1479 }
1480 }
1481
1482
1483 if (!first_pass)
1484 {
1485 if (tree->last_matched_branch)
1486 {
1487 tre_last_matched_branch_t *buf, *b, *bb;
1488 tre_last_matched_branch_pre_t *bp;
1489 tre_last_matched_t *u, *uu;
1490 tre_last_matched_pre_t *up;
1491 int *t;
1492 int i;
1493#ifdef TRE_DEBUG
1494 tre_last_matched_branch_t *_b;
1495 tre_last_matched_t *_u;
1496 int *_t;
1497
1498 DPRINT(("last_match_branch_pre:\n"));
1499 print_last_match_branch_pre(tree->last_matched_branch, 0, num_tags);
1500#endif /* TRE_DEBUG */
1501 buf = (tre_last_matched_branch_t *)xcalloc(1,
1502 tree->last_matched_branch->tot_branches
1503 * sizeof(tre_last_matched_branch_t) +
1504 tree->last_matched_branch->tot_last_matched
1505 * sizeof(tre_last_matched_t) +
1506 tree->last_matched_branch->tot_tags *
1507 sizeof(int));
1508 if (!buf)
1509 {
1510 status = REG_ESPACE;
1511 goto error_post_compile;
1512 }
1513
1514 b = buf;
1515 u = (tre_last_matched_t *)(b +
1516 tree->last_matched_branch->tot_branches);
1517 t = (int *)(u + tree->last_matched_branch->tot_last_matched);
1518#ifdef TRE_DEBUG
1519 _b = b;
1520 _u = u;
1521 _t = t;
1522#endif /* TRE_DEBUG */
1523 DPRINT(("Copying info_pre to info\n"));
1524 STACK_PUSH(stack, voidptr, tree->last_matched_branch);
1525 STACK_PUSH(stack, int, 1);
1526 STACK_PUSH(stack, int, COPY_LAST_MATCHED_BRANCH);
1527
1528 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1529 {
1530 switch (tre_stack_pop_int(stack))
1531 {
1532 case COPY_LAST_MATCHED_BRANCH:
1533 i = tre_stack_pop_int(stack);
1534 /* The tre_last_matched_branch_pre_t * is still on the
1535 stack */
1536 STACK_PUSHX(stack, voidptr, b);
1537 STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1538 b += i;
1539 break;
1540
1541 case COPY_LAST_MATCHED_BRANCH_NEXT:
1542 bb = tre_stack_pop_voidptr(stack);
1543 bp = tre_stack_pop_voidptr(stack);
1544 bb->n_last_matched = bp->n_last_matched;
1545 bb->cmp_tag = bp->cmp_tag;
1546 if (bp->n_tags > 0)
1547 {
1548 int n;
1549 n = bb->n_tags = bp->n_tags;
1550 bb->tags = t;
1551 for (i = 0; i < num_tags; i++)
1552 if (bit_test(bp->tags, i))
1553 {
1554 *t++ = i;
1555 if (--n <= 0)
1556 break;
1557 }
1558 }
1559 if (bp->next)
1560 {
1561 STACK_PUSHX(stack, voidptr, bp->next);
1562 STACK_PUSHX(stack, voidptr, bb + 1);
1563 STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1564 }
1565 if (bp->n_last_matched > 0)
1566 {
1567 bb->last_matched = u;
1568 STACK_PUSHX(stack, voidptr, bp->last_matched);
1569 STACK_PUSHX(stack, int, bp->n_last_matched);
1570 STACK_PUSHX(stack, int, COPY_LAST_MATCHED);
1571 }
1572 break;
1573
1574 case COPY_LAST_MATCHED:
1575 i = tre_stack_pop_int(stack);
1576 /* The tre_last_matched_pre_t * is still on the stack */
1577 STACK_PUSHX(stack, voidptr, u);
1578 STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1579 u += i;
1580 break;
1581
1582 case COPY_LAST_MATCHED_NEXT:
1583 uu = tre_stack_pop_voidptr(stack);
1584 up = tre_stack_pop_voidptr(stack);
1585 uu->n_branches = up->n_branches;
1586 uu->branches = b;
1587 uu->start_tag = up->start_tag;
1588 if (up->next)
1589 {
1590 STACK_PUSHX(stack, voidptr, up->next);
1591 STACK_PUSHX(stack, voidptr, uu + 1);
1592 STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1593 }
1594 STACK_PUSHX(stack, voidptr, up->branches);
1595 STACK_PUSHX(stack, int, up->n_branches);
1596 STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH);
1597 break;
1598 }
1599 }
1600 if (status != REG_OK)
1601 goto error_post_compile;
1602#ifdef TRE_DEBUG
1603 DPRINT(("last_matched_branch:\n"));
1604 print_last_match_branch(buf, 0);
1605 if (b != _b + tree->last_matched_branch->tot_branches)
1606 DPRINT(("b/%p != _b + tree->last_matched_branch->tot_branches/%p\n",
1607 b, _b + tree->last_matched_branch->tot_branches));
1608 if (u != _u + tree->last_matched_branch->tot_last_matched)
1609 DPRINT(("u/%p != _u + "
1610 "tree->last_matched_branch->tot_last_matched/%p\n",
1611 u, _u + tree->last_matched_branch->tot_last_matched));
1612 if (t != _t + tree->last_matched_branch->tot_tags)
1613 DPRINT(("t/%p != _t + tree->last_matched_branch->tot_tags/%p\n",
1614 t, _t + tree->last_matched_branch->tot_tags));
1615#endif /* TRE_DEBUG */
1616 tnfa->last_matched_branch = buf;
1617 }
1618#ifdef TRE_DEBUG
1619 else
1620 DPRINT(("No last_match_branch_pre\n"));
1621#endif /* TRE_DEBUG */
5f2eab64
JM
1622 }
1623
1624 DPRINT(("tre_add_tags: %s complete. Number of tags %d.\n",
1625 first_pass? "First pass" : "Second pass", num_tags));
d5f8dde1
JM
1626#ifdef TRE_DEBUG
1627 tre_ast_print(tree);
1628#endif /* TRE_DEBUG */
1629 DPRINT(("tre_add_tags: tree->num_tags=%d num_tags=%d\n", tree->num_tags,
1630 num_tags));
5f2eab64
JM
1631 assert(tree->num_tags == num_tags);
1632 tnfa->end_tag = num_tags;
1633 tnfa->num_tags = num_tags;
1634 tnfa->num_minimals = num_minimals;
d5f8dde1
JM
1635error_post_compile:
1636 xfree(reorder_tags);
1637error_reorder_tags:
5f2eab64 1638 xfree(orig_regset);
d5f8dde1 1639error_regset:
5f2eab64
JM
1640 return status;
1641}
1642
1643
1644
1645/*
1646 AST to TNFA compilation routines.
1647*/
1648
1649typedef enum {
1650 COPY_RECURSE,
1651 COPY_SET_RESULT_PTR
1652} tre_copyast_symbol_t;
1653
1654/* Flags for tre_copy_ast(). */
1655#define COPY_REMOVE_TAGS 1
1656#define COPY_MAXIMIZE_FIRST_TAG 2
1657
1658static reg_errcode_t
1659tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1660 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1661 tre_ast_node_t **copy, int *max_pos)
1662{
1663 reg_errcode_t status = REG_OK;
1664 int bottom = tre_stack_num_objects(stack);
1665 int num_copied = 0;
1666 int first_tag = 1;
1667 tre_ast_node_t **result = copy;
1668 tre_copyast_symbol_t symbol;
1669
1670 STACK_PUSH(stack, voidptr, ast);
1671 STACK_PUSH(stack, int, COPY_RECURSE);
1672
1673 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1674 {
1675 tre_ast_node_t *node;
1676 if (status != REG_OK)
1677 break;
1678
1679 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1680 switch (symbol)
1681 {
1682 case COPY_SET_RESULT_PTR:
1683 result = tre_stack_pop_voidptr(stack);
1684 break;
1685 case COPY_RECURSE:
1686 node = tre_stack_pop_voidptr(stack);
1687 switch (node->type)
1688 {
1689 case LITERAL:
1690 {
1691 tre_literal_t *lit = node->obj;
1692 int pos = lit->position;
1693 int min = lit->code_min;
1694 int max = lit->code_max;
d5f8dde1
JM
1695 tre_bracket_match_list_t *list = !IS_SPECIAL(lit) ?
1696 lit->u.bracket_match_list :
1697 NULL;
5f2eab64
JM
1698 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1699 {
1700 /* XXX - e.g. [ab] has only one position but two
1701 nodes, so we are creating holes in the state space
1702 here. Not fatal, just wastes memory. */
1703 pos += *pos_add;
1704 num_copied++;
1705 }
1706 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1707 {
1708 /* Change this tag to empty. */
1709 min = EMPTY;
1710 max = pos = -1;
1711 }
1712 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1713 && first_tag)
1714 {
1715 /* Maximize the first tag. */
d5f8dde1
JM
1716 if (tag_directions[max] == TRE_TAG_LEFT_MAXIMIZE)
1717 tag_directions[max] = TRE_TAG_MAXIMIZE;
5f2eab64
JM
1718 first_tag = 0;
1719 }
1720 *result = tre_ast_new_literal(mem, min, max, pos);
1721 if (*result == NULL)
1722 status = REG_ESPACE;
1723
1724 if (pos > *max_pos)
1725 *max_pos = pos;
d5f8dde1
JM
1726
1727 if (!IS_SPECIAL(lit))
1728 ((tre_literal_t *)(*result)->obj)->u.bracket_match_list
1729 = list;
5f2eab64
JM
1730 break;
1731 }
1732 case UNION:
1733 {
1734 tre_union_t *uni = node->obj;
1735 tre_union_t *tmp;
1736 *result = tre_ast_new_union(mem, uni->left, uni->right);
1737 if (*result == NULL)
1738 {
1739 status = REG_ESPACE;
1740 break;
1741 }
1742 tmp = (*result)->obj;
1743 result = &tmp->left;
1744 STACK_PUSHX(stack, voidptr, uni->right);
1745 STACK_PUSHX(stack, int, COPY_RECURSE);
1746 STACK_PUSHX(stack, voidptr, &tmp->right);
1747 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1748 STACK_PUSHX(stack, voidptr, uni->left);
1749 STACK_PUSHX(stack, int, COPY_RECURSE);
1750 break;
1751 }
1752 case CATENATION:
1753 {
1754 tre_catenation_t *cat = node->obj;
1755 tre_catenation_t *tmp;
1756 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1757 if (*result == NULL)
1758 {
1759 status = REG_ESPACE;
1760 break;
1761 }
1762 tmp = (*result)->obj;
1763 tmp->left = NULL;
1764 tmp->right = NULL;
1765 result = &tmp->left;
1766
1767 STACK_PUSHX(stack, voidptr, cat->right);
1768 STACK_PUSHX(stack, int, COPY_RECURSE);
1769 STACK_PUSHX(stack, voidptr, &tmp->right);
1770 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1771 STACK_PUSHX(stack, voidptr, cat->left);
1772 STACK_PUSHX(stack, int, COPY_RECURSE);
1773 break;
1774 }
1775 case ITERATION:
1776 {
1777 tre_iteration_t *iter = node->obj;
1778 STACK_PUSHX(stack, voidptr, iter->arg);
1779 STACK_PUSHX(stack, int, COPY_RECURSE);
1780 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1781 iter->max, iter->minimal);
1782 if (*result == NULL)
1783 {
1784 status = REG_ESPACE;
1785 break;
1786 }
1787 iter = (*result)->obj;
1788 result = &iter->arg;
1789 break;
1790 }
1791 default:
1792 assert(0);
1793 break;
1794 }
1795 break;
1796 }
1797 }
1798 *pos_add += num_copied;
1799 return status;
1800}
1801
1802typedef enum {
1803 EXPAND_RECURSE,
1804 EXPAND_AFTER_ITER
1805} tre_expand_ast_symbol_t;
1806
1807/* Expands each iteration node that has a finite nonzero minimum or maximum
1808 iteration count to a catenated sequence of copies of the node. */
1809static reg_errcode_t
1810tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1811 int *position, tre_tag_direction_t *tag_directions,
1812 int *max_depth)
1813{
1814 reg_errcode_t status = REG_OK;
1815 int bottom = tre_stack_num_objects(stack);
1816 int pos_add = 0;
1817 int pos_add_total = 0;
1818 int max_pos = 0;
d5f8dde1 1819#ifdef TRE_APPROX
5f2eab64
JM
1820 /* Current approximate matching parameters. */
1821 int params[TRE_PARAM_LAST];
1822 /* Approximate parameter nesting level. */
1823 int params_depth = 0;
d5f8dde1 1824#endif /* TRE_APPROX */
5f2eab64 1825 int iter_depth = 0;
d5f8dde1 1826#ifdef TRE_APPROX
5f2eab64 1827 int i;
d5f8dde1 1828#endif /* TRE_APPROX */
5f2eab64 1829
d5f8dde1 1830#ifdef TRE_APPROX
5f2eab64
JM
1831 for (i = 0; i < TRE_PARAM_LAST; i++)
1832 params[i] = TRE_PARAM_DEFAULT;
d5f8dde1 1833#endif /* TRE_APPROX */
5f2eab64
JM
1834
1835 STACK_PUSHR(stack, voidptr, ast);
1836 STACK_PUSHR(stack, int, EXPAND_RECURSE);
1837 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1838 {
1839 tre_ast_node_t *node;
1840 tre_expand_ast_symbol_t symbol;
1841
1842 if (status != REG_OK)
1843 break;
1844
1845 DPRINT(("pos_add %d\n", pos_add));
1846
1847 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1848 node = tre_stack_pop_voidptr(stack);
1849 switch (symbol)
1850 {
1851 case EXPAND_RECURSE:
1852 switch (node->type)
1853 {
1854 case LITERAL:
1855 {
1856 tre_literal_t *lit= node->obj;
1857 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1858 {
1859 lit->position += pos_add;
1860 if (lit->position > max_pos)
1861 max_pos = lit->position;
1862 }
1863 break;
1864 }
1865 case UNION:
1866 {
1867 tre_union_t *uni = node->obj;
1868 STACK_PUSHX(stack, voidptr, uni->right);
1869 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1870 STACK_PUSHX(stack, voidptr, uni->left);
1871 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1872 break;
1873 }
1874 case CATENATION:
1875 {
1876 tre_catenation_t *cat = node->obj;
1877 STACK_PUSHX(stack, voidptr, cat->right);
1878 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1879 STACK_PUSHX(stack, voidptr, cat->left);
1880 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1881 break;
1882 }
1883 case ITERATION:
1884 {
1885 tre_iteration_t *iter = node->obj;
1886 STACK_PUSHX(stack, int, pos_add);
1887 STACK_PUSHX(stack, voidptr, node);
1888 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1889 STACK_PUSHX(stack, voidptr, iter->arg);
1890 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1891 /* If we are going to expand this node at EXPAND_AFTER_ITER
1892 then don't increase the `pos' fields of the nodes now, it
1893 will get done when expanding. */
1894 if (iter->min > 1 || iter->max > 1)
1895 pos_add = 0;
1896 iter_depth++;
1897 DPRINT(("iter\n"));
1898 break;
1899 }
1900 default:
1901 assert(0);
1902 break;
1903 }
1904 break;
1905 case EXPAND_AFTER_ITER:
1906 {
1907 tre_iteration_t *iter = node->obj;
1908 int pos_add_last;
1909 pos_add = tre_stack_pop_int(stack);
1910 pos_add_last = pos_add;
d5f8dde1
JM
1911 /* Originally (in tre_parse_bound), if min == 0 && max == 0, we
1912 immediate replace the whole iteration with EMPTY. This
1913 unfortunately drops any submatches, and messes up setting the
1914 pmatch values (we can get tags of -1, and tag values in the
1915 billions). So we left it there and replace with EMPTY here. */
1916 if (iter->min == 0 && iter->max == 0)
1917 {
1918 tre_ast_node_t *empty = tre_ast_new_literal(mem, EMPTY, -1, -1);
1919 if (empty == NULL)
1920 return REG_ESPACE;
1921 node->obj = empty->obj;
1922 node->type = empty->type;
1923 }
1924 else if (iter->min > 1 || iter->max > 1)
5f2eab64
JM
1925 {
1926 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1927 int j;
1928 int pos_add_save = pos_add;
1929
1930 /* Create a catenated sequence of copies of the node. */
1931 for (j = 0; j < iter->min; j++)
1932 {
1933 tre_ast_node_t *copy;
1934 /* Remove tags from all but the last copy. */
1935 int flags = ((j + 1 < iter->min)
1936 ? COPY_REMOVE_TAGS
1937 : COPY_MAXIMIZE_FIRST_TAG);
1938 DPRINT((" pos_add %d\n", pos_add));
1939 pos_add_save = pos_add;
1940 status = tre_copy_ast(mem, stack, iter->arg, flags,
1941 &pos_add, tag_directions, &copy,
1942 &max_pos);
1943 if (status != REG_OK)
1944 return status;
1945 if (seq1 != NULL)
1946 seq1 = tre_ast_new_catenation(mem, seq1, copy);
1947 else
1948 seq1 = copy;
1949 if (seq1 == NULL)
1950 return REG_ESPACE;
1951 }
1952
1953 if (iter->max == -1)
1954 {
1955 /* No upper limit. */
1956 pos_add_save = pos_add;
1957 status = tre_copy_ast(mem, stack, iter->arg, 0,
1958 &pos_add, NULL, &seq2, &max_pos);
1959 if (status != REG_OK)
1960 return status;
1961 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1962 if (seq2 == NULL)
1963 return REG_ESPACE;
1964 }
1965 else
1966 {
1967 for (j = iter->min; j < iter->max; j++)
1968 {
1969 tre_ast_node_t *tmp, *copy;
1970 pos_add_save = pos_add;
1971 status = tre_copy_ast(mem, stack, iter->arg, 0,
1972 &pos_add, NULL, &copy, &max_pos);
1973 if (status != REG_OK)
1974 return status;
1975 if (seq2 != NULL)
1976 seq2 = tre_ast_new_catenation(mem, copy, seq2);
1977 else
1978 seq2 = copy;
1979 if (seq2 == NULL)
1980 return REG_ESPACE;
1981 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1982 if (tmp == NULL)
1983 return REG_ESPACE;
1984 seq2 = tre_ast_new_union(mem, tmp, seq2);
1985 if (seq2 == NULL)
1986 return REG_ESPACE;
1987 }
1988 }
1989
1990 pos_add = pos_add_save;
1991 if (seq1 == NULL)
1992 seq1 = seq2;
1993 else if (seq2 != NULL)
1994 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1995 if (seq1 == NULL)
1996 return REG_ESPACE;
1997 node->obj = seq1->obj;
1998 node->type = seq1->type;
1999 }
2000
2001 iter_depth--;
2002 pos_add_total += pos_add - pos_add_last;
2003 if (iter_depth == 0)
2004 pos_add = pos_add_total;
2005
d5f8dde1 2006#ifdef TRE_APPROX
5f2eab64
JM
2007 /* If approximate parameters are specified, surround the result
2008 with two parameter setting nodes. The one on the left sets
2009 the specified parameters, and the one on the right restores
2010 the old parameters. */
2011 if (iter->params)
2012 {
2013 tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
2014 int *old_params;
2015
2016 tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
2017 if (!tmp_l)
2018 return REG_ESPACE;
2019 ((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
2020 iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
2021 tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
2022 if (!tmp_r)
2023 return REG_ESPACE;
2024 old_params = tre_mem_alloc(mem, sizeof(*old_params)
2025 * TRE_PARAM_LAST);
2026 if (!old_params)
2027 return REG_ESPACE;
2028 for (i = 0; i < TRE_PARAM_LAST; i++)
2029 old_params[i] = params[i];
2030 ((tre_literal_t *)tmp_r->obj)->u.params = old_params;
2031 old_params[TRE_PARAM_DEPTH] = params_depth;
2032 /* XXX - this is the only place where ast_new_node is
2033 needed -- should be moved inside AST module. */
2034 node_copy = tre_ast_new_node(mem, ITERATION,
2035 sizeof(tre_iteration_t));
2036 if (!node_copy)
2037 return REG_ESPACE;
2038 node_copy->obj = node->obj;
2039 tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
2040 if (!tmp_node)
2041 return REG_ESPACE;
2042 tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
2043 if (!tmp_node)
2044 return REG_ESPACE;
2045 /* Replace the contents of `node' with `tmp_node'. */
2046 memcpy(node, tmp_node, sizeof(*node));
2047 node->obj = tmp_node->obj;
2048 node->type = tmp_node->type;
2049 params_depth++;
2050 if (params_depth > *max_depth)
2051 *max_depth = params_depth;
2052 }
d5f8dde1 2053#endif /* TRE_APPROX */
5f2eab64
JM
2054 break;
2055 }
2056 default:
2057 assert(0);
2058 break;
2059 }
2060 }
2061
2062 *position += pos_add_total;
2063
2064 /* `max_pos' should never be larger than `*position' if the above
2065 code works, but just an extra safeguard let's make sure
2066 `*position' is set large enough so enough memory will be
2067 allocated for the transition table. */
2068 if (max_pos > *position)
2069 *position = max_pos;
2070
2071#ifdef TRE_DEBUG
2072 DPRINT(("Expanded AST:\n"));
2073 tre_ast_print(ast);
2074 DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
2075#endif
2076
2077 return status;
2078}
2079
2080static tre_pos_and_tags_t *
2081tre_set_empty(tre_mem_t mem)
2082{
2083 tre_pos_and_tags_t *new_set;
2084
2085 new_set = tre_mem_calloc(mem, sizeof(*new_set));
2086 if (new_set == NULL)
2087 return NULL;
2088
2089 new_set[0].position = -1;
2090 new_set[0].code_min = -1;
2091 new_set[0].code_max = -1;
2092
2093 return new_set;
2094}
2095
2096static tre_pos_and_tags_t *
2097tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
d5f8dde1 2098 tre_bracket_match_list_t *bracket_match_list, int backref)
5f2eab64
JM
2099{
2100 tre_pos_and_tags_t *new_set;
2101
2102 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2103 if (new_set == NULL)
2104 return NULL;
2105
2106 new_set[0].position = position;
2107 new_set[0].code_min = code_min;
2108 new_set[0].code_max = code_max;
d5f8dde1 2109 new_set[0].bracket_match_list = bracket_match_list;
5f2eab64
JM
2110 new_set[0].backref = backref;
2111 new_set[1].position = -1;
2112 new_set[1].code_min = -1;
2113 new_set[1].code_max = -1;
2114
2115 return new_set;
2116}
2117
2118static tre_pos_and_tags_t *
2119tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2120 int *tags, int assertions, int *params)
2121{
2122 int s1, s2, i, j;
2123 tre_pos_and_tags_t *new_set;
2124 int *new_tags;
2125 int num_tags;
2126
2127 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2128 for (s1 = 0; set1[s1].position >= 0; s1++);
2129 for (s2 = 0; set2[s2].position >= 0; s2++);
2130 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2131 if (!new_set )
2132 return NULL;
2133
2134 for (s1 = 0; set1[s1].position >= 0; s1++)
2135 {
2136 new_set[s1].position = set1[s1].position;
2137 new_set[s1].code_min = set1[s1].code_min;
2138 new_set[s1].code_max = set1[s1].code_max;
2139 new_set[s1].assertions = set1[s1].assertions | assertions;
d5f8dde1 2140 new_set[s1].bracket_match_list = set1[s1].bracket_match_list;
5f2eab64
JM
2141 new_set[s1].backref = set1[s1].backref;
2142 if (set1[s1].tags == NULL && tags == NULL)
2143 new_set[s1].tags = NULL;
2144 else
2145 {
2146 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2147 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2148 * (i + num_tags + 1)));
2149 if (new_tags == NULL)
2150 return NULL;
2151 for (j = 0; j < i; j++)
2152 new_tags[j] = set1[s1].tags[j];
2153 for (i = 0; i < num_tags; i++)
2154 new_tags[j + i] = tags[i];
2155 new_tags[j + i] = -1;
2156 new_set[s1].tags = new_tags;
2157 }
2158 if (set1[s1].params)
2159 new_set[s1].params = set1[s1].params;
2160 if (params)
2161 {
2162 if (!new_set[s1].params)
2163 new_set[s1].params = params;
2164 else
2165 {
2166 new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
2167 TRE_PARAM_LAST);
2168 if (!new_set[s1].params)
2169 return NULL;
2170 for (i = 0; i < TRE_PARAM_LAST; i++)
2171 if (params[i] != TRE_PARAM_UNSET)
2172 new_set[s1].params[i] = params[i];
2173 }
2174 }
2175 }
2176
2177 for (s2 = 0; set2[s2].position >= 0; s2++)
2178 {
2179 new_set[s1 + s2].position = set2[s2].position;
2180 new_set[s1 + s2].code_min = set2[s2].code_min;
2181 new_set[s1 + s2].code_max = set2[s2].code_max;
2182 /* XXX - why not | assertions here as well? */
2183 new_set[s1 + s2].assertions = set2[s2].assertions;
d5f8dde1 2184 new_set[s1 + s2].bracket_match_list = set2[s2].bracket_match_list;
5f2eab64
JM
2185 new_set[s1 + s2].backref = set2[s2].backref;
2186 if (set2[s2].tags == NULL)
2187 new_set[s1 + s2].tags = NULL;
2188 else
2189 {
2190 for (i = 0; set2[s2].tags[i] >= 0; i++);
2191 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2192 if (new_tags == NULL)
2193 return NULL;
2194 for (j = 0; j < i; j++)
2195 new_tags[j] = set2[s2].tags[j];
2196 new_tags[j] = -1;
2197 new_set[s1 + s2].tags = new_tags;
2198 }
2199 if (set2[s2].params)
2200 new_set[s1 + s2].params = set2[s2].params;
2201 if (params)
2202 {
2203 if (!new_set[s1 + s2].params)
2204 new_set[s1 + s2].params = params;
2205 else
2206 {
2207 new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
2208 TRE_PARAM_LAST);
2209 if (!new_set[s1 + s2].params)
2210 return NULL;
2211 for (i = 0; i < TRE_PARAM_LAST; i++)
2212 if (params[i] != TRE_PARAM_UNSET)
2213 new_set[s1 + s2].params[i] = params[i];
2214 }
2215 }
2216 }
2217 new_set[s1 + s2].position = -1;
2218 return new_set;
2219}
2220
2221/* Finds the empty path through `node' which is the one that should be
2222 taken according to POSIX.2 rules, and adds the tags on that path to
2223 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2224 set to the number of tags seen on the path. */
2225static reg_errcode_t
2226tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2227 int *assertions, int *params, int *num_tags_seen,
2228 int *params_seen)
2229{
2230 tre_literal_t *lit;
2231 tre_union_t *uni;
2232 tre_catenation_t *cat;
2233 tre_iteration_t *iter;
2234 int i;
2235 int bottom = tre_stack_num_objects(stack);
2236 reg_errcode_t status = REG_OK;
2237 if (num_tags_seen)
2238 *num_tags_seen = 0;
2239 if (params_seen)
2240 *params_seen = 0;
2241
2242 status = tre_stack_push_voidptr(stack, node);
2243
2244 /* Walk through the tree recursively. */
2245 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2246 {
2247 node = tre_stack_pop_voidptr(stack);
2248
2249 switch (node->type)
2250 {
2251 case LITERAL:
2252 lit = (tre_literal_t *)node->obj;
2253 switch (lit->code_min)
2254 {
2255 case TAG:
2256 if (lit->code_max >= 0)
2257 {
2258 if (tags != NULL)
2259 {
2260 /* Add the tag to `tags'. */
2261 for (i = 0; tags[i] >= 0; i++)
2262 if (tags[i] == lit->code_max)
2263 break;
2264 if (tags[i] < 0)
2265 {
2266 tags[i] = lit->code_max;
2267 tags[i + 1] = -1;
2268 }
2269 }
2270 if (num_tags_seen)
2271 (*num_tags_seen)++;
2272 }
2273 break;
2274 case ASSERTION:
2275 assert(lit->code_max >= 1
2276 || lit->code_max <= ASSERT_LAST);
2277 if (assertions != NULL)
2278 *assertions |= lit->code_max;
2279 break;
2280 case PARAMETER:
2281 if (params != NULL)
2282 for (i = 0; i < TRE_PARAM_LAST; i++)
2283 params[i] = lit->u.params[i];
2284 if (params_seen != NULL)
2285 *params_seen = 1;
2286 break;
2287 case EMPTY:
2288 break;
2289 default:
2290 assert(0);
2291 break;
2292 }
2293 break;
2294
2295 case UNION:
2296 /* Subexpressions starting earlier take priority over ones
2297 starting later, so we prefer the left subexpression over the
2298 right subexpression. */
2299 uni = (tre_union_t *)node->obj;
2300 if (uni->left->nullable)
2301 STACK_PUSHX(stack, voidptr, uni->left)
2302 else if (uni->right->nullable)
2303 STACK_PUSHX(stack, voidptr, uni->right)
2304 else
2305 assert(0);
2306 break;
2307
2308 case CATENATION:
2309 /* The path must go through both children. */
2310 cat = (tre_catenation_t *)node->obj;
2311 assert(cat->left->nullable);
2312 assert(cat->right->nullable);
2313 STACK_PUSHX(stack, voidptr, cat->left);
2314 STACK_PUSHX(stack, voidptr, cat->right);
2315 break;
2316
2317 case ITERATION:
2318 /* A match with an empty string is preferred over no match at
2319 all, so we go through the argument if possible. */
2320 iter = (tre_iteration_t *)node->obj;
2321 if (iter->arg->nullable)
2322 STACK_PUSHX(stack, voidptr, iter->arg);
2323 break;
2324
2325 default:
2326 assert(0);
2327 break;
2328 }
2329 }
2330
2331 return status;
2332}
2333
2334
2335typedef enum {
2336 NFL_RECURSE,
2337 NFL_POST_UNION,
2338 NFL_POST_CATENATION,
2339 NFL_POST_ITERATION
2340} tre_nfl_stack_symbol_t;
2341
2342
2343/* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2344 the nodes of the AST `tree'. */
2345static reg_errcode_t
2346tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2347{
2348 int bottom = tre_stack_num_objects(stack);
2349
2350 STACK_PUSHR(stack, voidptr, tree);
2351 STACK_PUSHR(stack, int, NFL_RECURSE);
2352
2353 while (tre_stack_num_objects(stack) > bottom)
2354 {
2355 tre_nfl_stack_symbol_t symbol;
2356 tre_ast_node_t *node;
2357
2358 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2359 node = tre_stack_pop_voidptr(stack);
2360 switch (symbol)
2361 {
2362 case NFL_RECURSE:
2363 switch (node->type)
2364 {
2365 case LITERAL:
2366 {
2367 tre_literal_t *lit = (tre_literal_t *)node->obj;
2368 if (IS_BACKREF(lit))
2369 {
2370 /* Back references: nullable = false, firstpos = {i},
2371 lastpos = {i}. */
2372 node->nullable = 0;
2373 node->firstpos = tre_set_one(mem, lit->position, 0,
d5f8dde1 2374 TRE_CHAR_MAX, NULL, -1);
5f2eab64
JM
2375 if (!node->firstpos)
2376 return REG_ESPACE;
2377 node->lastpos = tre_set_one(mem, lit->position, 0,
d5f8dde1 2378 TRE_CHAR_MAX, NULL,
5f2eab64
JM
2379 (int)lit->code_max);
2380 if (!node->lastpos)
2381 return REG_ESPACE;
2382 }
2383 else if (lit->code_min < 0)
2384 {
2385 /* Tags, empty strings, params, and zero width assertions:
2386 nullable = true, firstpos = {}, and lastpos = {}. */
2387 node->nullable = 1;
2388 node->firstpos = tre_set_empty(mem);
2389 if (!node->firstpos)
2390 return REG_ESPACE;
2391 node->lastpos = tre_set_empty(mem);
2392 if (!node->lastpos)
2393 return REG_ESPACE;
2394 }
2395 else
2396 {
2397 /* Literal at position i: nullable = false, firstpos = {i},
2398 lastpos = {i}. */
2399 node->nullable = 0;
2400 node->firstpos =
2401 tre_set_one(mem, lit->position, (int)lit->code_min,
d5f8dde1 2402 (int)lit->code_max, NULL, -1);
5f2eab64
JM
2403 if (!node->firstpos)
2404 return REG_ESPACE;
2405 node->lastpos = tre_set_one(mem, lit->position,
2406 (int)lit->code_min,
2407 (int)lit->code_max,
d5f8dde1 2408 lit->u.bracket_match_list,
5f2eab64
JM
2409 -1);
2410 if (!node->lastpos)
2411 return REG_ESPACE;
2412 }
2413 break;
2414 }
2415
2416 case UNION:
2417 /* Compute the attributes for the two subtrees, and after that
2418 for this node. */
2419 STACK_PUSHR(stack, voidptr, node);
2420 STACK_PUSHR(stack, int, NFL_POST_UNION);
2421 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2422 STACK_PUSHR(stack, int, NFL_RECURSE);
2423 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2424 STACK_PUSHR(stack, int, NFL_RECURSE);
2425 break;
2426
2427 case CATENATION:
2428 /* Compute the attributes for the two subtrees, and after that
2429 for this node. */
2430 STACK_PUSHR(stack, voidptr, node);
2431 STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2432 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2433 STACK_PUSHR(stack, int, NFL_RECURSE);
2434 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2435 STACK_PUSHR(stack, int, NFL_RECURSE);
2436 break;
2437
2438 case ITERATION:
2439 /* Compute the attributes for the subtree, and after that for
2440 this node. */
2441 STACK_PUSHR(stack, voidptr, node);
2442 STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2443 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2444 STACK_PUSHR(stack, int, NFL_RECURSE);
2445 break;
2446 }
2447 break; /* end case: NFL_RECURSE */
2448
2449 case NFL_POST_UNION:
2450 {
2451 tre_union_t *uni = (tre_union_t *)node->obj;
2452 node->nullable = uni->left->nullable || uni->right->nullable;
2453 node->firstpos = tre_set_union(mem, uni->left->firstpos,
2454 uni->right->firstpos, NULL, 0, NULL);
2455 if (!node->firstpos)
2456 return REG_ESPACE;
2457 node->lastpos = tre_set_union(mem, uni->left->lastpos,
2458 uni->right->lastpos, NULL, 0, NULL);
2459 if (!node->lastpos)
2460 return REG_ESPACE;
2461 break;
2462 }
2463
2464 case NFL_POST_ITERATION:
2465 {
d5f8dde1
JM
2466 int num_tags, *tags, assertions, params_seen;
2467 int *params;
2468 reg_errcode_t status;
5f2eab64
JM
2469 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2470
d5f8dde1
JM
2471 /* From Ville Laurikari's original 2001 Master's thesis, the
2472 firstpos(n) and lastpos(n) of an iteration is just the
2473 corresponding values of the iteration's argument. Unfortunately,
2474 this isn't sufficient for the following BRE:
2475
2476 \(a*\)*b\(\1\) matched against ab
2477
2478 The backreference wants to force the first subexpression to
2479 be the empty string, but there is no transition for this. So
2480 we need to modify the lastpos(n) of an iteration to be the
2481 equivalent of that of catentation. Using the same notation as
2482 in the thesis, lastpos(n) is redefined as:
2483
2484 if nullable(c1) then
2485 lastpos(c1) U
2486 addtags(lastpos(c1),
2487 emptymatch(c1))
2488 else
2489 lastpos(c1)
2490
2491 where c1 is the argument node. firstpos(n) remains the same. */
2492
2493 /* Compute lastpos. */
5f2eab64 2494 if (iter->min == 0 || iter->arg->nullable)
d5f8dde1
JM
2495 {
2496 node->nullable = 1;
2497 if (iter->arg->nullable)
2498 {
2499 /* The arg matches the empty string. Make a first pass
2500 with tre_match_empty() to get the number of tags and
2501 parameters. */
2502 status = tre_match_empty(stack, iter->arg,
2503 NULL, NULL, NULL, &num_tags,
2504 &params_seen);
2505 if (status != REG_OK)
2506 return status;
2507 /* Allocate arrays for the tags and parameters. */
2508 tags = xmalloc(sizeof(int) * (num_tags + 1));
2509 if (!tags)
2510 return REG_ESPACE;
2511 tags[0] = -1;
2512 assertions = 0;
2513 params = NULL;
2514 if (params_seen)
2515 {
2516 params = tre_mem_alloc(mem, sizeof(*params)
2517 * TRE_PARAM_LAST);
2518 if (!params)
2519 {
2520 xfree(tags);
2521 return REG_ESPACE;
2522 }
2523 }
2524 /* Second pass with tre_mach_empty() to get the list of
2525 tags and parameters. */
2526 status = tre_match_empty(stack, iter->arg, tags,
2527 &assertions, params, NULL, NULL);
2528 if (status != REG_OK)
2529 {
2530 xfree(tags);
2531 return status;
2532 }
2533 node->lastpos =
2534 tre_set_union(mem, iter->arg->lastpos, iter->arg->lastpos,
2535 tags, assertions, params);
2536 xfree(tags);
2537 if (!node->lastpos)
2538 return REG_ESPACE;
2539 }
2540 else
2541 node->lastpos = iter->arg->lastpos;
2542 }
5f2eab64 2543 else
d5f8dde1
JM
2544 {
2545 node->nullable = 0;
2546 node->lastpos = iter->arg->lastpos;
2547 }
5f2eab64 2548 node->firstpos = iter->arg->firstpos;
5f2eab64
JM
2549 break;
2550 }
2551
2552 case NFL_POST_CATENATION:
2553 {
2554 int num_tags, *tags, assertions, params_seen;
2555 int *params;
2556 reg_errcode_t status;
2557 tre_catenation_t *cat = node->obj;
2558 node->nullable = cat->left->nullable && cat->right->nullable;
2559
2560 /* Compute firstpos. */
2561 if (cat->left->nullable)
2562 {
2563 /* The left side matches the empty string. Make a first pass
2564 with tre_match_empty() to get the number of tags and
2565 parameters. */
2566 status = tre_match_empty(stack, cat->left,
2567 NULL, NULL, NULL, &num_tags,
2568 &params_seen);
2569 if (status != REG_OK)
2570 return status;
2571 /* Allocate arrays for the tags and parameters. */
2572 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2573 if (!tags)
2574 return REG_ESPACE;
2575 tags[0] = -1;
2576 assertions = 0;
2577 params = NULL;
2578 if (params_seen)
2579 {
2580 params = tre_mem_alloc(mem, sizeof(*params)
2581 * TRE_PARAM_LAST);
2582 if (!params)
2583 {
2584 xfree(tags);
2585 return REG_ESPACE;
2586 }
2587 }
2588 /* Second pass with tre_mach_empty() to get the list of
2589 tags and parameters. */
2590 status = tre_match_empty(stack, cat->left, tags,
2591 &assertions, params, NULL, NULL);
2592 if (status != REG_OK)
2593 {
2594 xfree(tags);
2595 return status;
2596 }
2597 node->firstpos =
2598 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2599 tags, assertions, params);
2600 xfree(tags);
2601 if (!node->firstpos)
2602 return REG_ESPACE;
2603 }
2604 else
2605 {
2606 node->firstpos = cat->left->firstpos;
2607 }
2608
2609 /* Compute lastpos. */
2610 if (cat->right->nullable)
2611 {
2612 /* The right side matches the empty string. Make a first pass
2613 with tre_match_empty() to get the number of tags and
2614 parameters. */
2615 status = tre_match_empty(stack, cat->right,
2616 NULL, NULL, NULL, &num_tags,
2617 &params_seen);
2618 if (status != REG_OK)
2619 return status;
2620 /* Allocate arrays for the tags and parameters. */
2621 tags = xmalloc(sizeof(int) * (num_tags + 1));
2622 if (!tags)
2623 return REG_ESPACE;
2624 tags[0] = -1;
2625 assertions = 0;
2626 params = NULL;
2627 if (params_seen)
2628 {
2629 params = tre_mem_alloc(mem, sizeof(*params)
2630 * TRE_PARAM_LAST);
2631 if (!params)
2632 {
2633 xfree(tags);
2634 return REG_ESPACE;
2635 }
2636 }
2637 /* Second pass with tre_mach_empty() to get the list of
2638 tags and parameters. */
2639 status = tre_match_empty(stack, cat->right, tags,
2640 &assertions, params, NULL, NULL);
2641 if (status != REG_OK)
2642 {
2643 xfree(tags);
2644 return status;
2645 }
2646 node->lastpos =
2647 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2648 tags, assertions, params);
2649 xfree(tags);
2650 if (!node->lastpos)
2651 return REG_ESPACE;
2652 }
2653 else
2654 {
2655 node->lastpos = cat->right->lastpos;
2656 }
2657 break;
2658 }
2659
2660 default:
2661 assert(0);
2662 break;
2663 }
2664 }
2665
2666 return REG_OK;
2667}
2668
2669
2670/* Adds a transition from each position in `p1' to each position in `p2'. */
2671static reg_errcode_t
2672tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2673 tre_tnfa_transition_t *transitions,
2674 int *counts, int *offs)
2675{
2676 tre_pos_and_tags_t *orig_p2 = p2;
2677 tre_tnfa_transition_t *trans;
2678 int i, j, k, l, dup, prev_p2_pos;
2679
2680 if (transitions != NULL)
2681 while (p1->position >= 0)
2682 {
2683 p2 = orig_p2;
2684 prev_p2_pos = -1;
2685 while (p2->position >= 0)
2686 {
2687 /* Optimization: if this position was already handled, skip it. */
2688 if (p2->position == prev_p2_pos)
2689 {
2690 p2++;
2691 continue;
2692 }
2693 prev_p2_pos = p2->position;
2694 /* Set `trans' to point to the next unused transition from
2695 position `p1->position'. */
2696 trans = transitions + offs[p1->position];
2697 while (trans->state != NULL)
2698 {
2699#if 0
2700 /* If we find a previous transition from `p1->position' to
2701 `p2->position', it is overwritten. This can happen only
2702 if there are nested loops in the regexp, like in "((a)*)*".
2703 In POSIX.2 repetition using the outer loop is always
2704 preferred over using the inner loop. Therefore the
2705 transition for the inner loop is useless and can be thrown
2706 away. */
2707 /* XXX - The same position is used for all nodes in a bracket
2708 expression, so this optimization cannot be used (it will
2709 break bracket expressions) unless I figure out a way to
2710 detect it here. */
2711 if (trans->state_id == p2->position)
2712 {
2713 DPRINT(("*"));
2714 break;
2715 }
2716#endif
2717 trans++;
2718 }
2719
2720 if (trans->state == NULL)
2721 (trans + 1)->state = NULL;
2722 /* Use the character ranges, assertions, etc. from `p1' for
2723 the transition from `p1' to `p2'. */
2724 trans->code_min = p1->code_min;
2725 trans->code_max = p1->code_max;
2726 trans->state = transitions + offs[p2->position];
2727 trans->state_id = p2->position;
2728 trans->assertions = p1->assertions | p2->assertions
d5f8dde1 2729 | (p1->bracket_match_list != NULL ? ASSERT_BRACKET_MATCH : 0);
5f2eab64
JM
2730 if (p1->backref >= 0)
2731 {
d5f8dde1 2732 assert((trans->assertions & ASSERT_BRACKET_MATCH) == 0);
5f2eab64
JM
2733 assert(p2->backref < 0);
2734 trans->u.backref = p1->backref;
2735 trans->assertions |= ASSERT_BACKREF;
2736 }
d5f8dde1 2737 if (p1->bracket_match_list != NULL)
5f2eab64 2738 {
d5f8dde1
JM
2739 trans->u.bracket_match_list =
2740 xmalloc(SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2741 if (trans->u.bracket_match_list == NULL)
5f2eab64 2742 return REG_ESPACE;
d5f8dde1
JM
2743 memcpy(trans->u.bracket_match_list, p1->bracket_match_list,
2744 SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
5f2eab64 2745 }
5f2eab64
JM
2746
2747 /* Find out how many tags this transition has. */
2748 i = 0;
2749 if (p1->tags != NULL)
2750 while(p1->tags[i] >= 0)
2751 i++;
2752 j = 0;
2753 if (p2->tags != NULL)
2754 while(p2->tags[j] >= 0)
2755 j++;
2756
2757 /* If we are overwriting a transition, free the old tag array. */
2758 if (trans->tags != NULL)
2759 xfree(trans->tags);
2760 trans->tags = NULL;
2761
2762 /* If there were any tags, allocate an array and fill it. */
2763 if (i + j > 0)
2764 {
2765 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2766 if (!trans->tags)
2767 return REG_ESPACE;
2768 i = 0;
2769 if (p1->tags != NULL)
2770 while(p1->tags[i] >= 0)
2771 {
2772 trans->tags[i] = p1->tags[i];
2773 i++;
2774 }
2775 l = i;
2776 j = 0;
2777 if (p2->tags != NULL)
2778 while (p2->tags[j] >= 0)
2779 {
2780 /* Don't add duplicates. */
2781 dup = 0;
2782 for (k = 0; k < i; k++)
2783 if (trans->tags[k] == p2->tags[j])
2784 {
2785 dup = 1;
2786 break;
2787 }
2788 if (!dup)
2789 trans->tags[l++] = p2->tags[j];
2790 j++;
2791 }
2792 trans->tags[l] = -1;
2793 }
2794
2795 /* Set the parameter array. If both `p2' and `p1' have same
2796 parameters, the values in `p2' override those in `p1'. */
2797 if (p1->params || p2->params)
2798 {
2799 if (!trans->params)
2800 trans->params = xmalloc(sizeof(*trans->params)
2801 * TRE_PARAM_LAST);
2802 if (!trans->params)
2803 return REG_ESPACE;
2804 for (i = 0; i < TRE_PARAM_LAST; i++)
2805 {
2806 trans->params[i] = TRE_PARAM_UNSET;
2807 if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
2808 trans->params[i] = p1->params[i];
2809 if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
2810 trans->params[i] = p2->params[i];
2811 }
2812 }
2813 else
2814 {
2815 if (trans->params)
2816 xfree(trans->params);
2817 trans->params = NULL;
2818 }
2819
2820
2821#ifdef TRE_DEBUG
2822 {
2823 int *tags;
2824
2825 DPRINT((" %2d -> %2d on %3d", p1->position, p2->position,
2826 p1->code_min));
2827 if (p1->code_max != p1->code_min)
2828 DPRINT(("-%3d", p1->code_max));
2829 tags = trans->tags;
2830 if (tags)
2831 {
2832 DPRINT((", tags ["));
2833 while (*tags >= 0)
2834 {
2835 DPRINT(("%d", *tags));
2836 tags++;
2837 if (*tags >= 0)
2838 DPRINT((","));
2839 }
2840 DPRINT(("]"));
2841 }
2842 if (trans->assertions)
2843 DPRINT((", assert %d", trans->assertions));
2844 if (trans->assertions & ASSERT_BACKREF)
2845 DPRINT((", backref %d", trans->u.backref));
d5f8dde1
JM
2846 else if (trans->assertions & ASSERT_BRACKET_MATCH)
2847 DPRINT((", bracket_match_list %p",
2848 trans->u.bracket_match_list));
5f2eab64
JM
2849 if (trans->params)
2850 {
2851 DPRINT((", "));
2852 tre_print_params(trans->params);
2853 }
2854 DPRINT(("\n"));
2855 }
2856#endif /* TRE_DEBUG */
2857 p2++;
2858 }
2859 p1++;
2860 }
2861 else
2862 /* Compute a maximum limit for the number of transitions leaving
2863 from each state. */
2864 while (p1->position >= 0)
2865 {
2866 p2 = orig_p2;
2867 while (p2->position >= 0)
2868 {
2869 counts[p1->position]++;
2870 p2++;
2871 }
2872 p1++;
2873 }
2874 return REG_OK;
2875}
2876
2877/* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2878 labelled with one character range (there are no transitions on empty
2879 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2880 the regexp. */
2881static reg_errcode_t
2882tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2883 int *counts, int *offs)
2884{
2885 tre_union_t *uni;
2886 tre_catenation_t *cat;
2887 tre_iteration_t *iter;
2888 reg_errcode_t errcode = REG_OK;
2889
2890 /* XXX - recurse using a stack!. */
2891 switch (node->type)
2892 {
2893 case LITERAL:
2894 break;
2895 case UNION:
2896 uni = (tre_union_t *)node->obj;
2897 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2898 if (errcode != REG_OK)
2899 return errcode;
2900 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2901 break;
2902
2903 case CATENATION:
2904 cat = (tre_catenation_t *)node->obj;
2905 /* Add a transition from each position in cat->left->lastpos
2906 to each position in cat->right->firstpos. */
2907 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2908 transitions, counts, offs);
2909 if (errcode != REG_OK)
2910 return errcode;
2911 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2912 if (errcode != REG_OK)
2913 return errcode;
2914 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2915 break;
2916
2917 case ITERATION:
2918 iter = (tre_iteration_t *)node->obj;
2919 assert(iter->max == -1 || iter->max == 1);
2920
2921 if (iter->max == -1)
2922 {
2923 assert(iter->min == 0 || iter->min == 1);
2924 /* Add a transition from each last position in the iterated
2925 expression to each first position. */
2926 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2927 transitions, counts, offs);
2928 if (errcode != REG_OK)
2929 return errcode;
2930 }
2931 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2932 break;
2933 }
2934 return errcode;
2935}
2936
2937
2938#define ERROR_EXIT(err) \
2939 do \
2940 { \
2941 errcode = err; \
2942 if (/*CONSTCOND*/1) \
2943 goto error_exit; \
2944 } \
2945 while (/*CONSTCOND*/0)
2946
2947
2948int
d5f8dde1
JM
2949tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags,
2950 locale_t loc)
5f2eab64
JM
2951{
2952 tre_stack_t *stack;
2953 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2954 tre_pos_and_tags_t *p;
2955 int *counts = NULL, *offs = NULL;
2956 int i, add = 0;
2957 tre_tnfa_transition_t *transitions, *initial;
2958 tre_tnfa_t *tnfa = NULL;
d5f8dde1 2959 tre_submatch_data_t *submatch_data = NULL;
5f2eab64
JM
2960 tre_tag_direction_t *tag_directions = NULL;
2961 reg_errcode_t errcode;
2962 tre_mem_t mem;
2963
2964 /* Parse context. */
2965 tre_parse_ctx_t parse_ctx;
2966
2967 /* Allocate a stack used throughout the compilation process for various
2968 purposes. */
2969 stack = tre_stack_new(512, 10240, 128);
2970 if (!stack)
2971 return REG_ESPACE;
2972 /* Allocate a fast memory allocator. */
2973 mem = tre_mem_new();
2974 if (!mem)
2975 {
2976 tre_stack_destroy(stack);
2977 return REG_ESPACE;
2978 }
2979
2980 /* Parse the regexp. */
2981 memset(&parse_ctx, 0, sizeof(parse_ctx));
2982 parse_ctx.mem = mem;
2983 parse_ctx.stack = stack;
2984 parse_ctx.re = regex;
2985 parse_ctx.len = n;
d5f8dde1
JM
2986 /* Only allow REG_UNGREEDY to be set if both REG_ENHANCED and REG_EXTENDED
2987 are also set */
2988 if ((cflags & (REG_ENHANCED | REG_EXTENDED)) != (REG_ENHANCED | REG_EXTENDED))
2989 cflags &= ~REG_UNGREEDY;
5f2eab64
JM
2990 parse_ctx.cflags = cflags;
2991 parse_ctx.max_backref = -1;
d5f8dde1
JM
2992 parse_ctx.loc = loc;
2993 parse_ctx.submatch_id_invisible = SUBMATCH_ID_INVISIBLE_START;
2994
5f2eab64
JM
2995 DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
2996 errcode = tre_parse(&parse_ctx);
2997 if (errcode != REG_OK)
2998 ERROR_EXIT(errcode);
2999 preg->re_nsub = parse_ctx.submatch_id - 1;
3000 tree = parse_ctx.result;
3001
3002 /* Back references and approximate matching cannot currently be used
3003 in the same regexp. */
3004 if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
3005 ERROR_EXIT(REG_BADPAT);
3006
3007#ifdef TRE_DEBUG
3008 tre_ast_print(tree);
3009#endif /* TRE_DEBUG */
3010
3011 /* Referring to nonexistent subexpressions is illegal. */
3012 if (parse_ctx.max_backref > (int)preg->re_nsub)
3013 ERROR_EXIT(REG_ESUBREG);
3014
3015 /* Allocate the TNFA struct. */
3016 tnfa = xcalloc(1, sizeof(tre_tnfa_t));
3017 if (tnfa == NULL)
3018 ERROR_EXIT(REG_ESPACE);
3019 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
3020 tnfa->have_approx = parse_ctx.have_approx;
3021 tnfa->num_submatches = parse_ctx.submatch_id;
d5f8dde1
JM
3022 tnfa->num_submatches_invisible = parse_ctx.submatch_id_invisible
3023 - SUBMATCH_ID_INVISIBLE_START;
3024 tnfa->num_reorder_tags = parse_ctx.num_reorder_tags;
3025 tnfa->loc = parse_ctx.loc;
5f2eab64
JM
3026
3027 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
3028 regexp does not have back references, this can be skipped. */
d5f8dde1 3029 if (tnfa->num_reorder_tags > 0 || !(cflags & REG_NOSUB))
5f2eab64
JM
3030 {
3031 DPRINT(("tre_compile: setting up tags\n"));
3032
3033 /* Figure out how many tags we will need. */
3034 errcode = tre_add_tags(NULL, stack, tree, tnfa);
3035 if (errcode != REG_OK)
3036 ERROR_EXIT(errcode);
3037#ifdef TRE_DEBUG
3038 tre_ast_print(tree);
3039#endif /* TRE_DEBUG */
3040
3041 if (tnfa->num_tags > 0)
3042 {
3043 tag_directions = xmalloc(sizeof(*tag_directions)
3044 * (tnfa->num_tags + 1));
3045 if (tag_directions == NULL)
3046 ERROR_EXIT(REG_ESPACE);
3047 tnfa->tag_directions = tag_directions;
3048 memset(tag_directions, -1,
3049 sizeof(*tag_directions) * (tnfa->num_tags + 1));
3050 }
d5f8dde1 3051 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 3,
5f2eab64
JM
3052 sizeof(tnfa->minimal_tags));
3053 if (tnfa->minimal_tags == NULL)
3054 ERROR_EXIT(REG_ESPACE);
3055
3056 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
3057 sizeof(*submatch_data));
3058 if (submatch_data == NULL)
3059 ERROR_EXIT(REG_ESPACE);
d5f8dde1
JM
3060 /* Set the eo_tag value to -1 to indicate that that corresponding
3061 * submatch has not be completed yet */
3062 for (i = 0; i < parse_ctx.submatch_id; i++)
3063 {
3064 submatch_data[i].eo_tag = -1;
3065 }
5f2eab64
JM
3066 tnfa->submatch_data = submatch_data;
3067
3068 errcode = tre_add_tags(mem, stack, tree, tnfa);
3069 if (errcode != REG_OK)
3070 ERROR_EXIT(errcode);
3071
3072#ifdef TRE_DEBUG
3073 for (i = 0; i < parse_ctx.submatch_id; i++)
3074 DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3075 i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
d5f8dde1
JM
3076 for (i = 0; i <= tnfa->num_tags; i++)
3077 DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
5f2eab64
JM
3078#endif /* TRE_DEBUG */
3079 }
3080
3081 /* Expand iteration nodes. */
3082 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
3083 tag_directions, &tnfa->params_depth);
3084 if (errcode != REG_OK)
3085 ERROR_EXIT(errcode);
3086
3087 /* Add a dummy node for the final state.
3088 XXX - For certain patterns this dummy node can be optimized away,
3089 for example "a*" or "ab*". Figure out a simple way to detect
3090 this possibility. */
3091 tmp_ast_l = tree;
3092 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
3093 if (tmp_ast_r == NULL)
3094 ERROR_EXIT(REG_ESPACE);
3095
3096 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
3097 if (tree == NULL)
3098 ERROR_EXIT(REG_ESPACE);
3099
3100#ifdef TRE_DEBUG
3101 tre_ast_print(tree);
3102 DPRINT(("Number of states: %d\n", parse_ctx.position));
d5f8dde1
JM
3103 if (submatch_data)
3104 for (i = 0; i < parse_ctx.submatch_id; i++)
3105 DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3106 i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3107 if (tag_directions)
3108 for (i = 0; i <= tnfa->num_tags; i++)
3109 DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
5f2eab64
JM
3110#endif /* TRE_DEBUG */
3111
3112 errcode = tre_compute_nfl(mem, stack, tree);
3113 if (errcode != REG_OK)
3114 ERROR_EXIT(errcode);
3115
3116 counts = xmalloc(sizeof(int) * parse_ctx.position);
3117 if (counts == NULL)
3118 ERROR_EXIT(REG_ESPACE);
3119
3120 offs = xmalloc(sizeof(int) * parse_ctx.position);
3121 if (offs == NULL)
3122 ERROR_EXIT(REG_ESPACE);
3123
3124 for (i = 0; i < parse_ctx.position; i++)
3125 counts[i] = 0;
3126 tre_ast_to_tnfa(tree, NULL, counts, NULL);
3127
3128 add = 0;
3129 for (i = 0; i < parse_ctx.position; i++)
3130 {
3131 offs[i] = add;
3132 add += counts[i] + 1;
3133 counts[i] = 0;
3134 }
3135 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
3136 if (transitions == NULL)
3137 ERROR_EXIT(REG_ESPACE);
3138 tnfa->transitions = transitions;
3139 tnfa->num_transitions = add;
3140
3141 DPRINT(("Converting to TNFA:\n"));
3142 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
3143 if (errcode != REG_OK)
3144 ERROR_EXIT(errcode);
3145
d5f8dde1
JM
3146
3147 /* Set first_char only if there is only one character that can be the
3148 first character of a match */
5f2eab64 3149 tnfa->first_char = -1;
d5f8dde1 3150 if (!tmp_ast_l->nullable)
5f2eab64 3151 {
d5f8dde1
JM
3152 int scanning = 1;
3153 for (p = tree->firstpos; scanning && p->position >= 0; p++)
5f2eab64
JM
3154 {
3155 tre_tnfa_transition_t *j = transitions + offs[p->position];
3156 while (j->state != NULL)
3157 {
d5f8dde1 3158 if (j->code_min <= j->code_max)
5f2eab64 3159 {
d5f8dde1
JM
3160 if (j->code_max != j->code_min || j->code_min == -1 || tnfa->first_char != -1)
3161 {
3162 tnfa->first_char = -1;
3163 scanning = 0;
3164 break;
3165 }
3166 tnfa->first_char = j->code_min;
5f2eab64
JM
3167 }
3168 j++;
3169 }
3170 }
d5f8dde1
JM
3171#ifdef TRE_DEBUG
3172 if (tnfa->first_char >= 0)
3173 DPRINT(("first char must be %d\n", tnfa->first_char));
3174#endif /* TRE_DEBUG */
5f2eab64 3175 }
5f2eab64
JM
3176
3177 p = tree->firstpos;
3178 i = 0;
3179 while (p->position >= 0)
3180 {
3181 i++;
3182
3183#ifdef TRE_DEBUG
3184 {
3185 int *tags;
3186 DPRINT(("initial: %d", p->position));
3187 tags = p->tags;
3188 if (tags != NULL)
3189 {
3190 if (*tags >= 0)
3191 DPRINT(("/"));
3192 while (*tags >= 0)
3193 {
3194 DPRINT(("%d", *tags));
3195 tags++;
3196 if (*tags >= 0)
3197 DPRINT((","));
3198 }
3199 }
3200 DPRINT((", assert %d", p->assertions));
3201 if (p->params)
3202 {
3203 DPRINT((", "));
3204 tre_print_params(p->params);
3205 }
3206 DPRINT(("\n"));
3207 }
3208#endif /* TRE_DEBUG */
3209
3210 p++;
3211 }
3212
3213 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
3214 if (initial == NULL)
3215 ERROR_EXIT(REG_ESPACE);
3216 tnfa->initial = initial;
3217
3218 i = 0;
3219 for (p = tree->firstpos; p->position >= 0; p++)
3220 {
3221 initial[i].state = transitions + offs[p->position];
3222 initial[i].state_id = p->position;
3223 initial[i].tags = NULL;
3224 /* Copy the arrays p->tags, and p->params, they are allocated
3225 from a tre_mem object. */
3226 if (p->tags)
3227 {
3228 int j;
3229 for (j = 0; p->tags[j] >= 0; j++);
3230 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
3231 if (!initial[i].tags)
3232 ERROR_EXIT(REG_ESPACE);
3233 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
3234 }
3235 initial[i].params = NULL;
3236 if (p->params)
3237 {
3238 initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
3239 if (!initial[i].params)
3240 ERROR_EXIT(REG_ESPACE);
3241 memcpy(initial[i].params, p->params,
3242 sizeof(*p->params) * TRE_PARAM_LAST);
3243 }
3244 initial[i].assertions = p->assertions;
3245 i++;
3246 }
3247 initial[i].state = NULL;
3248
3249 tnfa->num_transitions = add;
3250 tnfa->final = transitions + offs[tree->lastpos[0].position];
3251 tnfa->num_states = parse_ctx.position;
3252 tnfa->cflags = cflags;
3253
d5f8dde1
JM
3254 DPRINT(("final state %d (%p)\n", tree->lastpos[0].position,
3255 (void *)tnfa->final));
5f2eab64
JM
3256
3257 tre_mem_destroy(mem);
3258 tre_stack_destroy(stack);
3259 xfree(counts);
3260 xfree(offs);
3261
d5f8dde1
JM
3262#ifdef TRE_USE_SYSTEM_REGEX_H
3263 preg->re_magic = RE_MAGIC;
3264#endif /* TRE_USE_SYSTEM_REGEX_H */
5f2eab64 3265 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
d5f8dde1 3266 xlocale_retain(tnfa->loc);
5f2eab64
JM
3267 return REG_OK;
3268
3269 error_exit:
3270 /* Free everything that was allocated and return the error code. */
3271 tre_mem_destroy(mem);
3272 if (stack != NULL)
3273 tre_stack_destroy(stack);
3274 if (counts != NULL)
3275 xfree(counts);
3276 if (offs != NULL)
3277 xfree(offs);
d5f8dde1
JM
3278
3279 /* Set tnfa into preg, so that calling tre_free() will free the contents
3280 of tnfa. NULL out the loc field since we never retained the locale. */
5f2eab64 3281 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
d5f8dde1
JM
3282 if(tnfa) tnfa->loc = NULL;
3283
5f2eab64
JM
3284 tre_free(preg);
3285 return errcode;
3286}
3287
3288
3289
3290
3291void
3292tre_free(regex_t *preg)
3293{
3294 tre_tnfa_t *tnfa;
3295 unsigned int i;
3296 tre_tnfa_transition_t *trans;
3297
d5f8dde1
JM
3298#ifdef TRE_USE_SYSTEM_REGEX_H
3299 preg->re_magic = 0;
3300#endif /* TRE_USE_SYSTEM_REGEX_H */
5f2eab64
JM
3301 tnfa = (void *)preg->TRE_REGEX_T_FIELD;
3302 if (!tnfa)
3303 return;
d5f8dde1 3304 preg->TRE_REGEX_T_FIELD = NULL;
5f2eab64
JM
3305
3306 for (i = 0; i < tnfa->num_transitions; i++)
3307 if (tnfa->transitions[i].state)
3308 {
3309 if (tnfa->transitions[i].tags)
3310 xfree(tnfa->transitions[i].tags);
d5f8dde1
JM
3311 if (tnfa->transitions[i].assertions & ASSERT_BRACKET_MATCH)
3312 xfree(tnfa->transitions[i].u.bracket_match_list);
5f2eab64
JM
3313 if (tnfa->transitions[i].params)
3314 xfree(tnfa->transitions[i].params);
3315 }
3316 if (tnfa->transitions)
3317 xfree(tnfa->transitions);
3318
3319 if (tnfa->initial)
3320 {
3321 for (trans = tnfa->initial; trans->state; trans++)
3322 {
3323 if (trans->tags)
3324 xfree(trans->tags);
3325 if (trans->params)
3326 xfree(trans->params);
3327 }
3328 xfree(tnfa->initial);
3329 }
3330
3331 if (tnfa->submatch_data)
3332 {
5f2eab64
JM
3333 xfree(tnfa->submatch_data);
3334 }
3335
3336 if (tnfa->tag_directions)
3337 xfree(tnfa->tag_directions);
5f2eab64
JM
3338 if (tnfa->minimal_tags)
3339 xfree(tnfa->minimal_tags);
d5f8dde1
JM
3340
3341 if (tnfa->loc)
3342 xlocale_release(tnfa->loc);
3343
3344 if (tnfa->last_matched_branch)
3345 xfree(tnfa->last_matched_branch);
3346
5f2eab64
JM
3347 xfree(tnfa);
3348}
3349
3350char *
3351tre_version(void)
3352{
3353 static char str[256];
3354 char *version;
3355
3356 if (str[0] == 0)
3357 {
3358 (void) tre_config(TRE_CONFIG_VERSION, &version);
3359 (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
3360 }
3361 return str;
3362}
3363
3364int
3365tre_config(int query, void *result)
3366{
3367 int *int_result = result;
3368 const char **string_result = result;
3369
3370 switch (query)
3371 {
3372 case TRE_CONFIG_APPROX:
3373#ifdef TRE_APPROX
3374 *int_result = 1;
3375#else /* !TRE_APPROX */
3376 *int_result = 0;
3377#endif /* !TRE_APPROX */
3378 return REG_OK;
3379
3380 case TRE_CONFIG_WCHAR:
3381#ifdef TRE_WCHAR
3382 *int_result = 1;
3383#else /* !TRE_WCHAR */
3384 *int_result = 0;
3385#endif /* !TRE_WCHAR */
3386 return REG_OK;
3387
3388 case TRE_CONFIG_MULTIBYTE:
3389#ifdef TRE_MULTIBYTE
3390 *int_result = 1;
3391#else /* !TRE_MULTIBYTE */
3392 *int_result = 0;
3393#endif /* !TRE_MULTIBYTE */
3394 return REG_OK;
3395
3396 case TRE_CONFIG_SYSTEM_ABI:
3397#ifdef TRE_CONFIG_SYSTEM_ABI
3398 *int_result = 1;
3399#else /* !TRE_CONFIG_SYSTEM_ABI */
3400 *int_result = 0;
3401#endif /* !TRE_CONFIG_SYSTEM_ABI */
3402 return REG_OK;
3403
3404 case TRE_CONFIG_VERSION:
3405 *string_result = TRE_VERSION;
3406 return REG_OK;
3407 }
3408
3409 return REG_NOMATCH;
3410}
3411
3412
3413/* EOF */