TRE: Add local modifications to extend functionality
[dragonfly.git] / contrib / tre / lib / tre-compile.c
1 /*
2   tre-compile.c - TRE regex compiler
3
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6
7 */
8
9 /*
10   TODO:
11    - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
12      function calls.
13 */
14
15
16 #ifdef HAVE_CONFIG_H
17 #include <config.h>
18 #endif /* HAVE_CONFIG_H */
19 #include <stdio.h>
20 #include <assert.h>
21 #include <string.h>
22 #include <limits.h>
23
24 #include "tre-internal.h"
25 #include "tre-mem.h"
26 #include "tre-stack.h"
27 #include "tre-ast.h"
28 #include "tre-parse.h"
29 #include "tre-compile.h"
30 #include "tre.h"
31 #include "xmalloc.h"
32
33 /*
34   The bit_ffs() macro in bitstring.h is flawed.  Replace it with a working one.
35 */
36 #undef bit_ffs
37 #define bit_ffs(name, nbits, value) { \
38         register bitstr_t *_name = name; \
39         register int _byte, _nbits = nbits; \
40         register int _stopbyte = _bit_byte(_nbits), _value = -1; \
41         for (_byte = 0; _byte <= _stopbyte; ++_byte) \
42                 if (_name[_byte]) { \
43                         _value = _byte << 3; \
44                         for (_stopbyte = _name[_byte]; !(_stopbyte&0x1); \
45                             ++_value, _stopbyte >>= 1); \
46                         break; \
47                 } \
48         *(value) = _value; \
49 }
50
51 /*
52   Algorithms to setup tags so that submatch addressing can be done.
53 */
54
55
56 #ifdef TRE_DEBUG
57 static const char *tag_dir_str[] = {
58   "minimize",
59   "maximize",
60   "left-maximize"
61 };
62
63 static const char _indent[] = "  ";
64
65 static void
66 print_indent(int indent)
67 {
68   while (indent-- > 0)
69     DPRINT((_indent));
70 }
71
72 static void print_last_matched_pre(tre_last_matched_pre_t *lm, int indent,
73                                    int num_tags);
74 static void
75 print_last_match_branch_pre(tre_last_matched_branch_pre_t *branch, int indent,
76                             int num_tags)
77 {
78   tre_last_matched_pre_t *u = branch->last_matched;
79   int n_last_matched = 0;
80
81   while (u)
82     {
83       n_last_matched++;
84       u = u->next;
85     }
86
87   print_indent(indent);
88   DPRINT(("BRANCH: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
89           branch->tot_branches, branch->tot_last_matched, branch->tot_tags));
90   print_indent(indent);
91   DPRINT(("..n_last_matched=%d last_matched=%d\n", branch->n_last_matched,
92           n_last_matched));
93   if (branch->n_last_matched != n_last_matched)
94     DPRINT(("*** mismatch between n_last_matched and unions ***\n"));
95   if (branch->cmp_tag > 0)
96     {
97       int i;
98       const char *sep = " tags=";
99       print_indent(indent);
100       DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
101       for (i = 0; i < num_tags; i++)
102         if (bit_test(branch->tags, i))
103           {
104             DPRINT(("%s%d", sep, i));
105             sep = ",";
106           }
107       DPRINT(("\n"));
108     }
109
110   u = branch->last_matched;
111   indent++;
112   while (u)
113     {
114       print_last_matched_pre(u, indent, num_tags);
115       u = u->next;
116     }
117 }
118
119 static void
120 print_last_matched_pre(tre_last_matched_pre_t *lm, int indent, int num_tags)
121 {
122   tre_last_matched_branch_pre_t *b = lm->branches;
123   int n_branches = 0;
124
125   while (b)
126     {
127       n_branches++;
128       b = b->next;
129     }
130
131   print_indent(indent);
132   DPRINT(("LAST_MATCHED: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
133           lm->tot_branches, lm->tot_last_matched, lm->tot_tags));
134   print_indent(indent);
135   DPRINT(("..start_tag=%d n_branches=%d branches=%d\n", lm->start_tag,
136           lm->n_branches, n_branches));
137   if (lm->n_branches != n_branches)
138     DPRINT(("*** mismatch between n and branches ***\n"));
139
140   b = lm->branches;
141   indent++;
142   while (b)
143     {
144       print_last_match_branch_pre(b, indent, num_tags);
145       b = b->next;
146     }
147 }
148
149 static void print_last_matched(tre_last_matched_t *lm, int indent);
150 static void
151 print_last_match_branch(tre_last_matched_branch_t *branch, int indent)
152 {
153   tre_last_matched_t *u;
154   int i;
155
156   print_indent(indent);
157   DPRINT(("BRANCH: n_last_matched=%d\n", branch->n_last_matched));
158   if (branch->cmp_tag > 0)
159     {
160       print_indent(indent);
161       DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
162       if (branch->n_tags > 0)
163         {
164           const char *sep = " tags=";
165           for (i = 0; i < branch->n_tags; i++)
166             {
167               DPRINT(("%s%d", sep, branch->tags[i]));
168               sep = ",";
169             }
170         }
171       DPRINT(("\n"));
172     }
173
174   u = branch->last_matched;
175   indent++;
176   for (i = branch->n_last_matched; i > 0; i--, u++)
177     print_last_matched(u, indent);
178 }
179
180 static void
181 print_last_matched(tre_last_matched_t *lm, int indent)
182 {
183   int i;
184   tre_last_matched_branch_t *b;
185
186   print_indent(indent);
187   DPRINT(("LAST_MATCHED: n_branches=%d start_tag=%d\n", lm->n_branches,
188           lm->start_tag));
189
190   b = lm->branches;
191   indent++;
192   for (i = lm->n_branches; i > 0; i--, b++)
193     print_last_match_branch(b, indent);
194 }
195 #endif /* TRE_DEBUG */
196
197
198 /* Merge the tre_last_matched_branch_pre_t of src into dst, creating a new
199    one if needed.  If tag_id > 0, add that tag as well (a negative tag_id will
200    create an unset tre_last_matched_branch_pre_t. */
201 static reg_errcode_t
202 tre_merge_branches(tre_mem_t mem, tre_ast_node_t *dst, tre_ast_node_t *src,
203                    int tag_id, int num_tags)
204 {
205   tre_last_matched_branch_pre_t *db = dst->last_matched_branch;
206   tre_last_matched_branch_pre_t *sb = (src ? src->last_matched_branch : NULL);
207
208   if (db)
209     {
210       if (sb)
211         {
212           bitstr_t *l = db->tags;
213           bitstr_t *r = sb->tags;
214           int i = bitstr_size(num_tags);
215
216           while(i-- > 0)
217             *l++ |= *r++;
218           /* db and sb are the info from two parallel sub-trees, so the tags
219              must be mutually exclusive, and we can just add their numbers */
220           db->n_tags += sb->n_tags;
221           db->tot_tags += sb->tot_tags;
222           if (db->last_matched)
223             {
224               if (sb->last_matched)
225                 {
226                   tre_last_matched_pre_t *u = db->last_matched;
227
228                   while(u->next)
229                     u = u->next;
230                   u->next = sb->last_matched;
231                   db->n_last_matched += sb->n_last_matched;
232                   db->tot_branches += sb->tot_branches;
233                   db->tot_last_matched += sb->tot_last_matched;
234                 }
235             }
236             else if (sb->last_matched)
237               {
238                 db->last_matched = sb->last_matched;
239                 db->n_last_matched = sb->n_last_matched;
240                 db->tot_branches = sb->tot_branches;
241                 db->tot_last_matched = sb->tot_last_matched;
242               }
243         }
244     }
245   else
246     db = sb;
247
248   if (tag_id != 0)
249     {
250       if (!db)
251         {
252           db = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
253                               + bitstr_size(num_tags));
254           if (db == NULL)
255             return REG_ESPACE;
256           db->tot_branches = 1;
257         }
258       if (tag_id > 0)
259         {
260           /* tag_id is a new tag, and shouldn't exist in db's tags,
261              so we can always increment n_tags */
262           bit_set(db->tags, tag_id);
263           db->n_tags++;
264           db->tot_tags++;
265         }
266     }
267   dst->last_matched_branch = db;
268   return REG_OK;
269 }
270
271
272 /* Inserts a catenation node to the root of the tree given in `node'.
273    As the left child a new tag with number `tag_id' to `node' is added,
274    and the right child is the old root. */
275 static reg_errcode_t
276 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
277 {
278   tre_catenation_t *c;
279
280   DPRINT(("add_tag_left: tag %d\n", tag_id));
281
282   c = tre_mem_alloc(mem, sizeof(*c));
283   if (c == NULL)
284     return REG_ESPACE;
285   c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
286   if (c->left == NULL)
287     return REG_ESPACE;
288   c->right = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
289   if (c->right == NULL)
290     return REG_ESPACE;
291
292   c->right->obj = node->obj;
293   c->right->type = node->type;
294   c->right->last_matched_branch = node->last_matched_branch;
295   c->right->nullable = -1;
296   c->right->submatch_id = -1;
297   node->obj = c;
298   node->type = CATENATION;
299   node->original = c->right;
300   return REG_OK;
301 }
302
303 /* Inserts a catenation node to the root of the tree given in `node'.
304    As the right child a new tag with number `tag_id' to `node' is added,
305    and the left child is the old root. */
306 static reg_errcode_t
307 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
308 {
309   tre_catenation_t *c;
310
311   DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
312
313   c = tre_mem_alloc(mem, sizeof(*c));
314   if (c == NULL)
315     return REG_ESPACE;
316   c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
317   if (c->right == NULL)
318     return REG_ESPACE;
319   c->left = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
320   if (c->left == NULL)
321     return REG_ESPACE;
322
323   c->left->obj = node->obj;
324   c->left->type = node->type;
325   c->left->last_matched_branch = node->last_matched_branch;
326   c->left->nullable = -1;
327   c->left->submatch_id = -1;
328   node->obj = c;
329   node->type = CATENATION;
330   node->original = c->left;
331   return REG_OK;
332 }
333
334 typedef enum {
335   ADDTAGS_RECURSE,
336   ADDTAGS_RECURSE_NOT_TOP_UNION,
337   ADDTAGS_AFTER_ITERATION,
338   ADDTAGS_AFTER_UNION_LEFT,
339   ADDTAGS_AFTER_UNION_RIGHT,
340   ADDTAGS_AFTER_CAT_LEFT,
341   ADDTAGS_AFTER_CAT_RIGHT,
342   ADDTAGS_SET_SUBMATCH_END,
343   ADDTAGS_UNION_RECURSE,
344   ADDTAGS_UNION_RIGHT_RECURSE,
345   ADDTAGS_AFTER_UNION_TOP,
346 } tre_addtags_symbol_t;
347
348 enum {
349   COPY_LAST_MATCHED_BRANCH,
350   COPY_LAST_MATCHED_BRANCH_NEXT,
351   COPY_LAST_MATCHED,
352   COPY_LAST_MATCHED_NEXT,
353 };
354
355
356 #define REGSET_UNSET            ((unsigned)-1)
357
358 /* Go through `regset' and set submatch data for submatches that are
359    using this tag. */
360 static void
361 tre_purge_regset(unsigned *regset, tre_tnfa_t *tnfa, int tag)
362 {
363   int i;
364
365   for (i = 0; regset[i] != REGSET_UNSET; i++)
366     {
367       int id = regset[i] / 2;
368       int start = !(regset[i] % 2);
369       if (id >= SUBMATCH_ID_INVISIBLE_START)
370         continue;
371       DPRINT(("  Using tag %d for %s offset of "
372               "submatch %d\n", tag,
373               start ? "start" : "end", id));
374       if (start)
375         tnfa->submatch_data[id].so_tag = tag;
376       else
377         tnfa->submatch_data[id].eo_tag = tag;
378     }
379   regset[0] = -1;
380 }
381
382
383 #define REGSET_HAS_STARTS       0x1
384 #define REGSET_HAS_ENDS         0x2
385
386
387 /* Adds tags to appropriate locations in the parse tree in `tree', so that
388    subexpressions marked for submatch addressing can be traced. */
389 static reg_errcode_t
390 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
391              tre_tnfa_t *tnfa)
392 {
393   reg_errcode_t status = REG_OK;
394   tre_addtags_symbol_t symbol;
395   tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
396   int bottom = tre_stack_num_objects(stack);
397   /* True for first pass (counting number of needed tags) */
398   int first_pass = (mem == NULL || tnfa == NULL);
399   unsigned *regset, *orig_regset;
400   int regset_contains = 0;
401   int num_tags = 0; /* Total number of tags. */
402   int num_minimals = 0;  /* Number of special minimal tags. */
403   int tag = 0;      /* The tag that is to be added next. */
404   int next_tag = 1; /* Next tag to use after this one. */
405   int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
406   int *reorder_tags = NULL; /* Tag reorder array: a pair for each reorder,
407                              * the first is the tag to reorder, the second
408                              * is the tag after which the first is reordered */
409   int *rtp;                 /* Pointer used to fill in reorder_tags and
410                              * tag_order */
411   int *to_reorder;          /* Transform array converting sequential order to
412                              * that specified by reorder_tags */
413   int id;
414
415   tre_tag_direction_t direction = TRE_TAG_LEFT_MAXIMIZE;
416   if (!first_pass)
417     {
418       DPRINT(("Initializing direction to %s\n", tag_dir_str[direction]));
419       tnfa->end_tag = 0;
420       tnfa->minimal_tags[0] = -1;
421     }
422
423   regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches
424                    + tnfa->num_submatches_invisible + 1) * 2));
425   if (regset == NULL)
426     {
427       status = REG_ESPACE;
428       goto error_regset;
429     }
430   regset[0] = REGSET_UNSET;
431   orig_regset = regset;
432
433   if (!first_pass)
434     {
435       /* Allocate all memory for reorder_tags, tag_order, to_seq_order and
436        * to_reorder in one batch (assuming all are the same type) */
437       rtp = reorder_tags = xmalloc(sizeof(*reorder_tags) *
438                                    ((2 * tnfa->num_reorder_tags + 1) +
439                                    tnfa->num_tags));
440       if (reorder_tags == NULL)
441         {
442           status = REG_ESPACE;
443           goto error_reorder_tags;
444         }
445       to_reorder = reorder_tags + (2 * tnfa->num_reorder_tags + 1);
446     }
447
448   STACK_PUSH(stack, voidptr, node);
449   STACK_PUSH(stack, int, ADDTAGS_RECURSE);
450
451   while (tre_stack_num_objects(stack) > bottom)
452     {
453       if (status != REG_OK)
454         break;
455
456       symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
457       switch (symbol)
458         {
459           int top_union;
460
461         case ADDTAGS_SET_SUBMATCH_END:
462           {
463             int i;
464
465             id = tre_stack_pop_int(stack);
466             node = tre_stack_pop_voidptr(stack);
467             /* Add end of this submatch to regset. */
468             for (i = 0; regset[i] != REGSET_UNSET; i++);
469             regset[i] = id * 2 + 1;
470             regset[i + 1] = -1;
471             regset_contains |= REGSET_HAS_ENDS;
472
473             /* Always put a tag after a minimal iterator. */
474             if (minimal_tag >= 0)
475               {
476                 if (first_pass)
477                   {
478                     node->num_tags++;
479                     DPRINT(("  ADDTAGS_SET_SUBMATCH_END: node->num_tags = %d\n",
480                             node->num_tags));
481                   }
482                 else
483                   {
484                     int i;
485                     status = tre_merge_branches(mem, node, NULL, tag,
486                                                 tnfa->num_tags);
487                     if (status != REG_OK)
488                       break;
489                     status = tre_add_tag_right(mem, node, tag);
490                     if (status != REG_OK)
491                       break;
492                     tnfa->tag_directions[tag] = TRE_TAG_MINIMIZE;
493                     DPRINT(("Setting t%d direction to %s\n", tag,
494                             tag_dir_str[tnfa->tag_directions[tag]]));
495                     DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
496                     for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
497                     tnfa->minimal_tags[i] = tag;
498                     tnfa->minimal_tags[i + 1] = minimal_tag;
499                     tnfa->minimal_tags[i + 2] = -1;
500
501                     DPRINT(("  Minimal end: t%d reordered to "
502                             "after t%d\n", tag, minimal_tag));
503                     /* Append to tag_order, move "tag" after
504                      * "minimal_tag" */
505                     *rtp++ = tag;
506                     *rtp++ = minimal_tag;
507
508                     num_minimals++;
509                     tre_purge_regset(regset, tnfa, tag);
510                   }
511
512                 minimal_tag = -1;
513                 DPRINT(("  ADDTAGS_SET_SUBMATCH_END num_tags++ tag=%d\n", tag));
514                 regset[0] = REGSET_UNSET;
515                 regset_contains = 0;
516                 tag = next_tag;
517                 num_tags++;
518                 next_tag++;
519               }
520             break;
521           }
522
523         case ADDTAGS_RECURSE_NOT_TOP_UNION:
524           /* Like ADDTAGS_RECURSE, except that top_union is set to zero,
525            * indicating that if a union is being processed, it is not the
526            * top-most of a series */
527           top_union = 0;
528           goto do_addtags_recurse;
529
530         case ADDTAGS_RECURSE:
531           /* Setting top_union to 1 means that if a union is begin processed,
532            * it is the top-most of a series, and should recurse through the
533            * series to set the left_tag and right_tag values */
534           top_union = 1;
535
536 do_addtags_recurse:
537           node = tre_stack_pop_voidptr(stack);
538
539           id = node->submatch_id;
540           if (id >= 0)
541             {
542               int i;
543
544
545               /* Add start of this submatch to regset. */
546               for (i = 0; regset[i] != REGSET_UNSET; i++);
547               regset[i] = id * 2;
548               regset[i + 1] = -1;
549               regset_contains |= REGSET_HAS_STARTS;
550
551               /* Add end of this submatch to regset after processing this
552                  node. */
553               STACK_PUSH(stack, voidptr, node);
554               STACK_PUSHX(stack, int, id);
555               STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
556             }
557
558           switch (node->type)
559             {
560             case LITERAL:
561               {
562                 tre_literal_t *lit = node->obj;
563
564                 if (!IS_SPECIAL(lit) || IS_BACKREF(lit) || IS_EMPTY(lit) || IS_ASSERTION(lit))
565                   {
566                     DPRINT(("Literal %d-%d\n",
567                             (int)lit->code_min, (int)lit->code_max));
568                     if (regset_contains)
569                       {
570                         /* Regset is not empty, so add a tag before the
571                            literal or backref. */
572                         if (first_pass)
573                           {
574                             DPRINT(("  ADDTAGS_RECURSE:LITERAL node->num_tags = 1\n"));
575                             node->num_tags = 1;
576                           }
577                         else
578                           {
579                             status = tre_merge_branches(mem, node, NULL, tag,
580                                                         tnfa->num_tags);
581                             if (status != REG_OK)
582                               break;
583                             status = tre_add_tag_left(mem, node, tag);
584                             if (status != REG_OK)
585                               break;
586                             if (regset_contains == REGSET_HAS_STARTS)
587                               tnfa->tag_directions[tag] = TRE_TAG_LEFT_MAXIMIZE;
588                             else
589                               tnfa->tag_directions[tag] = direction;
590                             DPRINT(("Setting t%d direction to %s\n", tag,
591                                     tag_dir_str[tnfa->tag_directions[tag]]));
592                             tre_purge_regset(regset, tnfa, tag);
593
594                             if (IS_BACKREF(lit))
595                               {
596                                 int b = lit->code_max;
597                                 int t = tnfa->submatch_data[b].so_tag;
598                                 /* Fail if the referenced submatch hasn't been
599                                  * completed yet */
600                                 if (tnfa->submatch_data[b].eo_tag < 0)
601                                   {
602                                     status = REG_ESUBREG;
603                                     break;
604                                   }
605                                 if (t < tag)
606                                   {
607                                     DPRINT(("  Backref %d start: "
608                                             "t%d reordered to before t%d\n",
609                                             b, tag, t));
610                                     if(t > 0)
611                                       t--;
612                                     /* Append to tag_order, move "tag" after
613                                      * "t" */
614                                     *rtp++ = tag;
615                                     *rtp++ = t;
616                                   }
617 #if TRE_DEBUG
618                                 else
619                                   DPRINT(("  Backref %d start: "
620                                           "(t%d already before t%d)\n",
621                                             b, tag, t));
622 #endif /* TRE_DEBUG */
623                               }
624                           }
625
626                         DPRINT(("  ADDTAGS_RECURSE:LITERAL num_tags++ tag=%d\n",
627                                 tag));
628                         regset[0] = REGSET_UNSET;
629                         regset_contains = 0;
630                         tag = next_tag;
631                         num_tags++;
632                         next_tag++;
633                       }
634                   }
635                 else
636                   {
637                     assert(!IS_TAG(lit));
638                   }
639                 break;
640               }
641             case CATENATION:
642               {
643                 tre_catenation_t *cat = node->obj;
644                 tre_ast_node_t *left = cat->left;
645                 tre_ast_node_t *right = cat->right;
646                 int reserved_tag = -1;
647                 DPRINT(("Catenation, next_tag = %d\n", next_tag));
648
649
650                 /* After processing right child. */
651                 STACK_PUSHX(stack, voidptr, node);
652                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
653
654                 /* Process right child. */
655                 STACK_PUSHX(stack, voidptr, right);
656                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
657
658                 /* After processing left child. */
659                 STACK_PUSHX(stack, int, next_tag + left->num_tags);
660                 DPRINT(("  Pushing %d for after left\n",
661                         next_tag + left->num_tags));
662                 if (left->num_tags > 0 && right->num_tags > 0)
663                   {
664                     /* Reserve the next tag to the right child. */
665                     DPRINT(("  ADDTAGS_RECURSE:CATENATION num_tags++ "
666                             "Reserving next_tag %d to right child\n",
667                             next_tag));
668                     reserved_tag = next_tag;
669                     next_tag++;
670                   }
671                 STACK_PUSHX(stack, int, reserved_tag);
672                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
673
674                 /* Process left child. */
675                 STACK_PUSHX(stack, voidptr, left);
676                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
677
678                 }
679               break;
680             case ITERATION:
681               {
682                 tre_iteration_t *iter = node->obj;
683                 DPRINT(("Iteration\n"));
684
685                 if (first_pass)
686                   STACK_PUSHX(stack, int, regset_contains != 0);
687                 STACK_PUSHX(stack, int, tag);
688                 STACK_PUSHX(stack, voidptr, node);
689                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
690
691                 STACK_PUSHX(stack, voidptr, iter->arg);
692                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
693
694                 /* Regset is not empty, so add a tag here (this always happens
695                    because iterators always get submatch id, even if in the
696                    invisible range) */
697                 if (regset_contains)
698                   {
699                     if (!first_pass)
700                       {
701                         status = tre_merge_branches(mem, node, NULL, tag,
702                                                     tnfa->num_tags);
703                         if (status != REG_OK)
704                           break;
705                         status = tre_add_tag_left(mem, node, tag);
706                         if (status != REG_OK)
707                           break;
708                         if (regset_contains == REGSET_HAS_STARTS && tag != 0)
709                           tnfa->tag_directions[tag] = iter->minimal ?
710                                                       TRE_TAG_MINIMIZE :
711                                                       TRE_TAG_LEFT_MAXIMIZE;
712                         else
713                           tnfa->tag_directions[tag] = direction;
714                         DPRINT(("Setting t%d direction to %s\n", tag,
715                                 tag_dir_str[tnfa->tag_directions[tag]]));
716                         tre_purge_regset(regset, tnfa, tag);
717                       }
718
719                     DPRINT(("  ADDTAGS_RECURSE:ITERATION num_tags++ tag=%d\n",
720                             tag));
721                     regset[0] = REGSET_UNSET;
722                     regset_contains = 0;
723                     tag = next_tag;
724                     num_tags++;
725                     next_tag++;
726                   }
727                 direction = TRE_TAG_LEFT_MAXIMIZE;
728                 DPRINT(("  Setting direction to %s\n", tag_dir_str[direction]));
729               }
730               break;
731             case UNION:
732               {
733                 tre_union_t *uni;
734                 tre_ast_node_t *left;
735                 tre_ast_node_t *right;
736                 int front_tag = -1;
737
738                 DPRINT(("Union\n"));
739
740                 if (regset_contains)
741                   {
742                     DPRINT(("  UNION num_tags++ tag=%d\n", tag));
743                     front_tag = tag;
744                     tag = next_tag;
745                     num_tags++;
746                     next_tag++;
747                   }
748
749                 /* For the top union, walk the tree of consecutive unions,
750                  * setting the left_tag and right_tag values in increasing
751                  * order (left to right priority) */
752                 if (top_union &&
753                     (node->num_submatches -
754                     (node->submatch_id >= 0 &&
755                     node->submatch_id < SUBMATCH_ID_INVISIBLE_START)) > 0)
756                   {
757                     tre_ast_node_t *n;
758                     int last = tre_stack_num_objects(stack);
759
760                     STACK_PUSH(stack, voidptr, node);
761                     STACK_PUSH(stack, int, ADDTAGS_UNION_RECURSE);
762
763                     while (tre_stack_num_objects(stack) > last)
764                       {
765                         symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
766                         switch (symbol)
767                           {
768                           case ADDTAGS_UNION_RECURSE:
769                             n = tre_stack_pop_voidptr(stack);
770                             uni = n->obj;
771                             left = uni->left;
772
773                             /* Since the top union has num_submatches > 0,
774                              * we set all the consecutive union's
775                              * make_branches to 1 to force the generation
776                              * of end tags for each union branch. */
777                             n->make_branches = 1;
778
779                             STACK_PUSH(stack, voidptr, n);
780                             STACK_PUSH(stack, int,
781                                        ADDTAGS_UNION_RIGHT_RECURSE);
782
783                             if (left->type == UNION)
784                               {
785                                 STACK_PUSH(stack, voidptr, left);
786                                 STACK_PUSH(stack, int,
787                                            ADDTAGS_UNION_RECURSE);
788                               }
789                             else
790                               {
791                                 DPRINT(("  ADDTAGS_UNION_RECURSE "
792                                         "num_tags++ tag=%d\n", tag));
793                                 uni->left_tag = tag;
794                                 tag = next_tag;
795                                 num_tags++;
796                                 next_tag++;
797                               }
798                             break;
799
800                           case ADDTAGS_UNION_RIGHT_RECURSE:
801                             n = tre_stack_pop_voidptr(stack);
802                             uni = n->obj;
803                             right = uni->right;
804
805                             if (right->type == UNION)
806                               {
807                                 STACK_PUSH(stack, voidptr, right);
808                                 STACK_PUSH(stack, int,
809                                            ADDTAGS_UNION_RECURSE);
810                               }
811                             else
812                               {
813                                 DPRINT(("  ADDTAGS_UNION_RIGHT_RECURSE "
814                                         "num_tags++ tag=%d\n", tag));
815                                 uni->right_tag = tag;
816                                 tag = next_tag;
817                                 num_tags++;
818                                 next_tag++;
819                               }
820
821                             break;
822
823                           default:
824                             assert(0);
825                             break;
826
827                           } /* end switch(symbol) */
828                       } /* end while(tre_stack_num_objects(stack) > last */
829                     if (!first_pass)
830                       {
831                         STACK_PUSHX(stack, int, front_tag);
832                         STACK_PUSHX(stack, voidptr, node);
833                         STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_TOP);
834                       }
835                   } /* end if (top_union && ...) */
836
837                 uni = node->obj;
838                 left = uni->left;
839                 right = uni->right;
840
841                 /* After processing right child. */
842                 STACK_PUSHX(stack, voidptr, regset);
843                 STACK_PUSHX(stack, int, regset_contains != 0);
844                 STACK_PUSHX(stack, voidptr, node);
845                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
846
847                 /* Process right child. */
848                 STACK_PUSHX(stack, voidptr, right);
849                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
850
851                 /* After processing left child. */
852                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
853
854                 /* Process left child. */
855                 STACK_PUSHX(stack, voidptr, left);
856                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
857
858                 /* Regset is not empty, so add a tag here. */
859                 if (regset_contains)
860                   {
861                     if (!first_pass)
862                       {
863                         status = tre_merge_branches(mem, node, NULL, front_tag,
864                                                     tnfa->num_tags);
865                         if (status != REG_OK)
866                           break;
867                         status = tre_add_tag_left(mem, node, front_tag);
868                         if (status != REG_OK)
869                           break;
870                         if (regset_contains == REGSET_HAS_STARTS)
871                           tnfa->tag_directions[front_tag] = TRE_TAG_LEFT_MAXIMIZE;
872                         else
873                           tnfa->tag_directions[front_tag] = direction;
874                         DPRINT(("Setting t%d direction to %s\n", front_tag,
875                                 tag_dir_str[tnfa->tag_directions[front_tag]]));
876                         tre_purge_regset(regset, tnfa, front_tag);
877                       }
878
879                     regset[0] = REGSET_UNSET;
880                     regset_contains = 0;
881                   }
882
883                 break;
884               }
885             } /* end switch (node->type) */
886
887           break; /* end case: ADDTAGS_RECURSE */
888
889         case ADDTAGS_AFTER_ITERATION:
890           {
891             tre_iteration_t *iter;
892             tre_ast_node_t *orig;
893             int enter_tag;
894
895             node = tre_stack_pop_voidptr(stack);
896             orig = node->original ? node->original : node;
897             iter = (tre_iteration_t *)orig->obj;
898             enter_tag = tre_stack_pop_int(stack);
899             if (iter->minimal)
900               minimal_tag = enter_tag;
901
902             DPRINT(("After iteration\n"));
903             if (first_pass)
904               {
905                 node->num_tags = iter->arg->num_tags + tre_stack_pop_int(stack);
906                 DPRINT(("  ADDTAGS_AFTER_ITERATION: node->num_tags = %d\n",
907                         node->num_tags));
908               }
909             else
910               {
911                 /* node->last_matched_branch will have the start tag (the tag
912                    just *before* the iteration).  iter->arg->last_matched_branch
913                    will have the tag(s) inside the iteration, the ones that
914                    may need to be reset if the iteration doesn't match.  So
915                    before we merge iter->arg into node, we need to set up
916                    a new tre_last_matched_t and tre_last_matched_branch_t,
917                    using any of the inside tags as cmp_tag (we choose the first
918                    tag found by bit_ffs).  If there are no inside tags, we
919                    don't bother creating the extra structures. */
920                 tre_last_matched_branch_pre_t *b =
921                                                 iter->arg->last_matched_branch;
922
923                 if (b && b->n_tags > 0)
924                   {
925                     tre_last_matched_pre_t *u;
926
927                     bit_ffs(b->tags, num_tags, &b->cmp_tag);
928                     DPRINT(("  ADDTAGS_AFTER_ITERATION: n_tags=%d "
929                             "cmp_tag = %d\n", b->n_tags, b->cmp_tag));
930
931                     u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t) +
932                                        sizeof(tre_last_matched_branch_pre_t)
933                                        + bitstr_size(tnfa->num_tags));
934                     if (!u)
935                       {
936                         status = REG_ESPACE;
937                         break;
938                       }
939                     u->branches = b;
940                     u->n_branches = 1;
941                     u->start_tag = b->cmp_tag;
942                     u->tot_branches = b->tot_branches;
943                     u->tot_last_matched = 1 + b->tot_last_matched;
944                     u->tot_tags = b->tot_tags;
945
946                     b = (tre_last_matched_branch_pre_t *)(u + 1);
947                     b->last_matched = u;
948                     b->n_last_matched = 1;
949                     b->tot_branches = 1 + u->tot_branches;
950                     b->tot_last_matched = u->tot_last_matched;
951                     b->tot_tags = u->tot_tags;
952
953                     iter->arg->last_matched_branch = b;
954                   }
955                 status = tre_merge_branches(mem, node, iter->arg, 0,
956                                             tnfa->num_tags);
957                 if (status != REG_OK)
958                   break;
959
960                 if (iter->minimal)
961                   {
962                     /* Add a union with a left EMPTY literal and the right
963                        being iter->arg.  This should force the tags inside
964                        the minimal iteration to prefer being unset */
965                     if (iter->min == 0 && iter->max <= 1)
966                       {
967                         tre_ast_node_t *u, *e;
968
969                         e = tre_ast_new_literal(mem, EMPTY, -1, -1);
970                         if (e == NULL)
971                           {
972                             status = REG_ESPACE;
973                             break;
974                           }
975                         u = tre_ast_new_union(mem, e, iter->arg);
976                         if (u == NULL)
977                           {
978                             status = REG_ESPACE;
979                             break;
980                           }
981                         iter->arg = u;
982                       }
983
984                     direction = TRE_TAG_MINIMIZE;
985                   }
986                 else
987                   direction = TRE_TAG_MAXIMIZE;
988                 DPRINT(("  Setting direction to %s\n", tag_dir_str[direction]));
989               }
990             break;
991           }
992
993         case ADDTAGS_AFTER_CAT_LEFT:
994           {
995             int new_tag = tre_stack_pop_int(stack);
996             next_tag = tre_stack_pop_int(stack);
997             DPRINT(("After cat left, tag = %d, next_tag = %d\n",
998                     tag, next_tag));
999             if (new_tag >= 0)
1000               {
1001                 DPRINT(("  Setting tag to %d\n", new_tag));
1002                 tag = new_tag;
1003               }
1004             break;
1005           }
1006
1007         case ADDTAGS_AFTER_CAT_RIGHT:
1008           {
1009             tre_catenation_t *cat;
1010
1011             DPRINT(("After cat right\n"));
1012             node = tre_stack_pop_voidptr(stack);
1013             cat = node->obj;
1014             if (first_pass)
1015               {
1016                 node->num_tags = cat->left->num_tags + cat->right->num_tags;
1017                 DPRINT(("  ADDTAGS_AFTER_CAT_RIGHT: node->num_tags = %d\n",
1018                         node->num_tags));
1019               }
1020             else
1021               {
1022                 status = tre_merge_branches(mem, cat->left, cat->right, 0,
1023                                             tnfa->num_tags);
1024                 if (status != REG_OK)
1025                   break;
1026                 status = tre_merge_branches(mem, node, cat->left, 0,
1027                                             tnfa->num_tags);
1028               }
1029             break;
1030           }
1031
1032         case ADDTAGS_AFTER_UNION_LEFT:
1033           DPRINT(("After union left\n"));
1034           /* Lift the bottom of the `regset' array so that when processing
1035              the right operand the items currently in the array are
1036              invisible.  The original bottom was saved at ADDTAGS_UNION and
1037              will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1038           while (*regset != REGSET_UNSET)
1039             regset++;
1040           regset_contains = 0;
1041           break;
1042
1043         case ADDTAGS_AFTER_UNION_RIGHT:
1044           {
1045             int added_tags;
1046             tre_ast_node_t *orig;
1047             tre_union_t *uni;
1048             /* Note: node may not be a UNION, but a CATENATION with a left
1049              * tag.  So that is why we pass the original node->obj on the
1050              * stack, to get the union's true values. */
1051
1052             DPRINT(("After union right\n"));
1053             node = tre_stack_pop_voidptr(stack);
1054             orig = node->original ? node->original : node;
1055             uni = (tre_union_t *)orig->obj;
1056             added_tags = tre_stack_pop_int(stack);
1057             if (first_pass)
1058               {
1059                 node->num_tags = uni->left->num_tags + uni->right->num_tags
1060                                  + added_tags;
1061                 if (uni->left_tag > 0)
1062                   node->num_tags++;
1063                 if (uni->right_tag > 0)
1064                   node->num_tags++;
1065                 DPRINT(("  ADDTAGS_AFTER_UNION_RIGHT: node->num_tags = %d\n",
1066                         node->num_tags));
1067               }
1068             regset = tre_stack_pop_voidptr(stack);
1069
1070             /* Add tags after both children, the left child gets a smaller
1071                tag than the right child.  This guarantees that we prefer
1072                the left child over the right child. */
1073             /* XXX - This is not always necessary (if the children have
1074                tags which must be seen for every match of that child). */
1075             if (!first_pass && node->make_branches)
1076               {
1077                 tre_last_matched_branch_pre_t *lb =
1078                   uni->left->last_matched_branch;
1079                 tre_last_matched_branch_pre_t *rb =
1080                   uni->right->last_matched_branch;
1081                 tre_last_matched_pre_t *lu =
1082                   uni->left->last_matched_in_progress;
1083                 tre_last_matched_pre_t *ru =
1084                   uni->right->last_matched_in_progress;
1085                 tre_last_matched_pre_t *u;
1086                 /* We don't need to call tre_merge_branches because these
1087                  * tags don't participate in submatch ranges, so don't need
1088                  * to be recorded.  But we do set the cmp_tag entry of the
1089                  * tre_last_matched_branch_pre_t, so we might call
1090                  * tre_merge_branches if we need to create an empty
1091                  * tre_last_matched_branch_pre_t. */
1092                 if (uni->left_tag > 0)
1093                   {
1094                     DPRINT(("Setting t%d direction to maximize\n",
1095                             uni->left_tag));
1096                     status = tre_add_tag_right(mem, uni->left, uni->left_tag);
1097                     if (status != REG_OK)
1098                       break;
1099                     tnfa->tag_directions[uni->left_tag] = TRE_TAG_MAXIMIZE;
1100                     if (!lb)
1101                       {
1102                         status = tre_merge_branches(mem, uni->left, NULL, -1,
1103                                                     tnfa->num_tags);
1104                         if (status != REG_OK)
1105                           break;
1106                         lb = uni->left->last_matched_branch;
1107                       }
1108                     lb->cmp_tag = uni->left_tag;
1109                   }
1110                 if (uni->right_tag > 0)
1111                   {
1112                     DPRINT(("Setting t%d direction to maximize\n",
1113                             uni->right_tag));
1114                     status = tre_add_tag_right(mem, uni->right, uni->right_tag);
1115                     if (status != REG_OK)
1116                       break;
1117                     tnfa->tag_directions[uni->right_tag] = TRE_TAG_MAXIMIZE;
1118                     if (!rb)
1119                       {
1120                         status = tre_merge_branches(mem, uni->right, NULL, -1,
1121                                                     tnfa->num_tags);
1122                         if (status != REG_OK)
1123                           break;
1124                         rb = uni->right->last_matched_branch;
1125                       }
1126                     rb->cmp_tag = uni->right_tag;
1127                   }
1128                 /* Now merge the tre_last_matched_branch_pre_t into a
1129                    tre_last_matched_pre_t */
1130                 if (lu == NULL)
1131                   {
1132                     if (ru == NULL)
1133                       {
1134                         /* Create a new tre_last_matched_pre_t */
1135                         u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t));
1136                         if (!u)
1137                           {
1138                             status = REG_ESPACE;
1139                             break;
1140                           }
1141                         u->tot_last_matched = 1;
1142
1143                         if (lb)
1144                           {
1145                             u->branches = lb;
1146                             u->n_branches = 1;
1147                             u->tot_branches += lb->tot_branches;
1148                             u->tot_last_matched += lb->tot_last_matched;
1149                             u->tot_tags += lb->tot_tags;
1150                             if (rb)
1151                               {
1152                                 lb->next = rb;
1153                                 u->n_branches++;
1154                                 u->tot_branches += rb->tot_branches;
1155                                 u->tot_last_matched += rb->tot_last_matched;
1156                                 u->tot_tags += rb->tot_tags;
1157                               }
1158                           }
1159                         else if (rb)
1160                           {
1161                             u->branches = rb;
1162                             u->n_branches = 1;
1163                             u->tot_branches += rb->tot_branches;
1164                             u->tot_last_matched += rb->tot_last_matched;
1165                             u->tot_tags += rb->tot_tags;
1166                           }
1167                       }
1168                     else
1169                       {
1170                         /* Use ru, and add lb */
1171                         u = ru;
1172                         if (lb)
1173                           {
1174                             lb->next = u->branches;
1175                             u->branches = lb;
1176                             u->n_branches++;
1177                             u->tot_branches += lb->tot_branches;
1178                             u->tot_last_matched += lb->tot_last_matched;
1179                             u->tot_tags += lb->tot_tags;
1180                           }
1181                       }
1182                   }
1183                 else if (ru == NULL)
1184                   {
1185                     /* Use lu, and add rb */
1186                     u = lu;
1187                     if (rb)
1188                       {
1189                         rb->next = u->branches;
1190                         u->branches = rb;
1191                         u->n_branches++;
1192                         u->tot_branches += rb->tot_branches;
1193                         u->tot_last_matched += rb->tot_last_matched;
1194                         u->tot_tags += rb->tot_tags;
1195                       }
1196                   }
1197                 else
1198                   {
1199                     /* Merge lu and ru into lu */
1200                     if (lu->branches)
1201                       {
1202                         if (ru->branches)
1203                           {
1204                             tre_last_matched_branch_pre_t *b = lu->branches;
1205                             while (b->next) b = b->next;
1206                             b->next = ru->branches;
1207                             lu->n_branches += ru->n_branches;
1208                           }
1209                       }
1210                     else if (ru->branches)
1211                       {
1212                         lu->branches = ru->branches;
1213                         lu->n_branches = ru->n_branches;
1214                       }
1215                     lu->tot_branches += ru->tot_branches;
1216                     lu->tot_last_matched += ru->tot_last_matched - 1;
1217                     lu->tot_tags += ru->tot_tags;
1218                     u = lu;
1219                   }
1220                 node->last_matched_in_progress = u;
1221               }
1222             direction = TRE_TAG_MAXIMIZE;
1223             break;
1224           }
1225
1226         case ADDTAGS_AFTER_UNION_TOP: /* only called when not first_pass */
1227           {
1228             tre_last_matched_branch_pre_t *b;
1229             tre_last_matched_pre_t *u;
1230             int start_tag;
1231
1232             DPRINT(("After union top\n"));
1233             node = tre_stack_pop_voidptr(stack);
1234             start_tag = tre_stack_pop_int(stack);
1235             b = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
1236                                + bitstr_size(tnfa->num_tags));
1237             if (!b)
1238               {
1239                 status = REG_ESPACE;
1240                 break;
1241               }
1242
1243             u = node->last_matched_in_progress;
1244             u->start_tag = start_tag;
1245             b->tot_branches = 1 + u->tot_branches;
1246             b->tot_last_matched = u->tot_last_matched;
1247             b->tot_tags = u->tot_tags;
1248             b->last_matched = u;
1249             b->n_last_matched = 1;
1250             node->last_matched_branch = b;
1251             node->last_matched_in_progress = NULL;
1252             break;
1253           }
1254
1255         default:
1256           assert(0);
1257           break;
1258
1259         } /* end switch(symbol) */
1260     } /* end while(tre_stack_num_objects(stack) > bottom) */
1261
1262   if (status != REG_OK)
1263     {
1264       DPRINT(("Error during %s pass\n", first_pass ? "first" : "second"));
1265       goto error_post_compile;
1266     }
1267
1268   if (!first_pass)
1269     {
1270       int i;
1271       if (num_tags != tnfa->num_tags)
1272         {
1273           DPRINT(("num_tags(%d) != tnfa->num_tags(%d)\n", num_tags,
1274                   tnfa->num_tags));
1275           status = REG_BADPAT;
1276           goto error_post_compile;
1277         }
1278
1279       tre_purge_regset(regset, tnfa, tag);
1280       DPRINT(("Setting t%d to %s\n", num_tags,
1281               tag_dir_str[direction]));
1282       tnfa->tag_directions[num_tags] = direction;
1283
1284       if (rtp > reorder_tags + 2 * tnfa->num_reorder_tags)
1285         {
1286           DPRINT(("Processed %d reorder tags instead of %d\n",
1287                   (int)(rtp - reorder_tags) / 2, tnfa->num_reorder_tags));
1288           status = REG_BADPAT;
1289           goto error_post_compile;
1290         }
1291     *rtp = -1;
1292 #if TRE_DEBUG
1293       if (reorder_tags[0] >= 0)
1294         {
1295           DPRINT(("reorder_tags:\n"));
1296           for (rtp = reorder_tags; *rtp >= 0;)
1297             {
1298               DPRINT(("%d after ", *rtp++));
1299               DPRINT(("%d\n", *rtp++));
1300             }
1301         }
1302         else
1303           DPRINT(("No reorder_tags\n"));
1304 #endif /* TRE_DEBUG */
1305
1306       /* Initialize to_reorder */
1307       for (i = 0; i < num_tags; i++)
1308         to_reorder[i] = i;
1309       /* Use to_seq_order to convert reorder_tags values, and use those to
1310        * reorder to_reorder */
1311       for (rtp = reorder_tags; *rtp >= 0;)
1312         {
1313           int j, high, low;
1314           int ti = *rtp++;
1315
1316           /* Skip reordering the final tag */
1317           if (ti >= num_tags)
1318             {
1319               DPRINT(("Skipping reorder of %d\n", ti));
1320               rtp++;
1321               continue;
1322             }
1323           /* The number of the tag to reorder */
1324           high = to_reorder[ti];
1325           /* Reorder after this tag */
1326           low = to_reorder[*rtp++];
1327
1328           DPRINT(("ti=%d high=%d low=%d\n", ti, high, low));
1329           if (low > high)
1330             {
1331               DPRINT(("Tag %d already before %d\n", high, low));
1332               continue;
1333             }
1334           for (j = 0; j < num_tags; j++)
1335             if (to_reorder[j] > low && to_reorder[j] < high)
1336               to_reorder[j]++;
1337           to_reorder[ti] = low + 1;
1338 #ifdef TRE_DEBUG
1339           DPRINT(("to_reorder=("));
1340           for (j = 0; j < num_tags; j++)
1341             {
1342               DPRINT(("%d", to_reorder[j]));
1343               if (j < num_tags - 1)
1344                 DPRINT((","));
1345             }
1346           DPRINT((")\n"));
1347 #endif /* TRE_DEBUG */
1348         }
1349       /* Determine if reordering in really necessary */
1350       {
1351         int need_reorder = 0;
1352         for (i = 0; i < num_tags; i++)
1353           if(to_reorder[i] != i)
1354             {
1355               need_reorder = 1;
1356               break;
1357             }
1358         /* If need_reorder is not set, free reorder_tags, and set to NULL,
1359          * indicating no reordering is needed */
1360         if (!need_reorder)
1361           {
1362             DPRINT(("Don't need to reorder\n"));
1363             xfree(reorder_tags);
1364             reorder_tags = NULL;
1365           }
1366       }
1367     }
1368
1369   if (reorder_tags)
1370     {
1371       int i;
1372       tre_tag_direction_t *new_tag_directions;
1373 #if TRE_DEBUG
1374       DPRINT(("to_reorder:"));
1375       for (i = 0; i < num_tags; i++)
1376         DPRINT((" %d->%d", i, to_reorder[i]));
1377       DPRINT(("\n"));
1378 #endif /* TRE_DEBUG */
1379
1380       DPRINT(("Reordering submatch_data\n"));
1381       for (i = 0; i < tnfa->num_submatches; i++)
1382         {
1383 #if TRE_DEBUG
1384           int so = tnfa->submatch_data[i].so_tag;
1385           int eo = tnfa->submatch_data[i].eo_tag;
1386 #endif /* TRE_DEBUG */
1387           tnfa->submatch_data[i].so_tag =
1388             to_reorder[tnfa->submatch_data[i].so_tag];
1389           tnfa->submatch_data[i].eo_tag =
1390             tnfa->submatch_data[i].eo_tag < num_tags ?
1391             to_reorder[tnfa->submatch_data[i].eo_tag] :
1392             tnfa->submatch_data[i].eo_tag;
1393           DPRINT(("pmatch[%d]: {%d, %d}->{%d, %d}\n", i, so, eo,
1394                   tnfa->submatch_data[i].so_tag,
1395                   tnfa->submatch_data[i].eo_tag));
1396         }
1397
1398       DPRINT(("Reordering tag_directions\n"));
1399       /* We only allocate num_tags directions and reorder them.  The
1400        * num_tags-th direction (end tag) is left unchanged. */
1401       new_tag_directions = xmalloc(sizeof(*new_tag_directions) * num_tags);
1402       if (new_tag_directions == NULL)
1403         {
1404           status = REG_ESPACE;
1405           goto error_post_compile;
1406         }
1407       for (i = 0; i < num_tags; i++)
1408         {
1409           new_tag_directions[to_reorder[i]] = tnfa->tag_directions[i];
1410         }
1411 #if TRE_DEBUG
1412       for (i = 0; i < num_tags; i++)
1413         {
1414           DPRINT(("t%d %s->%s\n", i,
1415                   tag_dir_str[tnfa->tag_directions[i]],
1416                   tag_dir_str[new_tag_directions[i]]));
1417         }
1418         DPRINT(("t%d %s->%s\n", num_tags,
1419                 tag_dir_str[tnfa->tag_directions[num_tags]],
1420                 tag_dir_str[tnfa->tag_directions[num_tags]]));
1421 #endif /* TRE_DEBUG */
1422       memcpy(tnfa->tag_directions, new_tag_directions, sizeof(*new_tag_directions) * num_tags);
1423       xfree(new_tag_directions);
1424
1425       DPRINT(("Reordering minimal_tags\n"));
1426       for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
1427         tnfa->minimal_tags[i] = tnfa->minimal_tags[i] < num_tags ?
1428                                 to_reorder[tnfa->minimal_tags[i]] :
1429                                 tnfa->minimal_tags[i];
1430
1431       DPRINT(("Reordering AST tags\n"));
1432       STACK_PUSH(stack, voidptr, tree);
1433       while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1434         {
1435           node = tre_stack_pop_voidptr(stack);
1436
1437           switch (node->type)
1438             {
1439             case LITERAL:
1440               {
1441                 tre_literal_t *lit = (tre_literal_t *)node->obj;
1442                 if (IS_TAG(lit))
1443                   lit->code_max = to_reorder[lit->code_max];
1444                 break;
1445               }
1446
1447             case UNION:
1448               {
1449                 tre_union_t *uni = (tre_union_t *)node->obj;
1450                 STACK_PUSHX(stack, voidptr, uni->right);
1451                 STACK_PUSHX(stack, voidptr, uni->left);
1452                 break;
1453               }
1454
1455             case CATENATION:
1456               {
1457                 tre_catenation_t *cat = (tre_catenation_t *)node->obj;
1458                 STACK_PUSHX(stack, voidptr, cat->right);
1459                 STACK_PUSHX(stack, voidptr, cat->left);
1460                 break;
1461               }
1462
1463             case ITERATION:
1464               {
1465                 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
1466                 STACK_PUSHX(stack, voidptr, iter->arg);
1467                 break;
1468               }
1469
1470             default:
1471               assert(0);
1472               break;
1473             }
1474         }
1475         if (status != REG_OK)
1476           {
1477             DPRINT(("Error while reordering tags\n"));
1478             goto error_post_compile;
1479           }
1480     }
1481
1482
1483   if (!first_pass)
1484     {
1485       if (tree->last_matched_branch)
1486         {
1487           tre_last_matched_branch_t *buf, *b, *bb;
1488           tre_last_matched_branch_pre_t *bp;
1489           tre_last_matched_t *u, *uu;
1490           tre_last_matched_pre_t *up;
1491           int *t;
1492           int i;
1493 #ifdef TRE_DEBUG
1494           tre_last_matched_branch_t *_b;
1495           tre_last_matched_t *_u;
1496           int *_t;
1497
1498           DPRINT(("last_match_branch_pre:\n"));
1499           print_last_match_branch_pre(tree->last_matched_branch, 0, num_tags);
1500 #endif /* TRE_DEBUG */
1501           buf = (tre_last_matched_branch_t *)xcalloc(1,
1502                                      tree->last_matched_branch->tot_branches
1503                                      * sizeof(tre_last_matched_branch_t) +
1504                                      tree->last_matched_branch->tot_last_matched
1505                                      * sizeof(tre_last_matched_t) +
1506                                      tree->last_matched_branch->tot_tags *
1507                                      sizeof(int));
1508           if (!buf)
1509             {
1510               status = REG_ESPACE;
1511               goto error_post_compile;
1512             }
1513
1514           b = buf;
1515           u = (tre_last_matched_t *)(b +
1516               tree->last_matched_branch->tot_branches);
1517           t = (int *)(u + tree->last_matched_branch->tot_last_matched);
1518 #ifdef TRE_DEBUG
1519           _b = b;
1520           _u = u;
1521           _t = t;
1522 #endif /* TRE_DEBUG */
1523           DPRINT(("Copying info_pre to info\n"));
1524           STACK_PUSH(stack, voidptr, tree->last_matched_branch);
1525           STACK_PUSH(stack, int, 1);
1526           STACK_PUSH(stack, int, COPY_LAST_MATCHED_BRANCH);
1527
1528           while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1529             {
1530               switch (tre_stack_pop_int(stack))
1531                 {
1532                 case COPY_LAST_MATCHED_BRANCH:
1533                   i = tre_stack_pop_int(stack);
1534                   /* The tre_last_matched_branch_pre_t * is still on the
1535                      stack */
1536                   STACK_PUSHX(stack, voidptr, b);
1537                   STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1538                   b += i;
1539                   break;
1540
1541                 case COPY_LAST_MATCHED_BRANCH_NEXT:
1542                   bb = tre_stack_pop_voidptr(stack);
1543                   bp = tre_stack_pop_voidptr(stack);
1544                   bb->n_last_matched = bp->n_last_matched;
1545                   bb->cmp_tag = bp->cmp_tag;
1546                   if (bp->n_tags > 0)
1547                     {
1548                       int n;
1549                       n = bb->n_tags = bp->n_tags;
1550                       bb->tags = t;
1551                       for (i = 0; i < num_tags; i++)
1552                         if (bit_test(bp->tags, i))
1553                           {
1554                             *t++ = i;
1555                             if (--n <= 0)
1556                               break;
1557                           }
1558                     }
1559                   if (bp->next)
1560                     {
1561                       STACK_PUSHX(stack, voidptr, bp->next);
1562                       STACK_PUSHX(stack, voidptr, bb + 1);
1563                       STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1564                     }
1565                   if (bp->n_last_matched > 0)
1566                     {
1567                       bb->last_matched = u;
1568                       STACK_PUSHX(stack, voidptr, bp->last_matched);
1569                       STACK_PUSHX(stack, int, bp->n_last_matched);
1570                       STACK_PUSHX(stack, int, COPY_LAST_MATCHED);
1571                     }
1572                   break;
1573
1574                 case COPY_LAST_MATCHED:
1575                   i = tre_stack_pop_int(stack);
1576                   /* The tre_last_matched_pre_t * is still on the stack */
1577                   STACK_PUSHX(stack, voidptr, u);
1578                   STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1579                   u += i;
1580                   break;
1581
1582                 case COPY_LAST_MATCHED_NEXT:
1583                   uu = tre_stack_pop_voidptr(stack);
1584                   up = tre_stack_pop_voidptr(stack);
1585                   uu->n_branches = up->n_branches;
1586                   uu->branches = b;
1587                   uu->start_tag = up->start_tag;
1588                   if (up->next)
1589                     {
1590                       STACK_PUSHX(stack, voidptr, up->next);
1591                       STACK_PUSHX(stack, voidptr, uu + 1);
1592                       STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1593                     }
1594                   STACK_PUSHX(stack, voidptr, up->branches);
1595                   STACK_PUSHX(stack, int, up->n_branches);
1596                   STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH);
1597                   break;
1598                 }
1599             }
1600           if (status != REG_OK)
1601             goto error_post_compile;
1602 #ifdef TRE_DEBUG
1603           DPRINT(("last_matched_branch:\n"));
1604           print_last_match_branch(buf, 0);
1605           if (b != _b + tree->last_matched_branch->tot_branches)
1606             DPRINT(("b/%p != _b + tree->last_matched_branch->tot_branches/%p\n",
1607                     b, _b + tree->last_matched_branch->tot_branches));
1608           if (u != _u + tree->last_matched_branch->tot_last_matched)
1609             DPRINT(("u/%p != _u + "
1610                     "tree->last_matched_branch->tot_last_matched/%p\n",
1611                     u, _u + tree->last_matched_branch->tot_last_matched));
1612           if (t != _t + tree->last_matched_branch->tot_tags)
1613             DPRINT(("t/%p != _t + tree->last_matched_branch->tot_tags/%p\n",
1614                     t, _t + tree->last_matched_branch->tot_tags));
1615 #endif /* TRE_DEBUG */
1616           tnfa->last_matched_branch = buf;
1617         }
1618 #ifdef TRE_DEBUG
1619       else
1620         DPRINT(("No last_match_branch_pre\n"));
1621 #endif /* TRE_DEBUG */
1622     }
1623
1624   DPRINT(("tre_add_tags: %s complete.  Number of tags %d.\n",
1625           first_pass? "First pass" : "Second pass", num_tags));
1626 #ifdef TRE_DEBUG
1627   tre_ast_print(tree);
1628 #endif /* TRE_DEBUG */
1629   DPRINT(("tre_add_tags: tree->num_tags=%d num_tags=%d\n", tree->num_tags,
1630           num_tags));
1631   assert(tree->num_tags == num_tags);
1632   tnfa->end_tag = num_tags;
1633   tnfa->num_tags = num_tags;
1634   tnfa->num_minimals = num_minimals;
1635 error_post_compile:
1636   xfree(reorder_tags);
1637 error_reorder_tags:
1638   xfree(orig_regset);
1639 error_regset:
1640   return status;
1641 }
1642
1643
1644
1645 /*
1646   AST to TNFA compilation routines.
1647 */
1648
1649 typedef enum {
1650   COPY_RECURSE,
1651   COPY_SET_RESULT_PTR
1652 } tre_copyast_symbol_t;
1653
1654 /* Flags for tre_copy_ast(). */
1655 #define COPY_REMOVE_TAGS         1
1656 #define COPY_MAXIMIZE_FIRST_TAG  2
1657
1658 static reg_errcode_t
1659 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1660              int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1661              tre_ast_node_t **copy, int *max_pos)
1662 {
1663   reg_errcode_t status = REG_OK;
1664   int bottom = tre_stack_num_objects(stack);
1665   int num_copied = 0;
1666   int first_tag = 1;
1667   tre_ast_node_t **result = copy;
1668   tre_copyast_symbol_t symbol;
1669
1670   STACK_PUSH(stack, voidptr, ast);
1671   STACK_PUSH(stack, int, COPY_RECURSE);
1672
1673   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1674     {
1675       tre_ast_node_t *node;
1676       if (status != REG_OK)
1677         break;
1678
1679       symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1680       switch (symbol)
1681         {
1682         case COPY_SET_RESULT_PTR:
1683           result = tre_stack_pop_voidptr(stack);
1684           break;
1685         case COPY_RECURSE:
1686           node = tre_stack_pop_voidptr(stack);
1687           switch (node->type)
1688             {
1689             case LITERAL:
1690               {
1691                 tre_literal_t *lit = node->obj;
1692                 int pos = lit->position;
1693                 int min = lit->code_min;
1694                 int max = lit->code_max;
1695                 tre_bracket_match_list_t *list = !IS_SPECIAL(lit) ?
1696                                                   lit->u.bracket_match_list :
1697                                                   NULL;
1698                 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1699                   {
1700                     /* XXX - e.g. [ab] has only one position but two
1701                        nodes, so we are creating holes in the state space
1702                        here.  Not fatal, just wastes memory. */
1703                     pos += *pos_add;
1704                     num_copied++;
1705                   }
1706                 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1707                   {
1708                     /* Change this tag to empty. */
1709                     min = EMPTY;
1710                     max = pos = -1;
1711                   }
1712                 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1713                          && first_tag)
1714                   {
1715                     /* Maximize the first tag. */
1716                     if (tag_directions[max] == TRE_TAG_LEFT_MAXIMIZE)
1717                       tag_directions[max] = TRE_TAG_MAXIMIZE;
1718                     first_tag = 0;
1719                   }
1720                 *result = tre_ast_new_literal(mem, min, max, pos);
1721                 if (*result == NULL)
1722                   status = REG_ESPACE;
1723
1724                 if (pos > *max_pos)
1725                   *max_pos = pos;
1726
1727                 if (!IS_SPECIAL(lit))
1728                   ((tre_literal_t *)(*result)->obj)->u.bracket_match_list
1729                       = list;
1730                 break;
1731               }
1732             case UNION:
1733               {
1734                 tre_union_t *uni = node->obj;
1735                 tre_union_t *tmp;
1736                 *result = tre_ast_new_union(mem, uni->left, uni->right);
1737                 if (*result == NULL)
1738                   {
1739                     status = REG_ESPACE;
1740                     break;
1741                   }
1742                 tmp = (*result)->obj;
1743                 result = &tmp->left;
1744                 STACK_PUSHX(stack, voidptr, uni->right);
1745                 STACK_PUSHX(stack, int, COPY_RECURSE);
1746                 STACK_PUSHX(stack, voidptr, &tmp->right);
1747                 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1748                 STACK_PUSHX(stack, voidptr, uni->left);
1749                 STACK_PUSHX(stack, int, COPY_RECURSE);
1750                 break;
1751               }
1752             case CATENATION:
1753               {
1754                 tre_catenation_t *cat = node->obj;
1755                 tre_catenation_t *tmp;
1756                 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1757                 if (*result == NULL)
1758                   {
1759                     status = REG_ESPACE;
1760                     break;
1761                   }
1762                 tmp = (*result)->obj;
1763                 tmp->left = NULL;
1764                 tmp->right = NULL;
1765                 result = &tmp->left;
1766
1767                 STACK_PUSHX(stack, voidptr, cat->right);
1768                 STACK_PUSHX(stack, int, COPY_RECURSE);
1769                 STACK_PUSHX(stack, voidptr, &tmp->right);
1770                 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1771                 STACK_PUSHX(stack, voidptr, cat->left);
1772                 STACK_PUSHX(stack, int, COPY_RECURSE);
1773                 break;
1774               }
1775             case ITERATION:
1776               {
1777                 tre_iteration_t *iter = node->obj;
1778                 STACK_PUSHX(stack, voidptr, iter->arg);
1779                 STACK_PUSHX(stack, int, COPY_RECURSE);
1780                 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1781                                            iter->max, iter->minimal);
1782                 if (*result == NULL)
1783                   {
1784                     status = REG_ESPACE;
1785                     break;
1786                   }
1787                 iter = (*result)->obj;
1788                 result = &iter->arg;
1789                 break;
1790               }
1791             default:
1792               assert(0);
1793               break;
1794             }
1795           break;
1796         }
1797     }
1798   *pos_add += num_copied;
1799   return status;
1800 }
1801
1802 typedef enum {
1803   EXPAND_RECURSE,
1804   EXPAND_AFTER_ITER
1805 } tre_expand_ast_symbol_t;
1806
1807 /* Expands each iteration node that has a finite nonzero minimum or maximum
1808    iteration count to a catenated sequence of copies of the node. */
1809 static reg_errcode_t
1810 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1811                int *position, tre_tag_direction_t *tag_directions,
1812                int *max_depth)
1813 {
1814   reg_errcode_t status = REG_OK;
1815   int bottom = tre_stack_num_objects(stack);
1816   int pos_add = 0;
1817   int pos_add_total = 0;
1818   int max_pos = 0;
1819 #ifdef TRE_APPROX
1820   /* Current approximate matching parameters. */
1821   int params[TRE_PARAM_LAST];
1822   /* Approximate parameter nesting level. */
1823   int params_depth = 0;
1824 #endif /* TRE_APPROX */
1825   int iter_depth = 0;
1826 #ifdef TRE_APPROX
1827   int i;
1828 #endif /* TRE_APPROX */
1829
1830 #ifdef TRE_APPROX
1831   for (i = 0; i < TRE_PARAM_LAST; i++)
1832     params[i] = TRE_PARAM_DEFAULT;
1833 #endif /* TRE_APPROX */
1834
1835   STACK_PUSHR(stack, voidptr, ast);
1836   STACK_PUSHR(stack, int, EXPAND_RECURSE);
1837   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1838     {
1839       tre_ast_node_t *node;
1840       tre_expand_ast_symbol_t symbol;
1841
1842       if (status != REG_OK)
1843         break;
1844
1845       DPRINT(("pos_add %d\n", pos_add));
1846
1847       symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1848       node = tre_stack_pop_voidptr(stack);
1849       switch (symbol)
1850         {
1851         case EXPAND_RECURSE:
1852           switch (node->type)
1853             {
1854             case LITERAL:
1855               {
1856                 tre_literal_t *lit= node->obj;
1857                 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1858                   {
1859                     lit->position += pos_add;
1860                     if (lit->position > max_pos)
1861                       max_pos = lit->position;
1862                   }
1863                 break;
1864               }
1865             case UNION:
1866               {
1867                 tre_union_t *uni = node->obj;
1868                 STACK_PUSHX(stack, voidptr, uni->right);
1869                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1870                 STACK_PUSHX(stack, voidptr, uni->left);
1871                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1872                 break;
1873               }
1874             case CATENATION:
1875               {
1876                 tre_catenation_t *cat = node->obj;
1877                 STACK_PUSHX(stack, voidptr, cat->right);
1878                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1879                 STACK_PUSHX(stack, voidptr, cat->left);
1880                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1881                 break;
1882               }
1883             case ITERATION:
1884               {
1885                 tre_iteration_t *iter = node->obj;
1886                 STACK_PUSHX(stack, int, pos_add);
1887                 STACK_PUSHX(stack, voidptr, node);
1888                 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1889                 STACK_PUSHX(stack, voidptr, iter->arg);
1890                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1891                 /* If we are going to expand this node at EXPAND_AFTER_ITER
1892                    then don't increase the `pos' fields of the nodes now, it
1893                    will get done when expanding. */
1894                 if (iter->min > 1 || iter->max > 1)
1895                   pos_add = 0;
1896                 iter_depth++;
1897                 DPRINT(("iter\n"));
1898                 break;
1899               }
1900             default:
1901               assert(0);
1902               break;
1903             }
1904           break;
1905         case EXPAND_AFTER_ITER:
1906           {
1907             tre_iteration_t *iter = node->obj;
1908             int pos_add_last;
1909             pos_add = tre_stack_pop_int(stack);
1910             pos_add_last = pos_add;
1911             /* Originally (in tre_parse_bound), if min == 0 && max == 0, we
1912                immediate replace the whole iteration with EMPTY.  This
1913                unfortunately drops any submatches, and messes up setting the
1914                pmatch values (we can get tags of -1, and tag values in the
1915                billions).  So we left it there and replace with EMPTY here. */
1916             if (iter->min == 0 && iter->max == 0)
1917               {
1918                 tre_ast_node_t *empty = tre_ast_new_literal(mem, EMPTY, -1, -1);
1919                 if (empty == NULL)
1920                   return REG_ESPACE;
1921                 node->obj = empty->obj;
1922                 node->type = empty->type;
1923               }
1924             else if (iter->min > 1 || iter->max > 1)
1925               {
1926                 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1927                 int j;
1928                 int pos_add_save = pos_add;
1929
1930                 /* Create a catenated sequence of copies of the node. */
1931                 for (j = 0; j < iter->min; j++)
1932                   {
1933                     tre_ast_node_t *copy;
1934                     /* Remove tags from all but the last copy. */
1935                     int flags = ((j + 1 < iter->min)
1936                                  ? COPY_REMOVE_TAGS
1937                                  : COPY_MAXIMIZE_FIRST_TAG);
1938                     DPRINT(("  pos_add %d\n", pos_add));
1939                     pos_add_save = pos_add;
1940                     status = tre_copy_ast(mem, stack, iter->arg, flags,
1941                                           &pos_add, tag_directions, &copy,
1942                                           &max_pos);
1943                     if (status != REG_OK)
1944                       return status;
1945                     if (seq1 != NULL)
1946                       seq1 = tre_ast_new_catenation(mem, seq1, copy);
1947                     else
1948                       seq1 = copy;
1949                     if (seq1 == NULL)
1950                       return REG_ESPACE;
1951                   }
1952
1953                 if (iter->max == -1)
1954                   {
1955                     /* No upper limit. */
1956                     pos_add_save = pos_add;
1957                     status = tre_copy_ast(mem, stack, iter->arg, 0,
1958                                           &pos_add, NULL, &seq2, &max_pos);
1959                     if (status != REG_OK)
1960                       return status;
1961                     seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1962                     if (seq2 == NULL)
1963                       return REG_ESPACE;
1964                   }
1965                 else
1966                   {
1967                     for (j = iter->min; j < iter->max; j++)
1968                       {
1969                         tre_ast_node_t *tmp, *copy;
1970                         pos_add_save = pos_add;
1971                         status = tre_copy_ast(mem, stack, iter->arg, 0,
1972                                               &pos_add, NULL, &copy, &max_pos);
1973                         if (status != REG_OK)
1974                           return status;
1975                         if (seq2 != NULL)
1976                           seq2 = tre_ast_new_catenation(mem, copy, seq2);
1977                         else
1978                           seq2 = copy;
1979                         if (seq2 == NULL)
1980                           return REG_ESPACE;
1981                         tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1982                         if (tmp == NULL)
1983                           return REG_ESPACE;
1984                         seq2 = tre_ast_new_union(mem, tmp, seq2);
1985                         if (seq2 == NULL)
1986                           return REG_ESPACE;
1987                       }
1988                   }
1989
1990                 pos_add = pos_add_save;
1991                 if (seq1 == NULL)
1992                   seq1 = seq2;
1993                 else if (seq2 != NULL)
1994                   seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1995                 if (seq1 == NULL)
1996                   return REG_ESPACE;
1997                 node->obj = seq1->obj;
1998                 node->type = seq1->type;
1999               }
2000
2001             iter_depth--;
2002             pos_add_total += pos_add - pos_add_last;
2003             if (iter_depth == 0)
2004               pos_add = pos_add_total;
2005
2006 #ifdef TRE_APPROX
2007             /* If approximate parameters are specified, surround the result
2008                with two parameter setting nodes.  The one on the left sets
2009                the specified parameters, and the one on the right restores
2010                the old parameters. */
2011             if (iter->params)
2012               {
2013                 tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
2014                 int *old_params;
2015
2016                 tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
2017                 if (!tmp_l)
2018                   return REG_ESPACE;
2019                 ((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
2020                 iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
2021                 tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
2022                 if (!tmp_r)
2023                   return REG_ESPACE;
2024                 old_params = tre_mem_alloc(mem, sizeof(*old_params)
2025                                            * TRE_PARAM_LAST);
2026                 if (!old_params)
2027                   return REG_ESPACE;
2028                 for (i = 0; i < TRE_PARAM_LAST; i++)
2029                   old_params[i] = params[i];
2030                 ((tre_literal_t *)tmp_r->obj)->u.params = old_params;
2031                 old_params[TRE_PARAM_DEPTH] = params_depth;
2032                 /* XXX - this is the only place where ast_new_node is
2033                    needed -- should be moved inside AST module. */
2034                 node_copy = tre_ast_new_node(mem, ITERATION,
2035                                              sizeof(tre_iteration_t));
2036                 if (!node_copy)
2037                   return REG_ESPACE;
2038                 node_copy->obj = node->obj;
2039                 tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
2040                 if (!tmp_node)
2041                   return REG_ESPACE;
2042                 tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
2043                 if (!tmp_node)
2044                   return REG_ESPACE;
2045                 /* Replace the contents of `node' with `tmp_node'. */
2046                 memcpy(node, tmp_node, sizeof(*node));
2047                 node->obj = tmp_node->obj;
2048                 node->type = tmp_node->type;
2049                 params_depth++;
2050                 if (params_depth > *max_depth)
2051                   *max_depth = params_depth;
2052               }
2053 #endif /* TRE_APPROX */
2054             break;
2055           }
2056         default:
2057           assert(0);
2058           break;
2059         }
2060     }
2061
2062   *position += pos_add_total;
2063
2064   /* `max_pos' should never be larger than `*position' if the above
2065      code works, but just an extra safeguard let's make sure
2066      `*position' is set large enough so enough memory will be
2067      allocated for the transition table. */
2068   if (max_pos > *position)
2069     *position = max_pos;
2070
2071 #ifdef TRE_DEBUG
2072   DPRINT(("Expanded AST:\n"));
2073   tre_ast_print(ast);
2074   DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
2075 #endif
2076
2077   return status;
2078 }
2079
2080 static tre_pos_and_tags_t *
2081 tre_set_empty(tre_mem_t mem)
2082 {
2083   tre_pos_and_tags_t *new_set;
2084
2085   new_set = tre_mem_calloc(mem, sizeof(*new_set));
2086   if (new_set == NULL)
2087     return NULL;
2088
2089   new_set[0].position = -1;
2090   new_set[0].code_min = -1;
2091   new_set[0].code_max = -1;
2092
2093   return new_set;
2094 }
2095
2096 static tre_pos_and_tags_t *
2097 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2098             tre_bracket_match_list_t *bracket_match_list, int backref)
2099 {
2100   tre_pos_and_tags_t *new_set;
2101
2102   new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2103   if (new_set == NULL)
2104     return NULL;
2105
2106   new_set[0].position = position;
2107   new_set[0].code_min = code_min;
2108   new_set[0].code_max = code_max;
2109   new_set[0].bracket_match_list = bracket_match_list;
2110   new_set[0].backref = backref;
2111   new_set[1].position = -1;
2112   new_set[1].code_min = -1;
2113   new_set[1].code_max = -1;
2114
2115   return new_set;
2116 }
2117
2118 static tre_pos_and_tags_t *
2119 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2120               int *tags, int assertions, int *params)
2121 {
2122   int s1, s2, i, j;
2123   tre_pos_and_tags_t *new_set;
2124   int *new_tags;
2125   int num_tags;
2126
2127   for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2128   for (s1 = 0; set1[s1].position >= 0; s1++);
2129   for (s2 = 0; set2[s2].position >= 0; s2++);
2130   new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2131   if (!new_set )
2132     return NULL;
2133
2134   for (s1 = 0; set1[s1].position >= 0; s1++)
2135     {
2136       new_set[s1].position = set1[s1].position;
2137       new_set[s1].code_min = set1[s1].code_min;
2138       new_set[s1].code_max = set1[s1].code_max;
2139       new_set[s1].assertions = set1[s1].assertions | assertions;
2140       new_set[s1].bracket_match_list = set1[s1].bracket_match_list;
2141       new_set[s1].backref = set1[s1].backref;
2142       if (set1[s1].tags == NULL && tags == NULL)
2143         new_set[s1].tags = NULL;
2144       else
2145         {
2146           for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2147           new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2148                                          * (i + num_tags + 1)));
2149           if (new_tags == NULL)
2150             return NULL;
2151           for (j = 0; j < i; j++)
2152             new_tags[j] = set1[s1].tags[j];
2153           for (i = 0; i < num_tags; i++)
2154             new_tags[j + i] = tags[i];
2155           new_tags[j + i] = -1;
2156           new_set[s1].tags = new_tags;
2157         }
2158       if (set1[s1].params)
2159         new_set[s1].params = set1[s1].params;
2160       if (params)
2161         {
2162           if (!new_set[s1].params)
2163             new_set[s1].params = params;
2164           else
2165             {
2166               new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
2167                                                  TRE_PARAM_LAST);
2168               if (!new_set[s1].params)
2169                 return NULL;
2170               for (i = 0; i < TRE_PARAM_LAST; i++)
2171                 if (params[i] != TRE_PARAM_UNSET)
2172                   new_set[s1].params[i] = params[i];
2173             }
2174         }
2175     }
2176
2177   for (s2 = 0; set2[s2].position >= 0; s2++)
2178     {
2179       new_set[s1 + s2].position = set2[s2].position;
2180       new_set[s1 + s2].code_min = set2[s2].code_min;
2181       new_set[s1 + s2].code_max = set2[s2].code_max;
2182       /* XXX - why not | assertions here as well? */
2183       new_set[s1 + s2].assertions = set2[s2].assertions;
2184       new_set[s1 + s2].bracket_match_list = set2[s2].bracket_match_list;
2185       new_set[s1 + s2].backref = set2[s2].backref;
2186       if (set2[s2].tags == NULL)
2187         new_set[s1 + s2].tags = NULL;
2188       else
2189         {
2190           for (i = 0; set2[s2].tags[i] >= 0; i++);
2191           new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2192           if (new_tags == NULL)
2193             return NULL;
2194           for (j = 0; j < i; j++)
2195             new_tags[j] = set2[s2].tags[j];
2196           new_tags[j] = -1;
2197           new_set[s1 + s2].tags = new_tags;
2198         }
2199       if (set2[s2].params)
2200         new_set[s1 + s2].params = set2[s2].params;
2201       if (params)
2202         {
2203           if (!new_set[s1 + s2].params)
2204             new_set[s1 + s2].params = params;
2205           else
2206             {
2207               new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
2208                                                       TRE_PARAM_LAST);
2209               if (!new_set[s1 + s2].params)
2210                 return NULL;
2211               for (i = 0; i < TRE_PARAM_LAST; i++)
2212                 if (params[i] != TRE_PARAM_UNSET)
2213                   new_set[s1 + s2].params[i] = params[i];
2214             }
2215         }
2216     }
2217   new_set[s1 + s2].position = -1;
2218   return new_set;
2219 }
2220
2221 /* Finds the empty path through `node' which is the one that should be
2222    taken according to POSIX.2 rules, and adds the tags on that path to
2223    `tags'.   `tags' may be NULL.  If `num_tags_seen' is not NULL, it is
2224    set to the number of tags seen on the path. */
2225 static reg_errcode_t
2226 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2227                 int *assertions, int *params, int *num_tags_seen,
2228                 int *params_seen)
2229 {
2230   tre_literal_t *lit;
2231   tre_union_t *uni;
2232   tre_catenation_t *cat;
2233   tre_iteration_t *iter;
2234   int i;
2235   int bottom = tre_stack_num_objects(stack);
2236   reg_errcode_t status = REG_OK;
2237   if (num_tags_seen)
2238     *num_tags_seen = 0;
2239   if (params_seen)
2240     *params_seen = 0;
2241
2242   status = tre_stack_push_voidptr(stack, node);
2243
2244   /* Walk through the tree recursively. */
2245   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2246     {
2247       node = tre_stack_pop_voidptr(stack);
2248
2249       switch (node->type)
2250         {
2251         case LITERAL:
2252           lit = (tre_literal_t *)node->obj;
2253           switch (lit->code_min)
2254             {
2255             case TAG:
2256               if (lit->code_max >= 0)
2257                 {
2258                   if (tags != NULL)
2259                     {
2260                       /* Add the tag to `tags'. */
2261                       for (i = 0; tags[i] >= 0; i++)
2262                         if (tags[i] == lit->code_max)
2263                           break;
2264                       if (tags[i] < 0)
2265                         {
2266                           tags[i] = lit->code_max;
2267                           tags[i + 1] = -1;
2268                         }
2269                     }
2270                   if (num_tags_seen)
2271                     (*num_tags_seen)++;
2272                 }
2273               break;
2274             case ASSERTION:
2275               assert(lit->code_max >= 1
2276                      || lit->code_max <= ASSERT_LAST);
2277               if (assertions != NULL)
2278                 *assertions |= lit->code_max;
2279               break;
2280             case PARAMETER:
2281               if (params != NULL)
2282                 for (i = 0; i < TRE_PARAM_LAST; i++)
2283                   params[i] = lit->u.params[i];
2284               if (params_seen != NULL)
2285                 *params_seen = 1;
2286               break;
2287             case EMPTY:
2288               break;
2289             default:
2290               assert(0);
2291               break;
2292             }
2293           break;
2294
2295         case UNION:
2296           /* Subexpressions starting earlier take priority over ones
2297              starting later, so we prefer the left subexpression over the
2298              right subexpression. */
2299           uni = (tre_union_t *)node->obj;
2300           if (uni->left->nullable)
2301             STACK_PUSHX(stack, voidptr, uni->left)
2302           else if (uni->right->nullable)
2303             STACK_PUSHX(stack, voidptr, uni->right)
2304           else
2305             assert(0);
2306           break;
2307
2308         case CATENATION:
2309           /* The path must go through both children. */
2310           cat = (tre_catenation_t *)node->obj;
2311           assert(cat->left->nullable);
2312           assert(cat->right->nullable);
2313           STACK_PUSHX(stack, voidptr, cat->left);
2314           STACK_PUSHX(stack, voidptr, cat->right);
2315           break;
2316
2317         case ITERATION:
2318           /* A match with an empty string is preferred over no match at
2319              all, so we go through the argument if possible. */
2320           iter = (tre_iteration_t *)node->obj;
2321           if (iter->arg->nullable)
2322             STACK_PUSHX(stack, voidptr, iter->arg);
2323           break;
2324
2325         default:
2326           assert(0);
2327           break;
2328         }
2329     }
2330
2331   return status;
2332 }
2333
2334
2335 typedef enum {
2336   NFL_RECURSE,
2337   NFL_POST_UNION,
2338   NFL_POST_CATENATION,
2339   NFL_POST_ITERATION
2340 } tre_nfl_stack_symbol_t;
2341
2342
2343 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2344    the nodes of the AST `tree'. */
2345 static reg_errcode_t
2346 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2347 {
2348   int bottom = tre_stack_num_objects(stack);
2349
2350   STACK_PUSHR(stack, voidptr, tree);
2351   STACK_PUSHR(stack, int, NFL_RECURSE);
2352
2353   while (tre_stack_num_objects(stack) > bottom)
2354     {
2355       tre_nfl_stack_symbol_t symbol;
2356       tre_ast_node_t *node;
2357
2358       symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2359       node = tre_stack_pop_voidptr(stack);
2360       switch (symbol)
2361         {
2362         case NFL_RECURSE:
2363           switch (node->type)
2364             {
2365             case LITERAL:
2366               {
2367                 tre_literal_t *lit = (tre_literal_t *)node->obj;
2368                 if (IS_BACKREF(lit))
2369                   {
2370                     /* Back references: nullable = false, firstpos = {i},
2371                        lastpos = {i}. */
2372                     node->nullable = 0;
2373                     node->firstpos = tre_set_one(mem, lit->position, 0,
2374                                              TRE_CHAR_MAX, NULL, -1);
2375                     if (!node->firstpos)
2376                       return REG_ESPACE;
2377                     node->lastpos = tre_set_one(mem, lit->position, 0,
2378                                                 TRE_CHAR_MAX, NULL,
2379                                                 (int)lit->code_max);
2380                     if (!node->lastpos)
2381                       return REG_ESPACE;
2382                   }
2383                 else if (lit->code_min < 0)
2384                   {
2385                     /* Tags, empty strings, params, and zero width assertions:
2386                        nullable = true, firstpos = {}, and lastpos = {}. */
2387                     node->nullable = 1;
2388                     node->firstpos = tre_set_empty(mem);
2389                     if (!node->firstpos)
2390                       return REG_ESPACE;
2391                     node->lastpos = tre_set_empty(mem);
2392                     if (!node->lastpos)
2393                       return REG_ESPACE;
2394                   }
2395                 else
2396                   {
2397                     /* Literal at position i: nullable = false, firstpos = {i},
2398                        lastpos = {i}. */
2399                     node->nullable = 0;
2400                     node->firstpos =
2401                       tre_set_one(mem, lit->position, (int)lit->code_min,
2402                                   (int)lit->code_max, NULL, -1);
2403                     if (!node->firstpos)
2404                       return REG_ESPACE;
2405                     node->lastpos = tre_set_one(mem, lit->position,
2406                                                 (int)lit->code_min,
2407                                                 (int)lit->code_max,
2408                                                 lit->u.bracket_match_list,
2409                                                 -1);
2410                     if (!node->lastpos)
2411                       return REG_ESPACE;
2412                   }
2413                 break;
2414               }
2415
2416             case UNION:
2417               /* Compute the attributes for the two subtrees, and after that
2418                  for this node. */
2419               STACK_PUSHR(stack, voidptr, node);
2420               STACK_PUSHR(stack, int, NFL_POST_UNION);
2421               STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2422               STACK_PUSHR(stack, int, NFL_RECURSE);
2423               STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2424               STACK_PUSHR(stack, int, NFL_RECURSE);
2425               break;
2426
2427             case CATENATION:
2428               /* Compute the attributes for the two subtrees, and after that
2429                  for this node. */
2430               STACK_PUSHR(stack, voidptr, node);
2431               STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2432               STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2433               STACK_PUSHR(stack, int, NFL_RECURSE);
2434               STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2435               STACK_PUSHR(stack, int, NFL_RECURSE);
2436               break;
2437
2438             case ITERATION:
2439               /* Compute the attributes for the subtree, and after that for
2440                  this node. */
2441               STACK_PUSHR(stack, voidptr, node);
2442               STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2443               STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2444               STACK_PUSHR(stack, int, NFL_RECURSE);
2445               break;
2446             }
2447           break; /* end case: NFL_RECURSE */
2448
2449         case NFL_POST_UNION:
2450           {
2451             tre_union_t *uni = (tre_union_t *)node->obj;
2452             node->nullable = uni->left->nullable || uni->right->nullable;
2453             node->firstpos = tre_set_union(mem, uni->left->firstpos,
2454                                            uni->right->firstpos, NULL, 0, NULL);
2455             if (!node->firstpos)
2456               return REG_ESPACE;
2457             node->lastpos = tre_set_union(mem, uni->left->lastpos,
2458                                           uni->right->lastpos, NULL, 0, NULL);
2459             if (!node->lastpos)
2460               return REG_ESPACE;
2461             break;
2462           }
2463
2464         case NFL_POST_ITERATION:
2465           {
2466             int num_tags, *tags, assertions, params_seen;
2467             int *params;
2468             reg_errcode_t status;
2469             tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2470
2471             /* From Ville Laurikari's original 2001 Master's thesis, the
2472                firstpos(n) and lastpos(n) of an iteration is just the
2473                corresponding values of the iteration's argument.  Unfortunately,
2474                this isn't sufficient for the following BRE:
2475
2476                    \(a*\)*b\(\1\)    matched against    ab
2477
2478                The backreference wants to force the first subexpression to
2479                be the empty string, but there is no transition for this.  So
2480                we need to modify the lastpos(n) of an iteration to be the
2481                equivalent of that of catentation.  Using the same notation as
2482                in the thesis, lastpos(n) is redefined as:
2483
2484                    if nullable(c1) then
2485                        lastpos(c1) U
2486                        addtags(lastpos(c1),
2487                                emptymatch(c1))
2488                    else
2489                        lastpos(c1)
2490
2491                where c1 is the argument node.  firstpos(n) remains the same. */
2492
2493             /* Compute lastpos. */
2494             if (iter->min == 0 || iter->arg->nullable)
2495               {
2496                 node->nullable = 1;
2497                 if (iter->arg->nullable)
2498                   {
2499                     /* The arg matches the empty string.  Make a first pass
2500                        with tre_match_empty() to get the number of tags and
2501                        parameters. */
2502                     status = tre_match_empty(stack, iter->arg,
2503                                              NULL, NULL, NULL, &num_tags,
2504                                              &params_seen);
2505                     if (status != REG_OK)
2506                       return status;
2507                     /* Allocate arrays for the tags and parameters. */
2508                     tags = xmalloc(sizeof(int) * (num_tags + 1));
2509                     if (!tags)
2510                       return REG_ESPACE;
2511                     tags[0] = -1;
2512                     assertions = 0;
2513                     params = NULL;
2514                     if (params_seen)
2515                       {
2516                         params = tre_mem_alloc(mem, sizeof(*params)
2517                                                * TRE_PARAM_LAST);
2518                         if (!params)
2519                           {
2520                             xfree(tags);
2521                             return REG_ESPACE;
2522                           }
2523                       }
2524                     /* Second pass with tre_mach_empty() to get the list of
2525                        tags and parameters. */
2526                     status = tre_match_empty(stack, iter->arg, tags,
2527                                              &assertions, params, NULL, NULL);
2528                     if (status != REG_OK)
2529                       {
2530                         xfree(tags);
2531                         return status;
2532                       }
2533                     node->lastpos =
2534                       tre_set_union(mem, iter->arg->lastpos, iter->arg->lastpos,
2535                                     tags, assertions, params);
2536                     xfree(tags);
2537                     if (!node->lastpos)
2538                       return REG_ESPACE;
2539                   }
2540                 else
2541                   node->lastpos = iter->arg->lastpos;
2542               }
2543             else
2544               {
2545                 node->nullable = 0;
2546                 node->lastpos = iter->arg->lastpos;
2547               }
2548             node->firstpos = iter->arg->firstpos;
2549             break;
2550           }
2551
2552         case NFL_POST_CATENATION:
2553           {
2554             int num_tags, *tags, assertions, params_seen;
2555             int *params;
2556             reg_errcode_t status;
2557             tre_catenation_t *cat = node->obj;
2558             node->nullable = cat->left->nullable && cat->right->nullable;
2559
2560             /* Compute firstpos. */
2561             if (cat->left->nullable)
2562               {
2563                 /* The left side matches the empty string.  Make a first pass
2564                    with tre_match_empty() to get the number of tags and
2565                    parameters. */
2566                 status = tre_match_empty(stack, cat->left,
2567                                          NULL, NULL, NULL, &num_tags,
2568                                          &params_seen);
2569                 if (status != REG_OK)
2570                   return status;
2571                 /* Allocate arrays for the tags and parameters. */
2572                 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2573                 if (!tags)
2574                   return REG_ESPACE;
2575                 tags[0] = -1;
2576                 assertions = 0;
2577                 params = NULL;
2578                 if (params_seen)
2579                   {
2580                     params = tre_mem_alloc(mem, sizeof(*params)
2581                                            * TRE_PARAM_LAST);
2582                     if (!params)
2583                       {
2584                         xfree(tags);
2585                         return REG_ESPACE;
2586                       }
2587                   }
2588                 /* Second pass with tre_mach_empty() to get the list of
2589                    tags and parameters. */
2590                 status = tre_match_empty(stack, cat->left, tags,
2591                                          &assertions, params, NULL, NULL);
2592                 if (status != REG_OK)
2593                   {
2594                     xfree(tags);
2595                     return status;
2596                   }
2597                 node->firstpos =
2598                   tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2599                                 tags, assertions, params);
2600                 xfree(tags);
2601                 if (!node->firstpos)
2602                   return REG_ESPACE;
2603               }
2604             else
2605               {
2606                 node->firstpos = cat->left->firstpos;
2607               }
2608
2609             /* Compute lastpos. */
2610             if (cat->right->nullable)
2611               {
2612                 /* The right side matches the empty string.  Make a first pass
2613                    with tre_match_empty() to get the number of tags and
2614                    parameters. */
2615                 status = tre_match_empty(stack, cat->right,
2616                                          NULL, NULL, NULL, &num_tags,
2617                                          &params_seen);
2618                 if (status != REG_OK)
2619                   return status;
2620                 /* Allocate arrays for the tags and parameters. */
2621                 tags = xmalloc(sizeof(int) * (num_tags + 1));
2622                 if (!tags)
2623                   return REG_ESPACE;
2624                 tags[0] = -1;
2625                 assertions = 0;
2626                 params = NULL;
2627                 if (params_seen)
2628                   {
2629                     params = tre_mem_alloc(mem, sizeof(*params)
2630                                            * TRE_PARAM_LAST);
2631                     if (!params)
2632                       {
2633                         xfree(tags);
2634                         return REG_ESPACE;
2635                       }
2636                   }
2637                 /* Second pass with tre_mach_empty() to get the list of
2638                    tags and parameters. */
2639                 status = tre_match_empty(stack, cat->right, tags,
2640                                          &assertions, params, NULL, NULL);
2641                 if (status != REG_OK)
2642                   {
2643                     xfree(tags);
2644                     return status;
2645                   }
2646                 node->lastpos =
2647                   tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2648                                 tags, assertions, params);
2649                 xfree(tags);
2650                 if (!node->lastpos)
2651                   return REG_ESPACE;
2652               }
2653             else
2654               {
2655                 node->lastpos = cat->right->lastpos;
2656               }
2657             break;
2658           }
2659
2660         default:
2661           assert(0);
2662           break;
2663         }
2664     }
2665
2666   return REG_OK;
2667 }
2668
2669
2670 /* Adds a transition from each position in `p1' to each position in `p2'. */
2671 static reg_errcode_t
2672 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2673                tre_tnfa_transition_t *transitions,
2674                int *counts, int *offs)
2675 {
2676   tre_pos_and_tags_t *orig_p2 = p2;
2677   tre_tnfa_transition_t *trans;
2678   int i, j, k, l, dup, prev_p2_pos;
2679
2680   if (transitions != NULL)
2681     while (p1->position >= 0)
2682       {
2683         p2 = orig_p2;
2684         prev_p2_pos = -1;
2685         while (p2->position >= 0)
2686           {
2687             /* Optimization: if this position was already handled, skip it. */
2688             if (p2->position == prev_p2_pos)
2689               {
2690                 p2++;
2691                 continue;
2692               }
2693             prev_p2_pos = p2->position;
2694             /* Set `trans' to point to the next unused transition from
2695                position `p1->position'. */
2696             trans = transitions + offs[p1->position];
2697             while (trans->state != NULL)
2698               {
2699 #if 0
2700                 /* If we find a previous transition from `p1->position' to
2701                    `p2->position', it is overwritten.  This can happen only
2702                    if there are nested loops in the regexp, like in "((a)*)*".
2703                    In POSIX.2 repetition using the outer loop is always
2704                    preferred over using the inner loop.  Therefore the
2705                    transition for the inner loop is useless and can be thrown
2706                    away. */
2707                 /* XXX - The same position is used for all nodes in a bracket
2708                    expression, so this optimization cannot be used (it will
2709                    break bracket expressions) unless I figure out a way to
2710                    detect it here. */
2711                 if (trans->state_id == p2->position)
2712                   {
2713                     DPRINT(("*"));
2714                     break;
2715                   }
2716 #endif
2717                 trans++;
2718               }
2719
2720             if (trans->state == NULL)
2721               (trans + 1)->state = NULL;
2722             /* Use the character ranges, assertions, etc. from `p1' for
2723                the transition from `p1' to `p2'. */
2724             trans->code_min = p1->code_min;
2725             trans->code_max = p1->code_max;
2726             trans->state = transitions + offs[p2->position];
2727             trans->state_id = p2->position;
2728             trans->assertions = p1->assertions | p2->assertions
2729               | (p1->bracket_match_list != NULL ? ASSERT_BRACKET_MATCH : 0);
2730             if (p1->backref >= 0)
2731               {
2732                 assert((trans->assertions & ASSERT_BRACKET_MATCH) == 0);
2733                 assert(p2->backref < 0);
2734                 trans->u.backref = p1->backref;
2735                 trans->assertions |= ASSERT_BACKREF;
2736               }
2737             if (p1->bracket_match_list != NULL)
2738               {
2739                 trans->u.bracket_match_list =
2740                   xmalloc(SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2741                 if (trans->u.bracket_match_list == NULL)
2742                   return REG_ESPACE;
2743                 memcpy(trans->u.bracket_match_list, p1->bracket_match_list,
2744                        SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2745               }
2746
2747             /* Find out how many tags this transition has. */
2748             i = 0;
2749             if (p1->tags != NULL)
2750               while(p1->tags[i] >= 0)
2751                 i++;
2752             j = 0;
2753             if (p2->tags != NULL)
2754               while(p2->tags[j] >= 0)
2755                 j++;
2756
2757             /* If we are overwriting a transition, free the old tag array. */
2758             if (trans->tags != NULL)
2759               xfree(trans->tags);
2760             trans->tags = NULL;
2761
2762             /* If there were any tags, allocate an array and fill it. */
2763             if (i + j > 0)
2764               {
2765                 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2766                 if (!trans->tags)
2767                   return REG_ESPACE;
2768                 i = 0;
2769                 if (p1->tags != NULL)
2770                   while(p1->tags[i] >= 0)
2771                     {
2772                       trans->tags[i] = p1->tags[i];
2773                       i++;
2774                     }
2775                 l = i;
2776                 j = 0;
2777                 if (p2->tags != NULL)
2778                   while (p2->tags[j] >= 0)
2779                     {
2780                       /* Don't add duplicates. */
2781                       dup = 0;
2782                       for (k = 0; k < i; k++)
2783                         if (trans->tags[k] == p2->tags[j])
2784                           {
2785                             dup = 1;
2786                             break;
2787                           }
2788                       if (!dup)
2789                         trans->tags[l++] = p2->tags[j];
2790                       j++;
2791                     }
2792                 trans->tags[l] = -1;
2793               }
2794
2795             /* Set the parameter array.  If both `p2' and `p1' have same
2796                parameters, the values in `p2' override those in `p1'. */
2797             if (p1->params || p2->params)
2798               {
2799                 if (!trans->params)
2800                   trans->params = xmalloc(sizeof(*trans->params)
2801                                           * TRE_PARAM_LAST);
2802                 if (!trans->params)
2803                   return REG_ESPACE;
2804                 for (i = 0; i < TRE_PARAM_LAST; i++)
2805                   {
2806                     trans->params[i] = TRE_PARAM_UNSET;
2807                     if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
2808                       trans->params[i] = p1->params[i];
2809                     if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
2810                       trans->params[i] = p2->params[i];
2811                   }
2812               }
2813             else
2814               {
2815                 if (trans->params)
2816                   xfree(trans->params);
2817                 trans->params = NULL;
2818               }
2819
2820
2821 #ifdef TRE_DEBUG
2822             {
2823               int *tags;
2824
2825               DPRINT(("  %2d -> %2d on %3d", p1->position, p2->position,
2826                       p1->code_min));
2827               if (p1->code_max != p1->code_min)
2828                 DPRINT(("-%3d", p1->code_max));
2829               tags = trans->tags;
2830               if (tags)
2831                 {
2832                   DPRINT((", tags ["));
2833                   while (*tags >= 0)
2834                     {
2835                       DPRINT(("%d", *tags));
2836                       tags++;
2837                       if (*tags >= 0)
2838                         DPRINT((","));
2839                     }
2840                   DPRINT(("]"));
2841                 }
2842               if (trans->assertions)
2843                 DPRINT((", assert %d", trans->assertions));
2844               if (trans->assertions & ASSERT_BACKREF)
2845                 DPRINT((", backref %d", trans->u.backref));
2846               else if (trans->assertions & ASSERT_BRACKET_MATCH)
2847                 DPRINT((", bracket_match_list %p",
2848                         trans->u.bracket_match_list));
2849               if (trans->params)
2850                 {
2851                   DPRINT((", "));
2852                   tre_print_params(trans->params);
2853                 }
2854               DPRINT(("\n"));
2855             }
2856 #endif /* TRE_DEBUG */
2857             p2++;
2858           }
2859         p1++;
2860       }
2861   else
2862     /* Compute a maximum limit for the number of transitions leaving
2863        from each state. */
2864     while (p1->position >= 0)
2865       {
2866         p2 = orig_p2;
2867         while (p2->position >= 0)
2868           {
2869             counts[p1->position]++;
2870             p2++;
2871           }
2872         p1++;
2873       }
2874   return REG_OK;
2875 }
2876
2877 /* Converts the syntax tree to a TNFA.  All the transitions in the TNFA are
2878    labelled with one character range (there are no transitions on empty
2879    strings).  The TNFA takes O(n^2) space in the worst case, `n' is size of
2880    the regexp. */
2881 static reg_errcode_t
2882 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2883                 int *counts, int *offs)
2884 {
2885   tre_union_t *uni;
2886   tre_catenation_t *cat;
2887   tre_iteration_t *iter;
2888   reg_errcode_t errcode = REG_OK;
2889
2890   /* XXX - recurse using a stack!. */
2891   switch (node->type)
2892     {
2893     case LITERAL:
2894       break;
2895     case UNION:
2896       uni = (tre_union_t *)node->obj;
2897       errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2898       if (errcode != REG_OK)
2899         return errcode;
2900       errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2901       break;
2902
2903     case CATENATION:
2904       cat = (tre_catenation_t *)node->obj;
2905       /* Add a transition from each position in cat->left->lastpos
2906          to each position in cat->right->firstpos. */
2907       errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2908                                transitions, counts, offs);
2909       if (errcode != REG_OK)
2910         return errcode;
2911       errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2912       if (errcode != REG_OK)
2913         return errcode;
2914       errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2915       break;
2916
2917     case ITERATION:
2918       iter = (tre_iteration_t *)node->obj;
2919       assert(iter->max == -1 || iter->max == 1);
2920
2921       if (iter->max == -1)
2922         {
2923           assert(iter->min == 0 || iter->min == 1);
2924           /* Add a transition from each last position in the iterated
2925              expression to each first position. */
2926           errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2927                                    transitions, counts, offs);
2928           if (errcode != REG_OK)
2929             return errcode;
2930         }
2931       errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2932       break;
2933     }
2934   return errcode;
2935 }
2936
2937
2938 #define ERROR_EXIT(err)           \
2939   do                              \
2940     {                             \
2941       errcode = err;              \
2942       if (/*CONSTCOND*/1)         \
2943         goto error_exit;          \
2944     }                             \
2945  while (/*CONSTCOND*/0)
2946
2947
2948 int
2949 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags,
2950             locale_t loc)
2951 {
2952   tre_stack_t *stack;
2953   tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2954   tre_pos_and_tags_t *p;
2955   int *counts = NULL, *offs = NULL;
2956   int i, add = 0;
2957   tre_tnfa_transition_t *transitions, *initial;
2958   tre_tnfa_t *tnfa = NULL;
2959   tre_submatch_data_t *submatch_data = NULL;
2960   tre_tag_direction_t *tag_directions = NULL;
2961   reg_errcode_t errcode;
2962   tre_mem_t mem;
2963
2964   /* Parse context. */
2965   tre_parse_ctx_t parse_ctx;
2966
2967   /* Allocate a stack used throughout the compilation process for various
2968      purposes. */
2969   stack = tre_stack_new(512, 10240, 128);
2970   if (!stack)
2971     return REG_ESPACE;
2972   /* Allocate a fast memory allocator. */
2973   mem = tre_mem_new();
2974   if (!mem)
2975     {
2976       tre_stack_destroy(stack);
2977       return REG_ESPACE;
2978     }
2979
2980   /* Parse the regexp. */
2981   memset(&parse_ctx, 0, sizeof(parse_ctx));
2982   parse_ctx.mem = mem;
2983   parse_ctx.stack = stack;
2984   parse_ctx.re = regex;
2985   parse_ctx.len = n;
2986   /* Only allow REG_UNGREEDY to be set if both REG_ENHANCED and REG_EXTENDED
2987      are also set */
2988   if ((cflags & (REG_ENHANCED | REG_EXTENDED)) != (REG_ENHANCED | REG_EXTENDED))
2989     cflags &= ~REG_UNGREEDY;
2990   parse_ctx.cflags = cflags;
2991   parse_ctx.max_backref = -1;
2992   parse_ctx.loc = loc;
2993   parse_ctx.submatch_id_invisible = SUBMATCH_ID_INVISIBLE_START;
2994
2995   DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
2996   errcode = tre_parse(&parse_ctx);
2997   if (errcode != REG_OK)
2998     ERROR_EXIT(errcode);
2999   preg->re_nsub = parse_ctx.submatch_id - 1;
3000   tree = parse_ctx.result;
3001
3002   /* Back references and approximate matching cannot currently be used
3003      in the same regexp. */
3004   if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
3005     ERROR_EXIT(REG_BADPAT);
3006
3007 #ifdef TRE_DEBUG
3008   tre_ast_print(tree);
3009 #endif /* TRE_DEBUG */
3010
3011   /* Referring to nonexistent subexpressions is illegal. */
3012   if (parse_ctx.max_backref > (int)preg->re_nsub)
3013     ERROR_EXIT(REG_ESUBREG);
3014
3015   /* Allocate the TNFA struct. */
3016   tnfa = xcalloc(1, sizeof(tre_tnfa_t));
3017   if (tnfa == NULL)
3018     ERROR_EXIT(REG_ESPACE);
3019   tnfa->have_backrefs = parse_ctx.max_backref >= 0;
3020   tnfa->have_approx = parse_ctx.have_approx;
3021   tnfa->num_submatches = parse_ctx.submatch_id;
3022   tnfa->num_submatches_invisible = parse_ctx.submatch_id_invisible
3023                                    - SUBMATCH_ID_INVISIBLE_START;
3024   tnfa->num_reorder_tags = parse_ctx.num_reorder_tags;
3025   tnfa->loc = parse_ctx.loc;
3026
3027   /* Set up tags for submatch addressing.  If REG_NOSUB is set and the
3028      regexp does not have back references, this can be skipped. */
3029   if (tnfa->num_reorder_tags > 0 || !(cflags & REG_NOSUB))
3030     {
3031       DPRINT(("tre_compile: setting up tags\n"));
3032
3033       /* Figure out how many tags we will need. */
3034       errcode = tre_add_tags(NULL, stack, tree, tnfa);
3035       if (errcode != REG_OK)
3036         ERROR_EXIT(errcode);
3037 #ifdef TRE_DEBUG
3038       tre_ast_print(tree);
3039 #endif /* TRE_DEBUG */
3040
3041       if (tnfa->num_tags > 0)
3042         {
3043           tag_directions = xmalloc(sizeof(*tag_directions)
3044                                    * (tnfa->num_tags + 1));
3045           if (tag_directions == NULL)
3046             ERROR_EXIT(REG_ESPACE);
3047           tnfa->tag_directions = tag_directions;
3048           memset(tag_directions, -1,
3049                  sizeof(*tag_directions) * (tnfa->num_tags + 1));
3050         }
3051       tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 3,
3052                                    sizeof(tnfa->minimal_tags));
3053       if (tnfa->minimal_tags == NULL)
3054         ERROR_EXIT(REG_ESPACE);
3055
3056       submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
3057                               sizeof(*submatch_data));
3058       if (submatch_data == NULL)
3059         ERROR_EXIT(REG_ESPACE);
3060       /* Set the eo_tag value to -1 to indicate that that corresponding
3061        * submatch has not be completed yet */
3062       for (i = 0; i < parse_ctx.submatch_id; i++)
3063         {
3064           submatch_data[i].eo_tag = -1;
3065         }
3066       tnfa->submatch_data = submatch_data;
3067
3068       errcode = tre_add_tags(mem, stack, tree, tnfa);
3069       if (errcode != REG_OK)
3070         ERROR_EXIT(errcode);
3071
3072 #ifdef TRE_DEBUG
3073       for (i = 0; i < parse_ctx.submatch_id; i++)
3074         DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3075                 i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3076       for (i = 0; i <= tnfa->num_tags; i++)
3077         DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
3078 #endif /* TRE_DEBUG */
3079     }
3080
3081   /* Expand iteration nodes. */
3082   errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
3083                            tag_directions, &tnfa->params_depth);
3084   if (errcode != REG_OK)
3085     ERROR_EXIT(errcode);
3086
3087   /* Add a dummy node for the final state.
3088      XXX - For certain patterns this dummy node can be optimized away,
3089            for example "a*" or "ab*".   Figure out a simple way to detect
3090            this possibility. */
3091   tmp_ast_l = tree;
3092   tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
3093   if (tmp_ast_r == NULL)
3094     ERROR_EXIT(REG_ESPACE);
3095
3096   tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
3097   if (tree == NULL)
3098     ERROR_EXIT(REG_ESPACE);
3099
3100 #ifdef TRE_DEBUG
3101   tre_ast_print(tree);
3102   DPRINT(("Number of states: %d\n", parse_ctx.position));
3103   if (submatch_data)
3104     for (i = 0; i < parse_ctx.submatch_id; i++)
3105       DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3106               i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3107   if (tag_directions)
3108     for (i = 0; i <= tnfa->num_tags; i++)
3109       DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
3110 #endif /* TRE_DEBUG */
3111
3112   errcode = tre_compute_nfl(mem, stack, tree);
3113   if (errcode != REG_OK)
3114     ERROR_EXIT(errcode);
3115
3116   counts = xmalloc(sizeof(int) * parse_ctx.position);
3117   if (counts == NULL)
3118     ERROR_EXIT(REG_ESPACE);
3119
3120   offs = xmalloc(sizeof(int) * parse_ctx.position);
3121   if (offs == NULL)
3122     ERROR_EXIT(REG_ESPACE);
3123
3124   for (i = 0; i < parse_ctx.position; i++)
3125     counts[i] = 0;
3126   tre_ast_to_tnfa(tree, NULL, counts, NULL);
3127
3128   add = 0;
3129   for (i = 0; i < parse_ctx.position; i++)
3130     {
3131       offs[i] = add;
3132       add += counts[i] + 1;
3133       counts[i] = 0;
3134     }
3135   transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
3136   if (transitions == NULL)
3137     ERROR_EXIT(REG_ESPACE);
3138   tnfa->transitions = transitions;
3139   tnfa->num_transitions = add;
3140
3141   DPRINT(("Converting to TNFA:\n"));
3142   errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
3143   if (errcode != REG_OK)
3144     ERROR_EXIT(errcode);
3145
3146
3147   /* Set first_char only if there is only one character that can be the
3148      first character of a match */
3149   tnfa->first_char = -1;
3150   if (!tmp_ast_l->nullable)
3151     {
3152       int scanning = 1;
3153       for (p = tree->firstpos; scanning && p->position >= 0; p++)
3154         {
3155           tre_tnfa_transition_t *j = transitions + offs[p->position];
3156           while (j->state != NULL)
3157             {
3158               if (j->code_min <= j->code_max)
3159                 {
3160                   if (j->code_max != j->code_min || j->code_min == -1 || tnfa->first_char != -1)
3161                     {
3162                       tnfa->first_char = -1;
3163                       scanning = 0;
3164                       break;
3165                     }
3166                   tnfa->first_char = j->code_min;
3167                 }
3168               j++;
3169             }
3170         }
3171 #ifdef TRE_DEBUG
3172       if (tnfa->first_char >= 0)
3173         DPRINT(("first char must be %d\n", tnfa->first_char));
3174 #endif /* TRE_DEBUG */
3175     }
3176
3177   p = tree->firstpos;
3178   i = 0;
3179   while (p->position >= 0)
3180     {
3181       i++;
3182
3183 #ifdef TRE_DEBUG
3184       {
3185         int *tags;
3186         DPRINT(("initial: %d", p->position));
3187         tags = p->tags;
3188         if (tags != NULL)
3189           {
3190             if (*tags >= 0)
3191               DPRINT(("/"));
3192             while (*tags >= 0)
3193               {
3194                 DPRINT(("%d", *tags));
3195                 tags++;
3196                 if (*tags >= 0)
3197                   DPRINT((","));
3198               }
3199           }
3200         DPRINT((", assert %d", p->assertions));
3201         if (p->params)
3202           {
3203             DPRINT((", "));
3204             tre_print_params(p->params);
3205           }
3206         DPRINT(("\n"));
3207       }
3208 #endif /* TRE_DEBUG */
3209
3210       p++;
3211     }
3212
3213   initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
3214   if (initial == NULL)
3215     ERROR_EXIT(REG_ESPACE);
3216   tnfa->initial = initial;
3217
3218   i = 0;
3219   for (p = tree->firstpos; p->position >= 0; p++)
3220     {
3221       initial[i].state = transitions + offs[p->position];
3222       initial[i].state_id = p->position;
3223       initial[i].tags = NULL;
3224       /* Copy the arrays p->tags, and p->params, they are allocated
3225          from a tre_mem object. */
3226       if (p->tags)
3227         {
3228           int j;
3229           for (j = 0; p->tags[j] >= 0; j++);
3230           initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
3231           if (!initial[i].tags)
3232             ERROR_EXIT(REG_ESPACE);
3233           memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
3234         }
3235       initial[i].params = NULL;
3236       if (p->params)
3237         {
3238           initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
3239           if (!initial[i].params)
3240             ERROR_EXIT(REG_ESPACE);
3241           memcpy(initial[i].params, p->params,
3242                  sizeof(*p->params) * TRE_PARAM_LAST);
3243         }
3244       initial[i].assertions = p->assertions;
3245       i++;
3246     }
3247   initial[i].state = NULL;
3248
3249   tnfa->num_transitions = add;
3250   tnfa->final = transitions + offs[tree->lastpos[0].position];
3251   tnfa->num_states = parse_ctx.position;
3252   tnfa->cflags = cflags;
3253
3254   DPRINT(("final state %d (%p)\n", tree->lastpos[0].position,
3255           (void *)tnfa->final));
3256
3257   tre_mem_destroy(mem);
3258   tre_stack_destroy(stack);
3259   xfree(counts);
3260   xfree(offs);
3261
3262 #ifdef TRE_USE_SYSTEM_REGEX_H
3263   preg->re_magic = RE_MAGIC;
3264 #endif /* TRE_USE_SYSTEM_REGEX_H */
3265   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3266   xlocale_retain(tnfa->loc);
3267   return REG_OK;
3268
3269  error_exit:
3270   /* Free everything that was allocated and return the error code. */
3271   tre_mem_destroy(mem);
3272   if (stack != NULL)
3273     tre_stack_destroy(stack);
3274   if (counts != NULL)
3275     xfree(counts);
3276   if (offs != NULL)
3277     xfree(offs);
3278
3279   /* Set tnfa into preg, so that calling tre_free() will free the contents
3280      of tnfa.  NULL out the loc field since we never retained the locale. */
3281   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3282   if(tnfa) tnfa->loc = NULL;
3283
3284   tre_free(preg);
3285   return errcode;
3286 }
3287
3288
3289
3290
3291 void
3292 tre_free(regex_t *preg)
3293 {
3294   tre_tnfa_t *tnfa;
3295   unsigned int i;
3296   tre_tnfa_transition_t *trans;
3297
3298 #ifdef TRE_USE_SYSTEM_REGEX_H
3299   preg->re_magic = 0;
3300 #endif /* TRE_USE_SYSTEM_REGEX_H */
3301   tnfa = (void *)preg->TRE_REGEX_T_FIELD;
3302   if (!tnfa)
3303     return;
3304   preg->TRE_REGEX_T_FIELD = NULL;
3305
3306   for (i = 0; i < tnfa->num_transitions; i++)
3307     if (tnfa->transitions[i].state)
3308       {
3309         if (tnfa->transitions[i].tags)
3310           xfree(tnfa->transitions[i].tags);
3311         if (tnfa->transitions[i].assertions & ASSERT_BRACKET_MATCH)
3312           xfree(tnfa->transitions[i].u.bracket_match_list);
3313         if (tnfa->transitions[i].params)
3314           xfree(tnfa->transitions[i].params);
3315       }
3316   if (tnfa->transitions)
3317     xfree(tnfa->transitions);
3318
3319   if (tnfa->initial)
3320     {
3321       for (trans = tnfa->initial; trans->state; trans++)
3322         {
3323           if (trans->tags)
3324             xfree(trans->tags);
3325           if (trans->params)
3326             xfree(trans->params);
3327         }
3328       xfree(tnfa->initial);
3329     }
3330
3331   if (tnfa->submatch_data)
3332     {
3333       xfree(tnfa->submatch_data);
3334     }
3335
3336   if (tnfa->tag_directions)
3337     xfree(tnfa->tag_directions);
3338   if (tnfa->minimal_tags)
3339     xfree(tnfa->minimal_tags);
3340
3341   if (tnfa->loc)
3342     xlocale_release(tnfa->loc);
3343
3344   if (tnfa->last_matched_branch)
3345     xfree(tnfa->last_matched_branch);
3346
3347   xfree(tnfa);
3348 }
3349
3350 char *
3351 tre_version(void)
3352 {
3353   static char str[256];
3354   char *version;
3355
3356   if (str[0] == 0)
3357     {
3358       (void) tre_config(TRE_CONFIG_VERSION, &version);
3359       (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
3360     }
3361   return str;
3362 }
3363
3364 int
3365 tre_config(int query, void *result)
3366 {
3367   int *int_result = result;
3368   const char **string_result = result;
3369
3370   switch (query)
3371     {
3372     case TRE_CONFIG_APPROX:
3373 #ifdef TRE_APPROX
3374       *int_result = 1;
3375 #else /* !TRE_APPROX */
3376       *int_result = 0;
3377 #endif /* !TRE_APPROX */
3378       return REG_OK;
3379
3380     case TRE_CONFIG_WCHAR:
3381 #ifdef TRE_WCHAR
3382       *int_result = 1;
3383 #else /* !TRE_WCHAR */
3384       *int_result = 0;
3385 #endif /* !TRE_WCHAR */
3386       return REG_OK;
3387
3388     case TRE_CONFIG_MULTIBYTE:
3389 #ifdef TRE_MULTIBYTE
3390       *int_result = 1;
3391 #else /* !TRE_MULTIBYTE */
3392       *int_result = 0;
3393 #endif /* !TRE_MULTIBYTE */
3394       return REG_OK;
3395
3396     case TRE_CONFIG_SYSTEM_ABI:
3397 #ifdef TRE_CONFIG_SYSTEM_ABI
3398       *int_result = 1;
3399 #else /* !TRE_CONFIG_SYSTEM_ABI */
3400       *int_result = 0;
3401 #endif /* !TRE_CONFIG_SYSTEM_ABI */
3402       return REG_OK;
3403
3404     case TRE_CONFIG_VERSION:
3405       *string_result = TRE_VERSION;
3406       return REG_OK;
3407     }
3408
3409   return REG_NOMATCH;
3410 }
3411
3412
3413 /* EOF */