948cc469895e6f2dae7a5cd056f300fd3e55ded4
[dragonfly.git] / contrib / tre / lib / tre-compile.c
1 /*
2   tre-compile.c - TRE regex compiler
3
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6
7 */
8
9 /*
10   TODO:
11    - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
12      function calls.
13 */
14
15
16 #ifdef HAVE_CONFIG_H
17 #include <config.h>
18 #endif /* HAVE_CONFIG_H */
19 #include <stdio.h>
20 #include <assert.h>
21 #include <string.h>
22
23 #include "tre-internal.h"
24 #include "tre-mem.h"
25 #include "tre-stack.h"
26 #include "tre-ast.h"
27 #include "tre-parse.h"
28 #include "tre-compile.h"
29 #include "tre.h"
30 #include "xmalloc.h"
31
32 /*
33   Algorithms to setup tags so that submatch addressing can be done.
34 */
35
36
37 /* Inserts a catenation node to the root of the tree given in `node'.
38    As the left child a new tag with number `tag_id' to `node' is added,
39    and the right child is the old root. */
40 static reg_errcode_t
41 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
42 {
43   tre_catenation_t *c;
44
45   DPRINT(("add_tag_left: tag %d\n", tag_id));
46
47   c = tre_mem_alloc(mem, sizeof(*c));
48   if (c == NULL)
49     return REG_ESPACE;
50   c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
51   if (c->left == NULL)
52     return REG_ESPACE;
53   c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
54   if (c->right == NULL)
55     return REG_ESPACE;
56
57   c->right->obj = node->obj;
58   c->right->type = node->type;
59   c->right->nullable = -1;
60   c->right->submatch_id = -1;
61   c->right->firstpos = NULL;
62   c->right->lastpos = NULL;
63   c->right->num_tags = 0;
64   node->obj = c;
65   node->type = CATENATION;
66   return REG_OK;
67 }
68
69 /* Inserts a catenation node to the root of the tree given in `node'.
70    As the right child a new tag with number `tag_id' to `node' is added,
71    and the left child is the old root. */
72 static reg_errcode_t
73 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
74 {
75   tre_catenation_t *c;
76
77   DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
78
79   c = tre_mem_alloc(mem, sizeof(*c));
80   if (c == NULL)
81     return REG_ESPACE;
82   c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
83   if (c->right == NULL)
84     return REG_ESPACE;
85   c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
86   if (c->left == NULL)
87     return REG_ESPACE;
88
89   c->left->obj = node->obj;
90   c->left->type = node->type;
91   c->left->nullable = -1;
92   c->left->submatch_id = -1;
93   c->left->firstpos = NULL;
94   c->left->lastpos = NULL;
95   c->left->num_tags = 0;
96   node->obj = c;
97   node->type = CATENATION;
98   return REG_OK;
99 }
100
101 typedef enum {
102   ADDTAGS_RECURSE,
103   ADDTAGS_AFTER_ITERATION,
104   ADDTAGS_AFTER_UNION_LEFT,
105   ADDTAGS_AFTER_UNION_RIGHT,
106   ADDTAGS_AFTER_CAT_LEFT,
107   ADDTAGS_AFTER_CAT_RIGHT,
108   ADDTAGS_SET_SUBMATCH_END
109 } tre_addtags_symbol_t;
110
111
112 typedef struct {
113   int tag;
114   int next_tag;
115 } tre_tag_states_t;
116
117
118 /* Go through `regset' and set submatch data for submatches that are
119    using this tag. */
120 static void
121 tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
122 {
123   int i;
124
125   for (i = 0; regset[i] >= 0; i++)
126     {
127       int id = regset[i] / 2;
128       int start = !(regset[i] % 2);
129       DPRINT(("  Using tag %d for %s offset of "
130               "submatch %d\n", tag,
131               start ? "start" : "end", id));
132       if (start)
133         tnfa->submatch_data[id].so_tag = tag;
134       else
135         tnfa->submatch_data[id].eo_tag = tag;
136     }
137   regset[0] = -1;
138 }
139
140
141 /* Adds tags to appropriate locations in the parse tree in `tree', so that
142    subexpressions marked for submatch addressing can be traced. */
143 static reg_errcode_t
144 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
145              tre_tnfa_t *tnfa)
146 {
147   reg_errcode_t status = REG_OK;
148   tre_addtags_symbol_t symbol;
149   tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
150   int bottom = tre_stack_num_objects(stack);
151   /* True for first pass (counting number of needed tags) */
152   int first_pass = (mem == NULL || tnfa == NULL);
153   int *regset, *orig_regset;
154   int num_tags = 0; /* Total number of tags. */
155   int num_minimals = 0;  /* Number of special minimal tags. */
156   int tag = 0;      /* The tag that is to be added next. */
157   int next_tag = 1; /* Next tag to use after this one. */
158   int *parents;     /* Stack of submatches the current submatch is
159                        contained in. */
160   int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
161   tre_tag_states_t *saved_states;
162
163   tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
164   if (!first_pass)
165     {
166       tnfa->end_tag = 0;
167       tnfa->minimal_tags[0] = -1;
168     }
169
170   regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
171   if (regset == NULL)
172     return REG_ESPACE;
173   regset[0] = -1;
174   orig_regset = regset;
175
176   parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
177   if (parents == NULL)
178     {
179       xfree(regset);
180       return REG_ESPACE;
181     }
182   parents[0] = -1;
183
184   saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
185   if (saved_states == NULL)
186     {
187       xfree(regset);
188       xfree(parents);
189       return REG_ESPACE;
190     }
191   else
192     {
193       unsigned int i;
194       for (i = 0; i <= tnfa->num_submatches; i++)
195         saved_states[i].tag = -1;
196     }
197
198   STACK_PUSH(stack, voidptr, node);
199   STACK_PUSH(stack, int, ADDTAGS_RECURSE);
200
201   while (tre_stack_num_objects(stack) > bottom)
202     {
203       if (status != REG_OK)
204         break;
205
206       symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
207       switch (symbol)
208         {
209
210         case ADDTAGS_SET_SUBMATCH_END:
211           {
212             int id = tre_stack_pop_int(stack);
213             int i;
214
215             /* Add end of this submatch to regset. */
216             for (i = 0; regset[i] >= 0; i++);
217             regset[i] = id * 2 + 1;
218             regset[i + 1] = -1;
219
220             /* Pop this submatch from the parents stack. */
221             for (i = 0; parents[i] >= 0; i++);
222             parents[i - 1] = -1;
223             break;
224           }
225
226         case ADDTAGS_RECURSE:
227           node = tre_stack_pop_voidptr(stack);
228
229           if (node->submatch_id >= 0)
230             {
231               int id = node->submatch_id;
232               int i;
233
234
235               /* Add start of this submatch to regset. */
236               for (i = 0; regset[i] >= 0; i++);
237               regset[i] = id * 2;
238               regset[i + 1] = -1;
239
240               if (!first_pass)
241                 {
242                   for (i = 0; parents[i] >= 0; i++);
243                   tnfa->submatch_data[id].parents = NULL;
244                   if (i > 0)
245                     {
246                       int *p = xmalloc(sizeof(*p) * (i + 1));
247                       if (p == NULL)
248                         {
249                           status = REG_ESPACE;
250                           break;
251                         }
252                       assert(tnfa->submatch_data[id].parents == NULL);
253                       tnfa->submatch_data[id].parents = p;
254                       for (i = 0; parents[i] >= 0; i++)
255                         p[i] = parents[i];
256                       p[i] = -1;
257                     }
258                 }
259
260               /* Add end of this submatch to regset after processing this
261                  node. */
262               STACK_PUSHX(stack, int, node->submatch_id);
263               STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
264             }
265
266           switch (node->type)
267             {
268             case LITERAL:
269               {
270                 tre_literal_t *lit = node->obj;
271
272                 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
273                   {
274                     int i;
275                     DPRINT(("Literal %d-%d\n",
276                             (int)lit->code_min, (int)lit->code_max));
277                     if (regset[0] >= 0)
278                       {
279                         /* Regset is not empty, so add a tag before the
280                            literal or backref. */
281                         if (!first_pass)
282                           {
283                             status = tre_add_tag_left(mem, node, tag);
284                             tnfa->tag_directions[tag] = direction;
285                             if (minimal_tag >= 0)
286                               {
287                                 DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
288                                 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
289                                 tnfa->minimal_tags[i] = tag;
290                                 tnfa->minimal_tags[i + 1] = minimal_tag;
291                                 tnfa->minimal_tags[i + 2] = -1;
292                                 minimal_tag = -1;
293                                 num_minimals++;
294                               }
295                             tre_purge_regset(regset, tnfa, tag);
296                           }
297                         else
298                           {
299                             DPRINT(("  num_tags = 1\n"));
300                             node->num_tags = 1;
301                           }
302
303                         DPRINT(("  num_tags++\n"));
304                         regset[0] = -1;
305                         tag = next_tag;
306                         num_tags++;
307                         next_tag++;
308                       }
309                   }
310                 else
311                   {
312                     assert(!IS_TAG(lit));
313                   }
314                 break;
315               }
316             case CATENATION:
317               {
318                 tre_catenation_t *cat = node->obj;
319                 tre_ast_node_t *left = cat->left;
320                 tre_ast_node_t *right = cat->right;
321                 int reserved_tag = -1;
322                 DPRINT(("Catenation, next_tag = %d\n", next_tag));
323
324
325                 /* After processing right child. */
326                 STACK_PUSHX(stack, voidptr, node);
327                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
328
329                 /* Process right child. */
330                 STACK_PUSHX(stack, voidptr, right);
331                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
332
333                 /* After processing left child. */
334                 STACK_PUSHX(stack, int, next_tag + left->num_tags);
335                 DPRINT(("  Pushing %d for after left\n",
336                         next_tag + left->num_tags));
337                 if (left->num_tags > 0 && right->num_tags > 0)
338                   {
339                     /* Reserve the next tag to the right child. */
340                     DPRINT(("  Reserving next_tag %d to right child\n",
341                             next_tag));
342                     reserved_tag = next_tag;
343                     next_tag++;
344                   }
345                 STACK_PUSHX(stack, int, reserved_tag);
346                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
347
348                 /* Process left child. */
349                 STACK_PUSHX(stack, voidptr, left);
350                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
351
352                 }
353               break;
354             case ITERATION:
355               {
356                 tre_iteration_t *iter = node->obj;
357                 DPRINT(("Iteration\n"));
358
359                 if (first_pass)
360                   {
361                     STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
362                   }
363                 else
364                   {
365                     STACK_PUSHX(stack, int, tag);
366                     STACK_PUSHX(stack, int, iter->minimal);
367                   }
368                 STACK_PUSHX(stack, voidptr, node);
369                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
370
371                 STACK_PUSHX(stack, voidptr, iter->arg);
372                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
373
374                 /* Regset is not empty, so add a tag here. */
375                 if (regset[0] >= 0 || iter->minimal)
376                   {
377                     if (!first_pass)
378                       {
379                         int i;
380                         status = tre_add_tag_left(mem, node, tag);
381                         if (iter->minimal)
382                           tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
383                         else
384                           tnfa->tag_directions[tag] = direction;
385                         if (minimal_tag >= 0)
386                           {
387                             DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
388                             for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
389                             tnfa->minimal_tags[i] = tag;
390                             tnfa->minimal_tags[i + 1] = minimal_tag;
391                             tnfa->minimal_tags[i + 2] = -1;
392                             minimal_tag = -1;
393                             num_minimals++;
394                           }
395                         tre_purge_regset(regset, tnfa, tag);
396                       }
397
398                     DPRINT(("  num_tags++\n"));
399                     regset[0] = -1;
400                     tag = next_tag;
401                     num_tags++;
402                     next_tag++;
403                   }
404                 direction = TRE_TAG_MINIMIZE;
405               }
406               break;
407             case UNION:
408               {
409                 tre_union_t *uni = node->obj;
410                 tre_ast_node_t *left = uni->left;
411                 tre_ast_node_t *right = uni->right;
412                 int left_tag;
413                 int right_tag;
414
415                 if (regset[0] >= 0)
416                   {
417                     left_tag = next_tag;
418                     right_tag = next_tag + 1;
419                   }
420                 else
421                   {
422                     left_tag = tag;
423                     right_tag = next_tag;
424                   }
425
426                 DPRINT(("Union\n"));
427
428                 /* After processing right child. */
429                 STACK_PUSHX(stack, int, right_tag);
430                 STACK_PUSHX(stack, int, left_tag);
431                 STACK_PUSHX(stack, voidptr, regset);
432                 STACK_PUSHX(stack, int, regset[0] >= 0);
433                 STACK_PUSHX(stack, voidptr, node);
434                 STACK_PUSHX(stack, voidptr, right);
435                 STACK_PUSHX(stack, voidptr, left);
436                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
437
438                 /* Process right child. */
439                 STACK_PUSHX(stack, voidptr, right);
440                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
441
442                 /* After processing left child. */
443                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
444
445                 /* Process left child. */
446                 STACK_PUSHX(stack, voidptr, left);
447                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
448
449                 /* Regset is not empty, so add a tag here. */
450                 if (regset[0] >= 0)
451                   {
452                     if (!first_pass)
453                       {
454                         int i;
455                         status = tre_add_tag_left(mem, node, tag);
456                         tnfa->tag_directions[tag] = direction;
457                         if (minimal_tag >= 0)
458                           {
459                             DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
460                             for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
461                             tnfa->minimal_tags[i] = tag;
462                             tnfa->minimal_tags[i + 1] = minimal_tag;
463                             tnfa->minimal_tags[i + 2] = -1;
464                             minimal_tag = -1;
465                             num_minimals++;
466                           }
467                         tre_purge_regset(regset, tnfa, tag);
468                       }
469
470                     DPRINT(("  num_tags++\n"));
471                     regset[0] = -1;
472                     tag = next_tag;
473                     num_tags++;
474                     next_tag++;
475                   }
476
477                 if (node->num_submatches > 0)
478                   {
479                     /* The next two tags are reserved for markers. */
480                     next_tag++;
481                     tag = next_tag;
482                     next_tag++;
483                   }
484
485                 break;
486               }
487             }
488
489           if (node->submatch_id >= 0)
490             {
491               int i;
492               /* Push this submatch on the parents stack. */
493               for (i = 0; parents[i] >= 0; i++);
494               parents[i] = node->submatch_id;
495               parents[i + 1] = -1;
496             }
497
498           break; /* end case: ADDTAGS_RECURSE */
499
500         case ADDTAGS_AFTER_ITERATION:
501           {
502             int minimal = 0;
503             int enter_tag;
504             node = tre_stack_pop_voidptr(stack);
505             if (first_pass)
506               {
507                 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
508                   + tre_stack_pop_int(stack);
509                 minimal_tag = -1;
510               }
511             else
512               {
513                 minimal = tre_stack_pop_int(stack);
514                 enter_tag = tre_stack_pop_int(stack);
515                 if (minimal)
516                   minimal_tag = enter_tag;
517               }
518
519             DPRINT(("After iteration\n"));
520             if (!first_pass)
521               {
522                 DPRINT(("  Setting direction to %s\n",
523                         minimal ? "minimize" : "maximize"));
524                 if (minimal)
525                   direction = TRE_TAG_MINIMIZE;
526                 else
527                   direction = TRE_TAG_MAXIMIZE;
528               }
529             break;
530           }
531
532         case ADDTAGS_AFTER_CAT_LEFT:
533           {
534             int new_tag = tre_stack_pop_int(stack);
535             next_tag = tre_stack_pop_int(stack);
536             DPRINT(("After cat left, tag = %d, next_tag = %d\n",
537                     tag, next_tag));
538             if (new_tag >= 0)
539               {
540                 DPRINT(("  Setting tag to %d\n", new_tag));
541                 tag = new_tag;
542               }
543             break;
544           }
545
546         case ADDTAGS_AFTER_CAT_RIGHT:
547           DPRINT(("After cat right\n"));
548           node = tre_stack_pop_voidptr(stack);
549           if (first_pass)
550             node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
551               + ((tre_catenation_t *)node->obj)->right->num_tags;
552           break;
553
554         case ADDTAGS_AFTER_UNION_LEFT:
555           DPRINT(("After union left\n"));
556           /* Lift the bottom of the `regset' array so that when processing
557              the right operand the items currently in the array are
558              invisible.  The original bottom was saved at ADDTAGS_UNION and
559              will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
560           while (*regset >= 0)
561             regset++;
562           break;
563
564         case ADDTAGS_AFTER_UNION_RIGHT:
565           {
566             int added_tags, tag_left, tag_right;
567             tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
568             tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
569             DPRINT(("After union right\n"));
570             node = tre_stack_pop_voidptr(stack);
571             added_tags = tre_stack_pop_int(stack);
572             if (first_pass)
573               {
574                 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
575                   + ((tre_union_t *)node->obj)->right->num_tags + added_tags
576                   + ((node->num_submatches > 0) ? 2 : 0);
577               }
578             regset = tre_stack_pop_voidptr(stack);
579             tag_left = tre_stack_pop_int(stack);
580             tag_right = tre_stack_pop_int(stack);
581
582             /* Add tags after both children, the left child gets a smaller
583                tag than the right child.  This guarantees that we prefer
584                the left child over the right child. */
585             /* XXX - This is not always necessary (if the children have
586                tags which must be seen for every match of that child). */
587             /* XXX - Check if this is the only place where tre_add_tag_right
588                is used.  If so, use tre_add_tag_left (putting the tag before
589                the child as opposed after the child) and throw away
590                tre_add_tag_right. */
591             if (node->num_submatches > 0)
592               {
593                 if (!first_pass)
594                   {
595                     status = tre_add_tag_right(mem, left, tag_left);
596                     tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
597                     status = tre_add_tag_right(mem, right, tag_right);
598                     tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
599                   }
600                 DPRINT(("  num_tags += 2\n"));
601                 num_tags += 2;
602               }
603             direction = TRE_TAG_MAXIMIZE;
604             break;
605           }
606
607         default:
608           assert(0);
609           break;
610
611         } /* end switch(symbol) */
612     } /* end while(tre_stack_num_objects(stack) > bottom) */
613
614   if (!first_pass)
615     tre_purge_regset(regset, tnfa, tag);
616
617   if (!first_pass && minimal_tag >= 0)
618     {
619       int i;
620       DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
621       for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
622       tnfa->minimal_tags[i] = tag;
623       tnfa->minimal_tags[i + 1] = minimal_tag;
624       tnfa->minimal_tags[i + 2] = -1;
625       minimal_tag = -1;
626       num_minimals++;
627     }
628
629   DPRINT(("tre_add_tags: %s complete.  Number of tags %d.\n",
630           first_pass? "First pass" : "Second pass", num_tags));
631
632   assert(tree->num_tags == num_tags);
633   tnfa->end_tag = num_tags;
634   tnfa->num_tags = num_tags;
635   tnfa->num_minimals = num_minimals;
636   xfree(orig_regset);
637   xfree(parents);
638   xfree(saved_states);
639   return status;
640 }
641
642
643
644 /*
645   AST to TNFA compilation routines.
646 */
647
648 typedef enum {
649   COPY_RECURSE,
650   COPY_SET_RESULT_PTR
651 } tre_copyast_symbol_t;
652
653 /* Flags for tre_copy_ast(). */
654 #define COPY_REMOVE_TAGS         1
655 #define COPY_MAXIMIZE_FIRST_TAG  2
656
657 static reg_errcode_t
658 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
659              int flags, int *pos_add, tre_tag_direction_t *tag_directions,
660              tre_ast_node_t **copy, int *max_pos)
661 {
662   reg_errcode_t status = REG_OK;
663   int bottom = tre_stack_num_objects(stack);
664   int num_copied = 0;
665   int first_tag = 1;
666   tre_ast_node_t **result = copy;
667   tre_copyast_symbol_t symbol;
668
669   STACK_PUSH(stack, voidptr, ast);
670   STACK_PUSH(stack, int, COPY_RECURSE);
671
672   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
673     {
674       tre_ast_node_t *node;
675       if (status != REG_OK)
676         break;
677
678       symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
679       switch (symbol)
680         {
681         case COPY_SET_RESULT_PTR:
682           result = tre_stack_pop_voidptr(stack);
683           break;
684         case COPY_RECURSE:
685           node = tre_stack_pop_voidptr(stack);
686           switch (node->type)
687             {
688             case LITERAL:
689               {
690                 tre_literal_t *lit = node->obj;
691                 int pos = lit->position;
692                 int min = lit->code_min;
693                 int max = lit->code_max;
694                 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
695                   {
696                     /* XXX - e.g. [ab] has only one position but two
697                        nodes, so we are creating holes in the state space
698                        here.  Not fatal, just wastes memory. */
699                     pos += *pos_add;
700                     num_copied++;
701                   }
702                 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
703                   {
704                     /* Change this tag to empty. */
705                     min = EMPTY;
706                     max = pos = -1;
707                   }
708                 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
709                          && first_tag)
710                   {
711                     /* Maximize the first tag. */
712                     tag_directions[max] = TRE_TAG_MAXIMIZE;
713                     first_tag = 0;
714                   }
715                 *result = tre_ast_new_literal(mem, min, max, pos);
716                 if (*result == NULL)
717                   status = REG_ESPACE;
718
719                 if (pos > *max_pos)
720                   *max_pos = pos;
721                 break;
722               }
723             case UNION:
724               {
725                 tre_union_t *uni = node->obj;
726                 tre_union_t *tmp;
727                 *result = tre_ast_new_union(mem, uni->left, uni->right);
728                 if (*result == NULL)
729                   {
730                     status = REG_ESPACE;
731                     break;
732                   }
733                 tmp = (*result)->obj;
734                 result = &tmp->left;
735                 STACK_PUSHX(stack, voidptr, uni->right);
736                 STACK_PUSHX(stack, int, COPY_RECURSE);
737                 STACK_PUSHX(stack, voidptr, &tmp->right);
738                 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
739                 STACK_PUSHX(stack, voidptr, uni->left);
740                 STACK_PUSHX(stack, int, COPY_RECURSE);
741                 break;
742               }
743             case CATENATION:
744               {
745                 tre_catenation_t *cat = node->obj;
746                 tre_catenation_t *tmp;
747                 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
748                 if (*result == NULL)
749                   {
750                     status = REG_ESPACE;
751                     break;
752                   }
753                 tmp = (*result)->obj;
754                 tmp->left = NULL;
755                 tmp->right = NULL;
756                 result = &tmp->left;
757
758                 STACK_PUSHX(stack, voidptr, cat->right);
759                 STACK_PUSHX(stack, int, COPY_RECURSE);
760                 STACK_PUSHX(stack, voidptr, &tmp->right);
761                 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
762                 STACK_PUSHX(stack, voidptr, cat->left);
763                 STACK_PUSHX(stack, int, COPY_RECURSE);
764                 break;
765               }
766             case ITERATION:
767               {
768                 tre_iteration_t *iter = node->obj;
769                 STACK_PUSHX(stack, voidptr, iter->arg);
770                 STACK_PUSHX(stack, int, COPY_RECURSE);
771                 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
772                                            iter->max, iter->minimal);
773                 if (*result == NULL)
774                   {
775                     status = REG_ESPACE;
776                     break;
777                   }
778                 iter = (*result)->obj;
779                 result = &iter->arg;
780                 break;
781               }
782             default:
783               assert(0);
784               break;
785             }
786           break;
787         }
788     }
789   *pos_add += num_copied;
790   return status;
791 }
792
793 typedef enum {
794   EXPAND_RECURSE,
795   EXPAND_AFTER_ITER
796 } tre_expand_ast_symbol_t;
797
798 /* Expands each iteration node that has a finite nonzero minimum or maximum
799    iteration count to a catenated sequence of copies of the node. */
800 static reg_errcode_t
801 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
802                int *position, tre_tag_direction_t *tag_directions,
803                int *max_depth)
804 {
805   reg_errcode_t status = REG_OK;
806   int bottom = tre_stack_num_objects(stack);
807   int pos_add = 0;
808   int pos_add_total = 0;
809   int max_pos = 0;
810   /* Current approximate matching parameters. */
811   int params[TRE_PARAM_LAST];
812   /* Approximate parameter nesting level. */
813   int params_depth = 0;
814   int iter_depth = 0;
815   int i;
816
817   for (i = 0; i < TRE_PARAM_LAST; i++)
818     params[i] = TRE_PARAM_DEFAULT;
819
820   STACK_PUSHR(stack, voidptr, ast);
821   STACK_PUSHR(stack, int, EXPAND_RECURSE);
822   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
823     {
824       tre_ast_node_t *node;
825       tre_expand_ast_symbol_t symbol;
826
827       if (status != REG_OK)
828         break;
829
830       DPRINT(("pos_add %d\n", pos_add));
831
832       symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
833       node = tre_stack_pop_voidptr(stack);
834       switch (symbol)
835         {
836         case EXPAND_RECURSE:
837           switch (node->type)
838             {
839             case LITERAL:
840               {
841                 tre_literal_t *lit= node->obj;
842                 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
843                   {
844                     lit->position += pos_add;
845                     if (lit->position > max_pos)
846                       max_pos = lit->position;
847                   }
848                 break;
849               }
850             case UNION:
851               {
852                 tre_union_t *uni = node->obj;
853                 STACK_PUSHX(stack, voidptr, uni->right);
854                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
855                 STACK_PUSHX(stack, voidptr, uni->left);
856                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
857                 break;
858               }
859             case CATENATION:
860               {
861                 tre_catenation_t *cat = node->obj;
862                 STACK_PUSHX(stack, voidptr, cat->right);
863                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
864                 STACK_PUSHX(stack, voidptr, cat->left);
865                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
866                 break;
867               }
868             case ITERATION:
869               {
870                 tre_iteration_t *iter = node->obj;
871                 STACK_PUSHX(stack, int, pos_add);
872                 STACK_PUSHX(stack, voidptr, node);
873                 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
874                 STACK_PUSHX(stack, voidptr, iter->arg);
875                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
876                 /* If we are going to expand this node at EXPAND_AFTER_ITER
877                    then don't increase the `pos' fields of the nodes now, it
878                    will get done when expanding. */
879                 if (iter->min > 1 || iter->max > 1)
880                   pos_add = 0;
881                 iter_depth++;
882                 DPRINT(("iter\n"));
883                 break;
884               }
885             default:
886               assert(0);
887               break;
888             }
889           break;
890         case EXPAND_AFTER_ITER:
891           {
892             tre_iteration_t *iter = node->obj;
893             int pos_add_last;
894             pos_add = tre_stack_pop_int(stack);
895             pos_add_last = pos_add;
896             if (iter->min > 1 || iter->max > 1)
897               {
898                 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
899                 int j;
900                 int pos_add_save = pos_add;
901
902                 /* Create a catenated sequence of copies of the node. */
903                 for (j = 0; j < iter->min; j++)
904                   {
905                     tre_ast_node_t *copy;
906                     /* Remove tags from all but the last copy. */
907                     int flags = ((j + 1 < iter->min)
908                                  ? COPY_REMOVE_TAGS
909                                  : COPY_MAXIMIZE_FIRST_TAG);
910                     DPRINT(("  pos_add %d\n", pos_add));
911                     pos_add_save = pos_add;
912                     status = tre_copy_ast(mem, stack, iter->arg, flags,
913                                           &pos_add, tag_directions, &copy,
914                                           &max_pos);
915                     if (status != REG_OK)
916                       return status;
917                     if (seq1 != NULL)
918                       seq1 = tre_ast_new_catenation(mem, seq1, copy);
919                     else
920                       seq1 = copy;
921                     if (seq1 == NULL)
922                       return REG_ESPACE;
923                   }
924
925                 if (iter->max == -1)
926                   {
927                     /* No upper limit. */
928                     pos_add_save = pos_add;
929                     status = tre_copy_ast(mem, stack, iter->arg, 0,
930                                           &pos_add, NULL, &seq2, &max_pos);
931                     if (status != REG_OK)
932                       return status;
933                     seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
934                     if (seq2 == NULL)
935                       return REG_ESPACE;
936                   }
937                 else
938                   {
939                     for (j = iter->min; j < iter->max; j++)
940                       {
941                         tre_ast_node_t *tmp, *copy;
942                         pos_add_save = pos_add;
943                         status = tre_copy_ast(mem, stack, iter->arg, 0,
944                                               &pos_add, NULL, &copy, &max_pos);
945                         if (status != REG_OK)
946                           return status;
947                         if (seq2 != NULL)
948                           seq2 = tre_ast_new_catenation(mem, copy, seq2);
949                         else
950                           seq2 = copy;
951                         if (seq2 == NULL)
952                           return REG_ESPACE;
953                         tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
954                         if (tmp == NULL)
955                           return REG_ESPACE;
956                         seq2 = tre_ast_new_union(mem, tmp, seq2);
957                         if (seq2 == NULL)
958                           return REG_ESPACE;
959                       }
960                   }
961
962                 pos_add = pos_add_save;
963                 if (seq1 == NULL)
964                   seq1 = seq2;
965                 else if (seq2 != NULL)
966                   seq1 = tre_ast_new_catenation(mem, seq1, seq2);
967                 if (seq1 == NULL)
968                   return REG_ESPACE;
969                 node->obj = seq1->obj;
970                 node->type = seq1->type;
971               }
972
973             iter_depth--;
974             pos_add_total += pos_add - pos_add_last;
975             if (iter_depth == 0)
976               pos_add = pos_add_total;
977
978             /* If approximate parameters are specified, surround the result
979                with two parameter setting nodes.  The one on the left sets
980                the specified parameters, and the one on the right restores
981                the old parameters. */
982             if (iter->params)
983               {
984                 tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
985                 int *old_params;
986
987                 tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
988                 if (!tmp_l)
989                   return REG_ESPACE;
990                 ((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
991                 iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
992                 tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
993                 if (!tmp_r)
994                   return REG_ESPACE;
995                 old_params = tre_mem_alloc(mem, sizeof(*old_params)
996                                            * TRE_PARAM_LAST);
997                 if (!old_params)
998                   return REG_ESPACE;
999                 for (i = 0; i < TRE_PARAM_LAST; i++)
1000                   old_params[i] = params[i];
1001                 ((tre_literal_t *)tmp_r->obj)->u.params = old_params;
1002                 old_params[TRE_PARAM_DEPTH] = params_depth;
1003                 /* XXX - this is the only place where ast_new_node is
1004                    needed -- should be moved inside AST module. */
1005                 node_copy = tre_ast_new_node(mem, ITERATION,
1006                                              sizeof(tre_iteration_t));
1007                 if (!node_copy)
1008                   return REG_ESPACE;
1009                 node_copy->obj = node->obj;
1010                 tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
1011                 if (!tmp_node)
1012                   return REG_ESPACE;
1013                 tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
1014                 if (!tmp_node)
1015                   return REG_ESPACE;
1016                 /* Replace the contents of `node' with `tmp_node'. */
1017                 memcpy(node, tmp_node, sizeof(*node));
1018                 node->obj = tmp_node->obj;
1019                 node->type = tmp_node->type;
1020                 params_depth++;
1021                 if (params_depth > *max_depth)
1022                   *max_depth = params_depth;
1023               }
1024             break;
1025           }
1026         default:
1027           assert(0);
1028           break;
1029         }
1030     }
1031
1032   *position += pos_add_total;
1033
1034   /* `max_pos' should never be larger than `*position' if the above
1035      code works, but just an extra safeguard let's make sure
1036      `*position' is set large enough so enough memory will be
1037      allocated for the transition table. */
1038   if (max_pos > *position)
1039     *position = max_pos;
1040
1041 #ifdef TRE_DEBUG
1042   DPRINT(("Expanded AST:\n"));
1043   tre_ast_print(ast);
1044   DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
1045 #endif
1046
1047   return status;
1048 }
1049
1050 static tre_pos_and_tags_t *
1051 tre_set_empty(tre_mem_t mem)
1052 {
1053   tre_pos_and_tags_t *new_set;
1054
1055   new_set = tre_mem_calloc(mem, sizeof(*new_set));
1056   if (new_set == NULL)
1057     return NULL;
1058
1059   new_set[0].position = -1;
1060   new_set[0].code_min = -1;
1061   new_set[0].code_max = -1;
1062
1063   return new_set;
1064 }
1065
1066 static tre_pos_and_tags_t *
1067 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
1068             tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
1069 {
1070   tre_pos_and_tags_t *new_set;
1071
1072   new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
1073   if (new_set == NULL)
1074     return NULL;
1075
1076   new_set[0].position = position;
1077   new_set[0].code_min = code_min;
1078   new_set[0].code_max = code_max;
1079   new_set[0].class = class;
1080   new_set[0].neg_classes = neg_classes;
1081   new_set[0].backref = backref;
1082   new_set[1].position = -1;
1083   new_set[1].code_min = -1;
1084   new_set[1].code_max = -1;
1085
1086   return new_set;
1087 }
1088
1089 static tre_pos_and_tags_t *
1090 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
1091               int *tags, int assertions, int *params)
1092 {
1093   int s1, s2, i, j;
1094   tre_pos_and_tags_t *new_set;
1095   int *new_tags;
1096   int num_tags;
1097
1098   for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
1099   for (s1 = 0; set1[s1].position >= 0; s1++);
1100   for (s2 = 0; set2[s2].position >= 0; s2++);
1101   new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
1102   if (!new_set )
1103     return NULL;
1104
1105   for (s1 = 0; set1[s1].position >= 0; s1++)
1106     {
1107       new_set[s1].position = set1[s1].position;
1108       new_set[s1].code_min = set1[s1].code_min;
1109       new_set[s1].code_max = set1[s1].code_max;
1110       new_set[s1].assertions = set1[s1].assertions | assertions;
1111       new_set[s1].class = set1[s1].class;
1112       new_set[s1].neg_classes = set1[s1].neg_classes;
1113       new_set[s1].backref = set1[s1].backref;
1114       if (set1[s1].tags == NULL && tags == NULL)
1115         new_set[s1].tags = NULL;
1116       else
1117         {
1118           for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
1119           new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
1120                                          * (i + num_tags + 1)));
1121           if (new_tags == NULL)
1122             return NULL;
1123           for (j = 0; j < i; j++)
1124             new_tags[j] = set1[s1].tags[j];
1125           for (i = 0; i < num_tags; i++)
1126             new_tags[j + i] = tags[i];
1127           new_tags[j + i] = -1;
1128           new_set[s1].tags = new_tags;
1129         }
1130       if (set1[s1].params)
1131         new_set[s1].params = set1[s1].params;
1132       if (params)
1133         {
1134           if (!new_set[s1].params)
1135             new_set[s1].params = params;
1136           else
1137             {
1138               new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
1139                                                  TRE_PARAM_LAST);
1140               if (!new_set[s1].params)
1141                 return NULL;
1142               for (i = 0; i < TRE_PARAM_LAST; i++)
1143                 if (params[i] != TRE_PARAM_UNSET)
1144                   new_set[s1].params[i] = params[i];
1145             }
1146         }
1147     }
1148
1149   for (s2 = 0; set2[s2].position >= 0; s2++)
1150     {
1151       new_set[s1 + s2].position = set2[s2].position;
1152       new_set[s1 + s2].code_min = set2[s2].code_min;
1153       new_set[s1 + s2].code_max = set2[s2].code_max;
1154       /* XXX - why not | assertions here as well? */
1155       new_set[s1 + s2].assertions = set2[s2].assertions;
1156       new_set[s1 + s2].class = set2[s2].class;
1157       new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
1158       new_set[s1 + s2].backref = set2[s2].backref;
1159       if (set2[s2].tags == NULL)
1160         new_set[s1 + s2].tags = NULL;
1161       else
1162         {
1163           for (i = 0; set2[s2].tags[i] >= 0; i++);
1164           new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
1165           if (new_tags == NULL)
1166             return NULL;
1167           for (j = 0; j < i; j++)
1168             new_tags[j] = set2[s2].tags[j];
1169           new_tags[j] = -1;
1170           new_set[s1 + s2].tags = new_tags;
1171         }
1172       if (set2[s2].params)
1173         new_set[s1 + s2].params = set2[s2].params;
1174       if (params)
1175         {
1176           if (!new_set[s1 + s2].params)
1177             new_set[s1 + s2].params = params;
1178           else
1179             {
1180               new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
1181                                                       TRE_PARAM_LAST);
1182               if (!new_set[s1 + s2].params)
1183                 return NULL;
1184               for (i = 0; i < TRE_PARAM_LAST; i++)
1185                 if (params[i] != TRE_PARAM_UNSET)
1186                   new_set[s1 + s2].params[i] = params[i];
1187             }
1188         }
1189     }
1190   new_set[s1 + s2].position = -1;
1191   return new_set;
1192 }
1193
1194 /* Finds the empty path through `node' which is the one that should be
1195    taken according to POSIX.2 rules, and adds the tags on that path to
1196    `tags'.   `tags' may be NULL.  If `num_tags_seen' is not NULL, it is
1197    set to the number of tags seen on the path. */
1198 static reg_errcode_t
1199 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
1200                 int *assertions, int *params, int *num_tags_seen,
1201                 int *params_seen)
1202 {
1203   tre_literal_t *lit;
1204   tre_union_t *uni;
1205   tre_catenation_t *cat;
1206   tre_iteration_t *iter;
1207   int i;
1208   int bottom = tre_stack_num_objects(stack);
1209   reg_errcode_t status = REG_OK;
1210   if (num_tags_seen)
1211     *num_tags_seen = 0;
1212   if (params_seen)
1213     *params_seen = 0;
1214
1215   status = tre_stack_push_voidptr(stack, node);
1216
1217   /* Walk through the tree recursively. */
1218   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1219     {
1220       node = tre_stack_pop_voidptr(stack);
1221
1222       switch (node->type)
1223         {
1224         case LITERAL:
1225           lit = (tre_literal_t *)node->obj;
1226           switch (lit->code_min)
1227             {
1228             case TAG:
1229               if (lit->code_max >= 0)
1230                 {
1231                   if (tags != NULL)
1232                     {
1233                       /* Add the tag to `tags'. */
1234                       for (i = 0; tags[i] >= 0; i++)
1235                         if (tags[i] == lit->code_max)
1236                           break;
1237                       if (tags[i] < 0)
1238                         {
1239                           tags[i] = lit->code_max;
1240                           tags[i + 1] = -1;
1241                         }
1242                     }
1243                   if (num_tags_seen)
1244                     (*num_tags_seen)++;
1245                 }
1246               break;
1247             case ASSERTION:
1248               assert(lit->code_max >= 1
1249                      || lit->code_max <= ASSERT_LAST);
1250               if (assertions != NULL)
1251                 *assertions |= lit->code_max;
1252               break;
1253             case PARAMETER:
1254               if (params != NULL)
1255                 for (i = 0; i < TRE_PARAM_LAST; i++)
1256                   params[i] = lit->u.params[i];
1257               if (params_seen != NULL)
1258                 *params_seen = 1;
1259               break;
1260             case EMPTY:
1261               break;
1262             default:
1263               assert(0);
1264               break;
1265             }
1266           break;
1267
1268         case UNION:
1269           /* Subexpressions starting earlier take priority over ones
1270              starting later, so we prefer the left subexpression over the
1271              right subexpression. */
1272           uni = (tre_union_t *)node->obj;
1273           if (uni->left->nullable)
1274             STACK_PUSHX(stack, voidptr, uni->left)
1275           else if (uni->right->nullable)
1276             STACK_PUSHX(stack, voidptr, uni->right)
1277           else
1278             assert(0);
1279           break;
1280
1281         case CATENATION:
1282           /* The path must go through both children. */
1283           cat = (tre_catenation_t *)node->obj;
1284           assert(cat->left->nullable);
1285           assert(cat->right->nullable);
1286           STACK_PUSHX(stack, voidptr, cat->left);
1287           STACK_PUSHX(stack, voidptr, cat->right);
1288           break;
1289
1290         case ITERATION:
1291           /* A match with an empty string is preferred over no match at
1292              all, so we go through the argument if possible. */
1293           iter = (tre_iteration_t *)node->obj;
1294           if (iter->arg->nullable)
1295             STACK_PUSHX(stack, voidptr, iter->arg);
1296           break;
1297
1298         default:
1299           assert(0);
1300           break;
1301         }
1302     }
1303
1304   return status;
1305 }
1306
1307
1308 typedef enum {
1309   NFL_RECURSE,
1310   NFL_POST_UNION,
1311   NFL_POST_CATENATION,
1312   NFL_POST_ITERATION
1313 } tre_nfl_stack_symbol_t;
1314
1315
1316 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
1317    the nodes of the AST `tree'. */
1318 static reg_errcode_t
1319 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
1320 {
1321   int bottom = tre_stack_num_objects(stack);
1322
1323   STACK_PUSHR(stack, voidptr, tree);
1324   STACK_PUSHR(stack, int, NFL_RECURSE);
1325
1326   while (tre_stack_num_objects(stack) > bottom)
1327     {
1328       tre_nfl_stack_symbol_t symbol;
1329       tre_ast_node_t *node;
1330
1331       symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
1332       node = tre_stack_pop_voidptr(stack);
1333       switch (symbol)
1334         {
1335         case NFL_RECURSE:
1336           switch (node->type)
1337             {
1338             case LITERAL:
1339               {
1340                 tre_literal_t *lit = (tre_literal_t *)node->obj;
1341                 if (IS_BACKREF(lit))
1342                   {
1343                     /* Back references: nullable = false, firstpos = {i},
1344                        lastpos = {i}. */
1345                     node->nullable = 0;
1346                     node->firstpos = tre_set_one(mem, lit->position, 0,
1347                                              TRE_CHAR_MAX, 0, NULL, -1);
1348                     if (!node->firstpos)
1349                       return REG_ESPACE;
1350                     node->lastpos = tre_set_one(mem, lit->position, 0,
1351                                                 TRE_CHAR_MAX, 0, NULL,
1352                                                 (int)lit->code_max);
1353                     if (!node->lastpos)
1354                       return REG_ESPACE;
1355                   }
1356                 else if (lit->code_min < 0)
1357                   {
1358                     /* Tags, empty strings, params, and zero width assertions:
1359                        nullable = true, firstpos = {}, and lastpos = {}. */
1360                     node->nullable = 1;
1361                     node->firstpos = tre_set_empty(mem);
1362                     if (!node->firstpos)
1363                       return REG_ESPACE;
1364                     node->lastpos = tre_set_empty(mem);
1365                     if (!node->lastpos)
1366                       return REG_ESPACE;
1367                   }
1368                 else
1369                   {
1370                     /* Literal at position i: nullable = false, firstpos = {i},
1371                        lastpos = {i}. */
1372                     node->nullable = 0;
1373                     node->firstpos =
1374                       tre_set_one(mem, lit->position, (int)lit->code_min,
1375                                   (int)lit->code_max, 0, NULL, -1);
1376                     if (!node->firstpos)
1377                       return REG_ESPACE;
1378                     node->lastpos = tre_set_one(mem, lit->position,
1379                                                 (int)lit->code_min,
1380                                                 (int)lit->code_max,
1381                                                 lit->u.class, lit->neg_classes,
1382                                                 -1);
1383                     if (!node->lastpos)
1384                       return REG_ESPACE;
1385                   }
1386                 break;
1387               }
1388
1389             case UNION:
1390               /* Compute the attributes for the two subtrees, and after that
1391                  for this node. */
1392               STACK_PUSHR(stack, voidptr, node);
1393               STACK_PUSHR(stack, int, NFL_POST_UNION);
1394               STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
1395               STACK_PUSHR(stack, int, NFL_RECURSE);
1396               STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
1397               STACK_PUSHR(stack, int, NFL_RECURSE);
1398               break;
1399
1400             case CATENATION:
1401               /* Compute the attributes for the two subtrees, and after that
1402                  for this node. */
1403               STACK_PUSHR(stack, voidptr, node);
1404               STACK_PUSHR(stack, int, NFL_POST_CATENATION);
1405               STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
1406               STACK_PUSHR(stack, int, NFL_RECURSE);
1407               STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
1408               STACK_PUSHR(stack, int, NFL_RECURSE);
1409               break;
1410
1411             case ITERATION:
1412               /* Compute the attributes for the subtree, and after that for
1413                  this node. */
1414               STACK_PUSHR(stack, voidptr, node);
1415               STACK_PUSHR(stack, int, NFL_POST_ITERATION);
1416               STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
1417               STACK_PUSHR(stack, int, NFL_RECURSE);
1418               break;
1419             }
1420           break; /* end case: NFL_RECURSE */
1421
1422         case NFL_POST_UNION:
1423           {
1424             tre_union_t *uni = (tre_union_t *)node->obj;
1425             node->nullable = uni->left->nullable || uni->right->nullable;
1426             node->firstpos = tre_set_union(mem, uni->left->firstpos,
1427                                            uni->right->firstpos, NULL, 0, NULL);
1428             if (!node->firstpos)
1429               return REG_ESPACE;
1430             node->lastpos = tre_set_union(mem, uni->left->lastpos,
1431                                           uni->right->lastpos, NULL, 0, NULL);
1432             if (!node->lastpos)
1433               return REG_ESPACE;
1434             break;
1435           }
1436
1437         case NFL_POST_ITERATION:
1438           {
1439             tre_iteration_t *iter = (tre_iteration_t *)node->obj;
1440
1441             if (iter->min == 0 || iter->arg->nullable)
1442               node->nullable = 1;
1443             else
1444               node->nullable = 0;
1445             node->firstpos = iter->arg->firstpos;
1446             node->lastpos = iter->arg->lastpos;
1447             break;
1448           }
1449
1450         case NFL_POST_CATENATION:
1451           {
1452             int num_tags, *tags, assertions, params_seen;
1453             int *params;
1454             reg_errcode_t status;
1455             tre_catenation_t *cat = node->obj;
1456             node->nullable = cat->left->nullable && cat->right->nullable;
1457
1458             /* Compute firstpos. */
1459             if (cat->left->nullable)
1460               {
1461                 /* The left side matches the empty string.  Make a first pass
1462                    with tre_match_empty() to get the number of tags and
1463                    parameters. */
1464                 status = tre_match_empty(stack, cat->left,
1465                                          NULL, NULL, NULL, &num_tags,
1466                                          &params_seen);
1467                 if (status != REG_OK)
1468                   return status;
1469                 /* Allocate arrays for the tags and parameters. */
1470                 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
1471                 if (!tags)
1472                   return REG_ESPACE;
1473                 tags[0] = -1;
1474                 assertions = 0;
1475                 params = NULL;
1476                 if (params_seen)
1477                   {
1478                     params = tre_mem_alloc(mem, sizeof(*params)
1479                                            * TRE_PARAM_LAST);
1480                     if (!params)
1481                       {
1482                         xfree(tags);
1483                         return REG_ESPACE;
1484                       }
1485                   }
1486                 /* Second pass with tre_mach_empty() to get the list of
1487                    tags and parameters. */
1488                 status = tre_match_empty(stack, cat->left, tags,
1489                                          &assertions, params, NULL, NULL);
1490                 if (status != REG_OK)
1491                   {
1492                     xfree(tags);
1493                     return status;
1494                   }
1495                 node->firstpos =
1496                   tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
1497                                 tags, assertions, params);
1498                 xfree(tags);
1499                 if (!node->firstpos)
1500                   return REG_ESPACE;
1501               }
1502             else
1503               {
1504                 node->firstpos = cat->left->firstpos;
1505               }
1506
1507             /* Compute lastpos. */
1508             if (cat->right->nullable)
1509               {
1510                 /* The right side matches the empty string.  Make a first pass
1511                    with tre_match_empty() to get the number of tags and
1512                    parameters. */
1513                 status = tre_match_empty(stack, cat->right,
1514                                          NULL, NULL, NULL, &num_tags,
1515                                          &params_seen);
1516                 if (status != REG_OK)
1517                   return status;
1518                 /* Allocate arrays for the tags and parameters. */
1519                 tags = xmalloc(sizeof(int) * (num_tags + 1));
1520                 if (!tags)
1521                   return REG_ESPACE;
1522                 tags[0] = -1;
1523                 assertions = 0;
1524                 params = NULL;
1525                 if (params_seen)
1526                   {
1527                     params = tre_mem_alloc(mem, sizeof(*params)
1528                                            * TRE_PARAM_LAST);
1529                     if (!params)
1530                       {
1531                         xfree(tags);
1532                         return REG_ESPACE;
1533                       }
1534                   }
1535                 /* Second pass with tre_mach_empty() to get the list of
1536                    tags and parameters. */
1537                 status = tre_match_empty(stack, cat->right, tags,
1538                                          &assertions, params, NULL, NULL);
1539                 if (status != REG_OK)
1540                   {
1541                     xfree(tags);
1542                     return status;
1543                   }
1544                 node->lastpos =
1545                   tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
1546                                 tags, assertions, params);
1547                 xfree(tags);
1548                 if (!node->lastpos)
1549                   return REG_ESPACE;
1550               }
1551             else
1552               {
1553                 node->lastpos = cat->right->lastpos;
1554               }
1555             break;
1556           }
1557
1558         default:
1559           assert(0);
1560           break;
1561         }
1562     }
1563
1564   return REG_OK;
1565 }
1566
1567
1568 /* Adds a transition from each position in `p1' to each position in `p2'. */
1569 static reg_errcode_t
1570 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
1571                tre_tnfa_transition_t *transitions,
1572                int *counts, int *offs)
1573 {
1574   tre_pos_and_tags_t *orig_p2 = p2;
1575   tre_tnfa_transition_t *trans;
1576   int i, j, k, l, dup, prev_p2_pos;
1577
1578   if (transitions != NULL)
1579     while (p1->position >= 0)
1580       {
1581         p2 = orig_p2;
1582         prev_p2_pos = -1;
1583         while (p2->position >= 0)
1584           {
1585             /* Optimization: if this position was already handled, skip it. */
1586             if (p2->position == prev_p2_pos)
1587               {
1588                 p2++;
1589                 continue;
1590               }
1591             prev_p2_pos = p2->position;
1592             /* Set `trans' to point to the next unused transition from
1593                position `p1->position'. */
1594             trans = transitions + offs[p1->position];
1595             while (trans->state != NULL)
1596               {
1597 #if 0
1598                 /* If we find a previous transition from `p1->position' to
1599                    `p2->position', it is overwritten.  This can happen only
1600                    if there are nested loops in the regexp, like in "((a)*)*".
1601                    In POSIX.2 repetition using the outer loop is always
1602                    preferred over using the inner loop.  Therefore the
1603                    transition for the inner loop is useless and can be thrown
1604                    away. */
1605                 /* XXX - The same position is used for all nodes in a bracket
1606                    expression, so this optimization cannot be used (it will
1607                    break bracket expressions) unless I figure out a way to
1608                    detect it here. */
1609                 if (trans->state_id == p2->position)
1610                   {
1611                     DPRINT(("*"));
1612                     break;
1613                   }
1614 #endif
1615                 trans++;
1616               }
1617
1618             if (trans->state == NULL)
1619               (trans + 1)->state = NULL;
1620             /* Use the character ranges, assertions, etc. from `p1' for
1621                the transition from `p1' to `p2'. */
1622             trans->code_min = p1->code_min;
1623             trans->code_max = p1->code_max;
1624             trans->state = transitions + offs[p2->position];
1625             trans->state_id = p2->position;
1626             trans->assertions = p1->assertions | p2->assertions
1627               | (p1->class ? ASSERT_CHAR_CLASS : 0)
1628               | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
1629             if (p1->backref >= 0)
1630               {
1631                 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
1632                 assert(p2->backref < 0);
1633                 trans->u.backref = p1->backref;
1634                 trans->assertions |= ASSERT_BACKREF;
1635               }
1636             else
1637               trans->u.class = p1->class;
1638             if (p1->neg_classes != NULL)
1639               {
1640                 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
1641                 trans->neg_classes =
1642                   xmalloc(sizeof(*trans->neg_classes) * (i + 1));
1643                 if (trans->neg_classes == NULL)
1644                   return REG_ESPACE;
1645                 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
1646                   trans->neg_classes[i] = p1->neg_classes[i];
1647                 trans->neg_classes[i] = (tre_ctype_t)0;
1648               }
1649             else
1650               trans->neg_classes = NULL;
1651
1652             /* Find out how many tags this transition has. */
1653             i = 0;
1654             if (p1->tags != NULL)
1655               while(p1->tags[i] >= 0)
1656                 i++;
1657             j = 0;
1658             if (p2->tags != NULL)
1659               while(p2->tags[j] >= 0)
1660                 j++;
1661
1662             /* If we are overwriting a transition, free the old tag array. */
1663             if (trans->tags != NULL)
1664               xfree(trans->tags);
1665             trans->tags = NULL;
1666
1667             /* If there were any tags, allocate an array and fill it. */
1668             if (i + j > 0)
1669               {
1670                 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
1671                 if (!trans->tags)
1672                   return REG_ESPACE;
1673                 i = 0;
1674                 if (p1->tags != NULL)
1675                   while(p1->tags[i] >= 0)
1676                     {
1677                       trans->tags[i] = p1->tags[i];
1678                       i++;
1679                     }
1680                 l = i;
1681                 j = 0;
1682                 if (p2->tags != NULL)
1683                   while (p2->tags[j] >= 0)
1684                     {
1685                       /* Don't add duplicates. */
1686                       dup = 0;
1687                       for (k = 0; k < i; k++)
1688                         if (trans->tags[k] == p2->tags[j])
1689                           {
1690                             dup = 1;
1691                             break;
1692                           }
1693                       if (!dup)
1694                         trans->tags[l++] = p2->tags[j];
1695                       j++;
1696                     }
1697                 trans->tags[l] = -1;
1698               }
1699
1700             /* Set the parameter array.  If both `p2' and `p1' have same
1701                parameters, the values in `p2' override those in `p1'. */
1702             if (p1->params || p2->params)
1703               {
1704                 if (!trans->params)
1705                   trans->params = xmalloc(sizeof(*trans->params)
1706                                           * TRE_PARAM_LAST);
1707                 if (!trans->params)
1708                   return REG_ESPACE;
1709                 for (i = 0; i < TRE_PARAM_LAST; i++)
1710                   {
1711                     trans->params[i] = TRE_PARAM_UNSET;
1712                     if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
1713                       trans->params[i] = p1->params[i];
1714                     if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
1715                       trans->params[i] = p2->params[i];
1716                   }
1717               }
1718             else
1719               {
1720                 if (trans->params)
1721                   xfree(trans->params);
1722                 trans->params = NULL;
1723               }
1724
1725
1726 #ifdef TRE_DEBUG
1727             {
1728               int *tags;
1729
1730               DPRINT(("  %2d -> %2d on %3d", p1->position, p2->position,
1731                       p1->code_min));
1732               if (p1->code_max != p1->code_min)
1733                 DPRINT(("-%3d", p1->code_max));
1734               tags = trans->tags;
1735               if (tags)
1736                 {
1737                   DPRINT((", tags ["));
1738                   while (*tags >= 0)
1739                     {
1740                       DPRINT(("%d", *tags));
1741                       tags++;
1742                       if (*tags >= 0)
1743                         DPRINT((","));
1744                     }
1745                   DPRINT(("]"));
1746                 }
1747               if (trans->assertions)
1748                 DPRINT((", assert %d", trans->assertions));
1749               if (trans->assertions & ASSERT_BACKREF)
1750                 DPRINT((", backref %d", trans->u.backref));
1751               else if (trans->u.class)
1752                 DPRINT((", class %ld", (long)trans->u.class));
1753               if (trans->neg_classes)
1754                 DPRINT((", neg_classes %p", trans->neg_classes));
1755               if (trans->params)
1756                 {
1757                   DPRINT((", "));
1758                   tre_print_params(trans->params);
1759                 }
1760               DPRINT(("\n"));
1761             }
1762 #endif /* TRE_DEBUG */
1763             p2++;
1764           }
1765         p1++;
1766       }
1767   else
1768     /* Compute a maximum limit for the number of transitions leaving
1769        from each state. */
1770     while (p1->position >= 0)
1771       {
1772         p2 = orig_p2;
1773         while (p2->position >= 0)
1774           {
1775             counts[p1->position]++;
1776             p2++;
1777           }
1778         p1++;
1779       }
1780   return REG_OK;
1781 }
1782
1783 /* Converts the syntax tree to a TNFA.  All the transitions in the TNFA are
1784    labelled with one character range (there are no transitions on empty
1785    strings).  The TNFA takes O(n^2) space in the worst case, `n' is size of
1786    the regexp. */
1787 static reg_errcode_t
1788 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
1789                 int *counts, int *offs)
1790 {
1791   tre_union_t *uni;
1792   tre_catenation_t *cat;
1793   tre_iteration_t *iter;
1794   reg_errcode_t errcode = REG_OK;
1795
1796   /* XXX - recurse using a stack!. */
1797   switch (node->type)
1798     {
1799     case LITERAL:
1800       break;
1801     case UNION:
1802       uni = (tre_union_t *)node->obj;
1803       errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
1804       if (errcode != REG_OK)
1805         return errcode;
1806       errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
1807       break;
1808
1809     case CATENATION:
1810       cat = (tre_catenation_t *)node->obj;
1811       /* Add a transition from each position in cat->left->lastpos
1812          to each position in cat->right->firstpos. */
1813       errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
1814                                transitions, counts, offs);
1815       if (errcode != REG_OK)
1816         return errcode;
1817       errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
1818       if (errcode != REG_OK)
1819         return errcode;
1820       errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
1821       break;
1822
1823     case ITERATION:
1824       iter = (tre_iteration_t *)node->obj;
1825       assert(iter->max == -1 || iter->max == 1);
1826
1827       if (iter->max == -1)
1828         {
1829           assert(iter->min == 0 || iter->min == 1);
1830           /* Add a transition from each last position in the iterated
1831              expression to each first position. */
1832           errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
1833                                    transitions, counts, offs);
1834           if (errcode != REG_OK)
1835             return errcode;
1836         }
1837       errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
1838       break;
1839     }
1840   return errcode;
1841 }
1842
1843
1844 #define ERROR_EXIT(err)           \
1845   do                              \
1846     {                             \
1847       errcode = err;              \
1848       if (/*CONSTCOND*/1)         \
1849         goto error_exit;          \
1850     }                             \
1851  while (/*CONSTCOND*/0)
1852
1853
1854 int
1855 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
1856 {
1857   tre_stack_t *stack;
1858   tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
1859   tre_pos_and_tags_t *p;
1860   int *counts = NULL, *offs = NULL;
1861   int i, add = 0;
1862   tre_tnfa_transition_t *transitions, *initial;
1863   tre_tnfa_t *tnfa = NULL;
1864   tre_submatch_data_t *submatch_data;
1865   tre_tag_direction_t *tag_directions = NULL;
1866   reg_errcode_t errcode;
1867   tre_mem_t mem;
1868
1869   /* Parse context. */
1870   tre_parse_ctx_t parse_ctx;
1871
1872   /* Allocate a stack used throughout the compilation process for various
1873      purposes. */
1874   stack = tre_stack_new(512, 10240, 128);
1875   if (!stack)
1876     return REG_ESPACE;
1877   /* Allocate a fast memory allocator. */
1878   mem = tre_mem_new();
1879   if (!mem)
1880     {
1881       tre_stack_destroy(stack);
1882       return REG_ESPACE;
1883     }
1884
1885   /* Parse the regexp. */
1886   memset(&parse_ctx, 0, sizeof(parse_ctx));
1887   parse_ctx.mem = mem;
1888   parse_ctx.stack = stack;
1889   parse_ctx.re = regex;
1890   parse_ctx.len = n;
1891   parse_ctx.cflags = cflags;
1892   parse_ctx.max_backref = -1;
1893   DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
1894   errcode = tre_parse(&parse_ctx);
1895   if (errcode != REG_OK)
1896     ERROR_EXIT(errcode);
1897   preg->re_nsub = parse_ctx.submatch_id - 1;
1898   tree = parse_ctx.result;
1899
1900   /* Back references and approximate matching cannot currently be used
1901      in the same regexp. */
1902   if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
1903     ERROR_EXIT(REG_BADPAT);
1904
1905 #ifdef TRE_DEBUG
1906   tre_ast_print(tree);
1907 #endif /* TRE_DEBUG */
1908
1909   /* Referring to nonexistent subexpressions is illegal. */
1910   if (parse_ctx.max_backref > (int)preg->re_nsub)
1911     ERROR_EXIT(REG_ESUBREG);
1912
1913   /* Allocate the TNFA struct. */
1914   tnfa = xcalloc(1, sizeof(tre_tnfa_t));
1915   if (tnfa == NULL)
1916     ERROR_EXIT(REG_ESPACE);
1917   tnfa->have_backrefs = parse_ctx.max_backref >= 0;
1918   tnfa->have_approx = parse_ctx.have_approx;
1919   tnfa->num_submatches = parse_ctx.submatch_id;
1920
1921   /* Set up tags for submatch addressing.  If REG_NOSUB is set and the
1922      regexp does not have back references, this can be skipped. */
1923   if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
1924     {
1925       DPRINT(("tre_compile: setting up tags\n"));
1926
1927       /* Figure out how many tags we will need. */
1928       errcode = tre_add_tags(NULL, stack, tree, tnfa);
1929       if (errcode != REG_OK)
1930         ERROR_EXIT(errcode);
1931 #ifdef TRE_DEBUG
1932       tre_ast_print(tree);
1933 #endif /* TRE_DEBUG */
1934
1935       if (tnfa->num_tags > 0)
1936         {
1937           tag_directions = xmalloc(sizeof(*tag_directions)
1938                                    * (tnfa->num_tags + 1));
1939           if (tag_directions == NULL)
1940             ERROR_EXIT(REG_ESPACE);
1941           tnfa->tag_directions = tag_directions;
1942           memset(tag_directions, -1,
1943                  sizeof(*tag_directions) * (tnfa->num_tags + 1));
1944         }
1945       tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
1946                                    sizeof(tnfa->minimal_tags));
1947       if (tnfa->minimal_tags == NULL)
1948         ERROR_EXIT(REG_ESPACE);
1949
1950       submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
1951                               sizeof(*submatch_data));
1952       if (submatch_data == NULL)
1953         ERROR_EXIT(REG_ESPACE);
1954       tnfa->submatch_data = submatch_data;
1955
1956       errcode = tre_add_tags(mem, stack, tree, tnfa);
1957       if (errcode != REG_OK)
1958         ERROR_EXIT(errcode);
1959
1960 #ifdef TRE_DEBUG
1961       for (i = 0; i < parse_ctx.submatch_id; i++)
1962         DPRINT(("pmatch[%d] = {t%d, t%d}\n",
1963                 i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
1964       for (i = 0; i < tnfa->num_tags; i++)
1965         DPRINT(("t%d is %s\n", i,
1966                 tag_directions[i] == TRE_TAG_MINIMIZE ?
1967                 "minimized" : "maximized"));
1968 #endif /* TRE_DEBUG */
1969     }
1970
1971   /* Expand iteration nodes. */
1972   errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
1973                            tag_directions, &tnfa->params_depth);
1974   if (errcode != REG_OK)
1975     ERROR_EXIT(errcode);
1976
1977   /* Add a dummy node for the final state.
1978      XXX - For certain patterns this dummy node can be optimized away,
1979            for example "a*" or "ab*".   Figure out a simple way to detect
1980            this possibility. */
1981   tmp_ast_l = tree;
1982   tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
1983   if (tmp_ast_r == NULL)
1984     ERROR_EXIT(REG_ESPACE);
1985
1986   tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
1987   if (tree == NULL)
1988     ERROR_EXIT(REG_ESPACE);
1989
1990 #ifdef TRE_DEBUG
1991   tre_ast_print(tree);
1992   DPRINT(("Number of states: %d\n", parse_ctx.position));
1993 #endif /* TRE_DEBUG */
1994
1995   errcode = tre_compute_nfl(mem, stack, tree);
1996   if (errcode != REG_OK)
1997     ERROR_EXIT(errcode);
1998
1999   counts = xmalloc(sizeof(int) * parse_ctx.position);
2000   if (counts == NULL)
2001     ERROR_EXIT(REG_ESPACE);
2002
2003   offs = xmalloc(sizeof(int) * parse_ctx.position);
2004   if (offs == NULL)
2005     ERROR_EXIT(REG_ESPACE);
2006
2007   for (i = 0; i < parse_ctx.position; i++)
2008     counts[i] = 0;
2009   tre_ast_to_tnfa(tree, NULL, counts, NULL);
2010
2011   add = 0;
2012   for (i = 0; i < parse_ctx.position; i++)
2013     {
2014       offs[i] = add;
2015       add += counts[i] + 1;
2016       counts[i] = 0;
2017     }
2018   transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2019   if (transitions == NULL)
2020     ERROR_EXIT(REG_ESPACE);
2021   tnfa->transitions = transitions;
2022   tnfa->num_transitions = add;
2023
2024   DPRINT(("Converting to TNFA:\n"));
2025   errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2026   if (errcode != REG_OK)
2027     ERROR_EXIT(errcode);
2028
2029   /* If in eight bit mode, compute a table of characters that can be the
2030      first character of a match. */
2031   tnfa->first_char = -1;
2032   if (TRE_MB_CUR_MAX == 1 && !tmp_ast_l->nullable)
2033     {
2034       int count = 0;
2035       tre_cint_t k;
2036       DPRINT(("Characters that can start a match:"));
2037       tnfa->firstpos_chars = xcalloc(256, sizeof(char));
2038       if (tnfa->firstpos_chars == NULL)
2039         ERROR_EXIT(REG_ESPACE);
2040       for (p = tree->firstpos; p->position >= 0; p++)
2041         {
2042           tre_tnfa_transition_t *j = transitions + offs[p->position];
2043           while (j->state != NULL)
2044             {
2045               for (k = j->code_min; k <= j->code_max && k < 256; k++)
2046                 {
2047                   DPRINT((" %d", k));
2048                   tnfa->firstpos_chars[k] = 1;
2049                   count++;
2050                 }
2051               j++;
2052             }
2053         }
2054       DPRINT(("\n"));
2055 #define TRE_OPTIMIZE_FIRST_CHAR 1
2056 #if TRE_OPTIMIZE_FIRST_CHAR
2057       if (count == 1)
2058         {
2059           for (k = 0; k < 256; k++)
2060             if (tnfa->firstpos_chars[k])
2061               {
2062                 DPRINT(("first char must be %d\n", k));
2063                 tnfa->first_char = k;
2064                 xfree(tnfa->firstpos_chars);
2065                 tnfa->firstpos_chars = NULL;
2066                 break;
2067               }
2068         }
2069 #endif
2070
2071     }
2072   else
2073     tnfa->firstpos_chars = NULL;
2074
2075
2076   p = tree->firstpos;
2077   i = 0;
2078   while (p->position >= 0)
2079     {
2080       i++;
2081
2082 #ifdef TRE_DEBUG
2083       {
2084         int *tags;
2085         DPRINT(("initial: %d", p->position));
2086         tags = p->tags;
2087         if (tags != NULL)
2088           {
2089             if (*tags >= 0)
2090               DPRINT(("/"));
2091             while (*tags >= 0)
2092               {
2093                 DPRINT(("%d", *tags));
2094                 tags++;
2095                 if (*tags >= 0)
2096                   DPRINT((","));
2097               }
2098           }
2099         DPRINT((", assert %d", p->assertions));
2100         if (p->params)
2101           {
2102             DPRINT((", "));
2103             tre_print_params(p->params);
2104           }
2105         DPRINT(("\n"));
2106       }
2107 #endif /* TRE_DEBUG */
2108
2109       p++;
2110     }
2111
2112   initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2113   if (initial == NULL)
2114     ERROR_EXIT(REG_ESPACE);
2115   tnfa->initial = initial;
2116
2117   i = 0;
2118   for (p = tree->firstpos; p->position >= 0; p++)
2119     {
2120       initial[i].state = transitions + offs[p->position];
2121       initial[i].state_id = p->position;
2122       initial[i].tags = NULL;
2123       /* Copy the arrays p->tags, and p->params, they are allocated
2124          from a tre_mem object. */
2125       if (p->tags)
2126         {
2127           int j;
2128           for (j = 0; p->tags[j] >= 0; j++);
2129           initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2130           if (!initial[i].tags)
2131             ERROR_EXIT(REG_ESPACE);
2132           memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2133         }
2134       initial[i].params = NULL;
2135       if (p->params)
2136         {
2137           initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
2138           if (!initial[i].params)
2139             ERROR_EXIT(REG_ESPACE);
2140           memcpy(initial[i].params, p->params,
2141                  sizeof(*p->params) * TRE_PARAM_LAST);
2142         }
2143       initial[i].assertions = p->assertions;
2144       i++;
2145     }
2146   initial[i].state = NULL;
2147
2148   tnfa->num_transitions = add;
2149   tnfa->final = transitions + offs[tree->lastpos[0].position];
2150   tnfa->num_states = parse_ctx.position;
2151   tnfa->cflags = cflags;
2152
2153   DPRINT(("final state %p\n", (void *)tnfa->final));
2154
2155   tre_mem_destroy(mem);
2156   tre_stack_destroy(stack);
2157   xfree(counts);
2158   xfree(offs);
2159
2160   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2161   return REG_OK;
2162
2163  error_exit:
2164   /* Free everything that was allocated and return the error code. */
2165   tre_mem_destroy(mem);
2166   if (stack != NULL)
2167     tre_stack_destroy(stack);
2168   if (counts != NULL)
2169     xfree(counts);
2170   if (offs != NULL)
2171     xfree(offs);
2172   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2173   tre_free(preg);
2174   return errcode;
2175 }
2176
2177
2178
2179
2180 void
2181 tre_free(regex_t *preg)
2182 {
2183   tre_tnfa_t *tnfa;
2184   unsigned int i;
2185   tre_tnfa_transition_t *trans;
2186
2187   tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2188   if (!tnfa)
2189     return;
2190
2191   for (i = 0; i < tnfa->num_transitions; i++)
2192     if (tnfa->transitions[i].state)
2193       {
2194         if (tnfa->transitions[i].tags)
2195           xfree(tnfa->transitions[i].tags);
2196         if (tnfa->transitions[i].neg_classes)
2197           xfree(tnfa->transitions[i].neg_classes);
2198         if (tnfa->transitions[i].params)
2199           xfree(tnfa->transitions[i].params);
2200       }
2201   if (tnfa->transitions)
2202     xfree(tnfa->transitions);
2203
2204   if (tnfa->initial)
2205     {
2206       for (trans = tnfa->initial; trans->state; trans++)
2207         {
2208           if (trans->tags)
2209             xfree(trans->tags);
2210           if (trans->params)
2211             xfree(trans->params);
2212         }
2213       xfree(tnfa->initial);
2214     }
2215
2216   if (tnfa->submatch_data)
2217     {
2218       for (i = 0; i < tnfa->num_submatches; i++)
2219         if (tnfa->submatch_data[i].parents)
2220           xfree(tnfa->submatch_data[i].parents);
2221       xfree(tnfa->submatch_data);
2222     }
2223
2224   if (tnfa->tag_directions)
2225     xfree(tnfa->tag_directions);
2226   if (tnfa->firstpos_chars)
2227     xfree(tnfa->firstpos_chars);
2228   if (tnfa->minimal_tags)
2229     xfree(tnfa->minimal_tags);
2230   xfree(tnfa);
2231 }
2232
2233 char *
2234 tre_version(void)
2235 {
2236   static char str[256];
2237   char *version;
2238
2239   if (str[0] == 0)
2240     {
2241       (void) tre_config(TRE_CONFIG_VERSION, &version);
2242       (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
2243     }
2244   return str;
2245 }
2246
2247 int
2248 tre_config(int query, void *result)
2249 {
2250   int *int_result = result;
2251   const char **string_result = result;
2252
2253   switch (query)
2254     {
2255     case TRE_CONFIG_APPROX:
2256 #ifdef TRE_APPROX
2257       *int_result = 1;
2258 #else /* !TRE_APPROX */
2259       *int_result = 0;
2260 #endif /* !TRE_APPROX */
2261       return REG_OK;
2262
2263     case TRE_CONFIG_WCHAR:
2264 #ifdef TRE_WCHAR
2265       *int_result = 1;
2266 #else /* !TRE_WCHAR */
2267       *int_result = 0;
2268 #endif /* !TRE_WCHAR */
2269       return REG_OK;
2270
2271     case TRE_CONFIG_MULTIBYTE:
2272 #ifdef TRE_MULTIBYTE
2273       *int_result = 1;
2274 #else /* !TRE_MULTIBYTE */
2275       *int_result = 0;
2276 #endif /* !TRE_MULTIBYTE */
2277       return REG_OK;
2278
2279     case TRE_CONFIG_SYSTEM_ABI:
2280 #ifdef TRE_CONFIG_SYSTEM_ABI
2281       *int_result = 1;
2282 #else /* !TRE_CONFIG_SYSTEM_ABI */
2283       *int_result = 0;
2284 #endif /* !TRE_CONFIG_SYSTEM_ABI */
2285       return REG_OK;
2286
2287     case TRE_CONFIG_VERSION:
2288       *string_result = TRE_VERSION;
2289       return REG_OK;
2290     }
2291
2292   return REG_NOMATCH;
2293 }
2294
2295
2296 /* EOF */