Import TRE regex library v0.8.0 to vendor branch gitea/vendor/TRE origin/vendor/TRE
authorJohn Marino <draco@marino.st>
Thu, 6 Aug 2015 22:04:13 +0000 (00:04 +0200)
committerJohn Marino <draco@marino.st>
Thu, 6 Aug 2015 22:04:13 +0000 (00:04 +0200)
21 files changed:
contrib/tre/LICENSE [new file with mode: 0644]
contrib/tre/README [new file with mode: 0644]
contrib/tre/lib/README [new file with mode: 0644]
contrib/tre/lib/regcomp.c [new file with mode: 0644]
contrib/tre/lib/regerror.c [new file with mode: 0644]
contrib/tre/lib/regexec.c [new file with mode: 0644]
contrib/tre/lib/tre-ast.c [new file with mode: 0644]
contrib/tre/lib/tre-ast.h [new file with mode: 0644]
contrib/tre/lib/tre-compile.c [new file with mode: 0644]
contrib/tre/lib/tre-compile.h [new file with mode: 0644]
contrib/tre/lib/tre-internal.h [new file with mode: 0644]
contrib/tre/lib/tre-match-backtrack.c [new file with mode: 0644]
contrib/tre/lib/tre-match-parallel.c [new file with mode: 0644]
contrib/tre/lib/tre-match-utils.h [new file with mode: 0644]
contrib/tre/lib/tre-mem.c [new file with mode: 0644]
contrib/tre/lib/tre-mem.h [new file with mode: 0644]
contrib/tre/lib/tre-parse.c [new file with mode: 0644]
contrib/tre/lib/tre-parse.h [new file with mode: 0644]
contrib/tre/lib/tre-stack.c [new file with mode: 0644]
contrib/tre/lib/tre-stack.h [new file with mode: 0644]
contrib/tre/lib/xmalloc.h [new file with mode: 0644]

diff --git a/contrib/tre/LICENSE b/contrib/tre/LICENSE
new file mode 100644 (file)
index 0000000..76ea75f
--- /dev/null
@@ -0,0 +1,29 @@
+This is the license, copyright notice, and disclaimer for TRE, a regex
+matching package (library and tools) with support for approximate
+matching.
+
+Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+  1. Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+  2. Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/contrib/tre/README b/contrib/tre/README
new file mode 100644 (file)
index 0000000..cff7639
--- /dev/null
@@ -0,0 +1,235 @@
+Introduction
+
+   TRE is a lightweight, robust, and efficient POSIX compliant regexp
+   matching library with some exciting features such as approximate
+   (fuzzy) matching.
+
+   The matching algorithm used in TRE uses linear worst-case time in
+   the length of the text being searched, and quadratic worst-case
+   time in the length of the used regular expression. In other words,
+   the time complexity of the algorithm is O(M^2N), where M is the
+   length of the regular expression and N is the length of the
+   text. The used space is also quadratic on the length of the regex,
+   but does not depend on the searched string. This quadratic
+   behaviour occurs only on pathological cases which are probably very
+   rare in practice.
+
+Features
+
+   TRE is not just yet another regexp matcher. TRE has some features
+   which are not there in most free POSIX compatible
+   implementations. Most of these features are not present in non-free
+   implementations either, for that matter.
+
+Approximate matching
+
+   Approximate pattern matching allows matches to be approximate, that
+   is, allows the matches to be close to the searched pattern under
+   some measure of closeness. TRE uses the edit-distance measure (also
+   known as the Levenshtein distance) where characters can be
+   inserted, deleted, or substituted in the searched text in order to
+   get an exact match. Each insertion, deletion, or substitution adds
+   the distance, or cost, of the match. TRE can report the matches
+   which have a cost lower than some given threshold value. TRE can
+   also be used to search for matches with the lowest cost.
+
+   TRE includes a version of the agrep (approximate grep) command line
+   tool for approximate regexp matching in the style of grep. Unlike
+   other agrep implementations (like the one by Sun Wu and Udi Manber
+   from University of Arizona available here) TRE agrep allows full
+   regexps of any length, any number of errors, and non-uniform costs
+   for insertion, deletion and substitution.
+
+Strict standard conformance
+
+   POSIX defines the behaviour of regexp functions precisely. TRE
+   attempts to conform to these specifications as strictly as
+   possible. TRE always returns the correct matches for subpatterns,
+   for example. Very few other implementations do this correctly. In
+   fact, the only other implementations besides TRE that I am aware of
+   (free or not) that get it right are Rx by Tom Lord, Regex++ by John
+   Maddock, and the AT&T ast regex by Glenn Fowler and Doug McIlroy.
+
+   The standard TRE tries to conform to is the IEEE Std 1003.1-2001,
+   or Open Group Base Specifications Issue 6, commonly referred to as
+   "POSIX".  It can be found online here. The relevant parts are the
+   base specifications on regular expressions (and the rationale) and
+   the description of the regcomp() API.
+
+   For an excellent survey on POSIX regexp matchers, see the testregex
+   pages by Glenn Fowler of AT&T Labs Research.
+
+Predictable matching speed
+
+   Because of the matching algorithm used in TRE, the maximum time
+   consumed by any regexec() call is always directly proportional to
+   the length of the searched string. There is one exception: if back
+   references are used, the matching may take time that grows
+   exponentially with the length of the string. This is because
+   matching back references is an NP complete problem, and almost
+   certainly requires exponential time to match in the worst case.
+
+Predictable and modest memory consumption
+
+   A regexec() call never allocates memory from the heap. TRE
+   allocates all the memory it needs during a regcomp() call, and some
+   temporary working space from the stack frame for the duration of
+   the regexec() call. The amount of temporary space needed is
+   constant during matching and does not depend on the searched
+   string. For regexps of reasonable size TRE needs less than 50K of
+   dynamically allocated memory during the regcomp() call, less than
+   20K for the compiled pattern buffer, and less than two kilobytes of
+   temporary working space from the stack frame during a regexec()
+   call. There is no time/memory tradeoff. TRE is also small in code
+   size; statically linking with TRE increases the executable size
+   less than 30K (gcc-3.2, x86, GNU/Linux).
+
+Wide character and multibyte character set support
+
+   TRE supports multibyte character sets. This makes it possible to
+   use regexps seamlessly with, for example, Japanese locales. TRE
+   also provides a wide character API.
+
+Binary pattern and data support
+
+   TRE provides APIs which allow binary zero characters both in
+   regexps and searched strings. The standard API cannot be easily
+   used to, for example, search for printable words from binary data
+   (although it is possible with some hacking). Searching for patterns
+   which contain binary zeroes embedded is not possible at all with
+   the standard API.
+
+Completely thread safe
+
+   TRE is completely thread safe. All the exported functions are
+   re-entrant, and a single compiled regexp object can be used
+   simultaneously in multiple contexts; e.g. in main() and a signal
+   handler, or in many threads of a multithreaded application.
+
+Portable
+
+   TRE is portable across multiple platforms. Here's a table of
+   platforms and compilers that have been successfully used to compile
+   and run TRE:
+
+      Platform(s)                       | Compiler(s)
+      ----------------------------------+------------
+      AIX 4.3.2 - 5.3.0                 | GCC, C for AIX compiler version 5
+      Compaq Tru64 UNIX V5.1A/B         | Compaq C V6.4-014 - V6.5-011
+      Cygwin 1.3 - 1.5                  | GCC
+      Digital UNIX V4.0                 | DEC C V5.9-005
+      FreeBSD 4 and above               | GCC
+      GNU/Linux systems on x86, x86_64, | GCC
+      ppc64, s390                      |
+      HP-UX 10.20- 11.00                | GCC, HP C Compiler
+      IRIX 6.5                          | GCC, MIPSpro Compilers 7.3.1.3m
+      Max OS X                         |
+      NetBSD 1.5 and above              | GCC, egcs
+      OpenBSD 3.3 and above             | GCC
+      Solaris 2.7-10 sparc/x86          | GCC, Sun Workshop 6 compilers
+      Windows 98 - XP                   | Microsoft Visual C++ 6.0
+
+   TRE 0.7.5 should compile without changes on all of the above
+   platforms.  Tell me if you are using TRE on a platform that is not
+   listed above, and I'll add it to the list. Also let me know if TRE
+   does not work on a listed platform.
+
+   Depending on the platform, you may need to install libutf8 to get
+   wide character and multibyte character set support.
+
+ Free
+
+   TRE is released under a license which is essentially the same as
+   the "2 clause" BSD-style license used in NetBSD.  See the file
+   LICENSE for details.
+
+Roadmap
+
+   There are currently two features, both related to collating
+   elements, missing from 100% POSIX compliance. These are:
+
+     * Support for collating elements (e.g. [[.<X>.]], where <X> is a
+       collating element). It is not possible to support
+       multi-character collating elements portably, since POSIX does
+       not define a way to determine whether a character sequence is a
+       multi-character collating element or not.
+
+     * Support for equivalence classes, for example [[=<X>=]], where
+       <X> is a collating element. An equivalence class matches any
+       character which has the same primary collation weight as
+       <X>. Again, POSIX provides no portable mechanism for
+       determining the primary collation weight of a collating
+       element.
+
+   Note that other portable regexp implementations don't support
+   collating elements either. The single exception is Regex++, which
+   comes with its own database for collating elements for different
+   locales. Support for collating elements and equivalence classes has
+   not been widely requested and is not very high on the TODO list at
+   the moment.
+
+   These are other features I'm planning to implement real soon now:
+
+     * All the missing GNU extensions enabled in GNU regex, such as
+       [[:<:]] and [[:>:]]
+
+     * A REG_SHORTEST regexec() flag for returning the shortest match
+       instead of the longest match.
+
+     * Perl-compatible syntax
+
+            [:^class:]
+               Matches anything but the characters in class. Note that
+               [^[:class:]] works already, this would be just a
+               convenience shorthand.
+
+            \A
+               Match only at beginning of string
+
+            \Z
+               Match only at end of string, or before newline at the end
+
+            \z
+               Match only at end of string
+
+            \l
+               Lowercase next char (think vi)
+
+            \u
+               Uppercase next char (think vi)
+
+            \L
+               Lowercase till \E (think vi)
+
+            \U
+               Uppercase till \E (think vi)
+
+            (?=pattern)
+               Zero-width positive look-ahead assertions.
+
+            (?!pattern)
+               Zero-width negative look-ahead assertions.
+
+            (?<=pattern)
+               Zero-width positive look-behind assertions.
+
+            (?<!pattern)
+               Zero-width negative look-behind assertions.
+
+   Documentation especially for the nonstandard features of TRE, such
+   as approximate matching, is a work in progress (with "progress"
+   loosely defined...)
+
+   Mailing lists
+
+   tre-general@lists.laurikari.net
+      This list is for any discussion on the TRE software, including
+      reporting bugs, feature requests, requests for help, and other
+      things.
+
+   tre-announce@lists.laurikari.net
+      Subscribe to this list to get announcements of new releases of
+      TRE.  Alternatively, you can subscribe to the freshmeat.net
+      project and get similar announcements that way, if you prefer.
+
+Ville Laurikari    <vl@iki.fi>
diff --git a/contrib/tre/lib/README b/contrib/tre/lib/README
new file mode 100644 (file)
index 0000000..f4d4021
--- /dev/null
@@ -0,0 +1,69 @@
+This code is structured roughly as follows:
+
+xmalloc.c:
+  - Wrappers for the malloc() functions, for error generation and
+    memory leak checking purposes.
+
+tre-mem.c:
+  - A simple and efficient memory allocator.
+
+tre-stack.c:
+  - Implements a simple stack data structure.
+
+tre-ast.c:
+  - Abstract syntax tree (AST) definitions.
+
+tre-parse.c:
+  - Regexp parser.  Parses a POSIX regexp (with TRE extensions) into
+    an abstract syntax tree (AST).
+
+tre-compile.c:
+  - Compiles ASTs to ready-to-use regex objects.  Comprised of two parts:
+    * Routine to convert an AST to a tagged AST.  A tagged AST has
+      appropriate minimized or maximized tags added to keep track of
+      submatches.
+    * Routine to convert tagged ASTs to tagged nondeterministic state
+      machines (TNFAs) without epsilon transitions (transitions on
+      empty strings).
+
+tre-match-parallel.c:
+  - Parallel TNFA matcher.
+    * The matcher basically takes a string and a TNFA and finds the
+      leftmost longest match and submatches in one pass over the input
+      string.  Only the beginning of the input string is scanned until
+      a leftmost match and longest match is found.
+    * The matcher cannot handle back references, but the worst case
+      time consumption is O(l) where l is the length of the input
+      string.  The space consumption is constant.
+
+tre-match-backtrack.c:
+  - A traditional backtracking matcher.
+    * Like the parallel matcher, takes a string and a TNFA and finds
+      the leftmost longest match and submatches.  Portions of the
+      input string may (and usually are) scanned multiple times.
+    * Can handle back references.  The worst case time consumption,
+      however, is O(k^l) where k is some constant and l is the length
+      of the input string.  The worst case space consumption is O(l).
+
+tre-match-approx.c:
+  - Approximate parallel TNFA matcher.
+    * Finds the leftmost and longest match and submatches in one pass
+      over the input string.  The match may contain errors.  Each
+      missing, substituted, or extra character in the match increases
+      the cost of the match.  A maximum cost for the returned match
+      can be given.  The cost of the found match is returned.
+    * Cannot handle back references.  The space and time consumption
+      bounds are the same as for the parallel exact matcher, but
+      in general this matcher is slower than the exact matcher.
+
+regcomp.c:
+  - Implementation of the regcomp() family of functions as simple
+    wrappers for tre_compile().
+
+regexec.c:
+  - Implementation of the regexec() family of functions.
+    * The appropriate matcher is dispatched according to the
+      features used in the compiled regex object.
+
+regerror.c:
+  - Implements the regerror() function.
diff --git a/contrib/tre/lib/regcomp.c b/contrib/tre/lib/regcomp.c
new file mode 100644 (file)
index 0000000..281b38e
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+  tre_regcomp.c - TRE POSIX compatible regex compilation functions.
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#include "tre.h"
+#include "tre-internal.h"
+#include "xmalloc.h"
+
+int
+tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags)
+{
+  int ret;
+#if TRE_WCHAR
+  tre_char_t *wregex;
+  size_t wlen;
+
+  wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
+  if (wregex == NULL)
+    return REG_ESPACE;
+
+  /* If the current locale uses the standard single byte encoding of
+     characters, we don't do a multibyte string conversion.  If we did,
+     many applications which use the default locale would break since
+     the default "C" locale uses the 7-bit ASCII character set, and
+     all characters with the eighth bit set would be considered invalid. */
+#if TRE_MULTIBYTE
+  if (TRE_MB_CUR_MAX == 1)
+#endif /* TRE_MULTIBYTE */
+    {
+      unsigned int i;
+      const unsigned char *str = (const unsigned char *)regex;
+      tre_char_t *wstr = wregex;
+
+      for (i = 0; i < n; i++)
+       *(wstr++) = *(str++);
+      wlen = n;
+    }
+#if TRE_MULTIBYTE
+  else
+    {
+      int consumed;
+      tre_char_t *wcptr = wregex;
+#ifdef HAVE_MBSTATE_T
+      mbstate_t state;
+      memset(&state, '\0', sizeof(state));
+#endif /* HAVE_MBSTATE_T */
+      while (n > 0)
+       {
+         consumed = tre_mbrtowc(wcptr, regex, n, &state);
+
+         switch (consumed)
+           {
+           case 0:
+             if (*regex == '\0')
+               consumed = 1;
+             else
+               {
+                 xfree(wregex);
+                 return REG_BADPAT;
+               }
+             break;
+           case -1:
+             DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
+             xfree(wregex);
+             return REG_BADPAT;
+           case -2:
+             /* The last character wasn't complete.  Let's not call it a
+                fatal error. */
+             consumed = n;
+             break;
+           }
+         regex += consumed;
+         n -= consumed;
+         wcptr++;
+       }
+      wlen = wcptr - wregex;
+    }
+#endif /* TRE_MULTIBYTE */
+
+  wregex[wlen] = L'\0';
+  ret = tre_compile(preg, wregex, wlen, cflags);
+  xfree(wregex);
+#else /* !TRE_WCHAR */
+  ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags);
+#endif /* !TRE_WCHAR */
+
+  return ret;
+}
+
+int
+tre_regcomp(regex_t *preg, const char *regex, int cflags)
+{
+  return tre_regncomp(preg, regex, regex ? strlen(regex) : 0, cflags);
+}
+
+
+#ifdef TRE_WCHAR
+int
+tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t n, int cflags)
+{
+  return tre_compile(preg, regex, n, cflags);
+}
+
+int
+tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags)
+{
+  return tre_compile(preg, regex, regex ? wcslen(regex) : 0, cflags);
+}
+#endif /* TRE_WCHAR */
+
+void
+tre_regfree(regex_t *preg)
+{
+  tre_free(preg);
+}
+
+/* EOF */
diff --git a/contrib/tre/lib/regerror.c b/contrib/tre/lib/regerror.c
new file mode 100644 (file)
index 0000000..701f701
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+  tre_regerror.c - POSIX tre_regerror() implementation for TRE.
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <string.h>
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif /* HAVE_WCTYPE_H */
+
+#include "tre-internal.h"
+#include "tre.h"
+
+#ifdef HAVE_GETTEXT
+#include <libintl.h>
+#else
+#define dgettext(p, s) s
+#define gettext(s) s
+#endif
+
+#define _(String) dgettext(PACKAGE, String)
+#define gettext_noop(String) String
+
+/* Error message strings for error codes listed in `tre.h'.  This list
+   needs to be in sync with the codes listed there, naturally. */
+static const char *tre_error_messages[] =
+  { gettext_noop("No error"),                           /* REG_OK */
+    gettext_noop("No match"),                           /* REG_NOMATCH */
+    gettext_noop("Invalid regexp"),                     /* REG_BADPAT */
+    gettext_noop("Unknown collating element"),          /* REG_ECOLLATE */
+    gettext_noop("Unknown character class name"),       /* REG_ECTYPE */
+    gettext_noop("Trailing backslash"),                         /* REG_EESCAPE */
+    gettext_noop("Invalid back reference"),             /* REG_ESUBREG */
+    gettext_noop("Missing ']'"),                        /* REG_EBRACK */
+    gettext_noop("Missing ')'"),                        /* REG_EPAREN */
+    gettext_noop("Missing '}'"),                        /* REG_EBRACE */
+    gettext_noop("Invalid contents of {}"),             /* REG_BADBR */
+    gettext_noop("Invalid character range"),            /* REG_ERANGE */
+    gettext_noop("Out of memory"),                      /* REG_ESPACE */
+    gettext_noop("Invalid use of repetition operators")         /* REG_BADRPT */
+  };
+
+size_t
+tre_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
+{
+  const char *err;
+  size_t err_len;
+
+  /*LINTED*/(void)&preg;
+  if (errcode >= 0
+      && errcode < (int)(sizeof(tre_error_messages)
+                        / sizeof(*tre_error_messages)))
+    err = gettext(tre_error_messages[errcode]);
+  else
+    err = gettext("Unknown error");
+
+  err_len = strlen(err) + 1;
+  if (errbuf_size > 0 && errbuf != NULL)
+    {
+      if (err_len > errbuf_size)
+       {
+         strncpy(errbuf, err, errbuf_size - 1);
+         errbuf[errbuf_size - 1] = '\0';
+       }
+      else
+       {
+         strcpy(errbuf, err);
+       }
+    }
+  return err_len;
+}
+
+/* EOF */
diff --git a/contrib/tre/lib/regexec.c b/contrib/tre/lib/regexec.c
new file mode 100644 (file)
index 0000000..54779ef
--- /dev/null
@@ -0,0 +1,348 @@
+/*
+  tre_regexec.c - TRE POSIX compatible matching functions (and more).
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#ifdef TRE_USE_ALLOCA
+/* AIX requires this to be the first thing in the file.         */
+#ifndef __GNUC__
+# if HAVE_ALLOCA_H
+#  include <alloca.h>
+# else
+#  ifdef _AIX
+ #pragma alloca
+#  else
+#   ifndef alloca /* predefined by HP cc +Olibcalls */
+char *alloca ();
+#   endif
+#  endif
+# endif
+#endif
+#endif /* TRE_USE_ALLOCA */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif /* HAVE_WCTYPE_H */
+#ifndef TRE_WCHAR
+#include <ctype.h>
+#endif /* !TRE_WCHAR */
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif /* HAVE_MALLOC_H */
+#include <limits.h>
+
+#include "tre-internal.h"
+#include "tre.h"
+#include "xmalloc.h"
+
+
+/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
+   endpoint values. */
+void
+tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
+               const tre_tnfa_t *tnfa, int *tags, int match_eo)
+{
+  tre_submatch_data_t *submatch_data;
+  unsigned int i, j;
+  int *parents;
+
+  i = 0;
+  if (match_eo >= 0 && !(cflags & REG_NOSUB))
+    {
+      /* Construct submatch offsets from the tags. */
+      DPRINT(("end tag = t%d = %d\n", tnfa->end_tag, match_eo));
+      submatch_data = tnfa->submatch_data;
+      while (i < tnfa->num_submatches && i < nmatch)
+       {
+         if (submatch_data[i].so_tag == tnfa->end_tag)
+           pmatch[i].rm_so = match_eo;
+         else
+           pmatch[i].rm_so = tags[submatch_data[i].so_tag];
+
+         if (submatch_data[i].eo_tag == tnfa->end_tag)
+           pmatch[i].rm_eo = match_eo;
+         else
+           pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
+
+         /* If either of the endpoints were not used, this submatch
+            was not part of the match. */
+         if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
+           pmatch[i].rm_so = pmatch[i].rm_eo = -1;
+
+         DPRINT(("pmatch[%d] = {t%d = %d, t%d = %d}\n", i,
+                 submatch_data[i].so_tag, pmatch[i].rm_so,
+                 submatch_data[i].eo_tag, pmatch[i].rm_eo));
+         i++;
+       }
+      /* Reset all submatches that are not within all of their parent
+        submatches. */
+      i = 0;
+      while (i < tnfa->num_submatches && i < nmatch)
+       {
+         if (pmatch[i].rm_eo == -1)
+           assert(pmatch[i].rm_so == -1);
+         assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
+
+         parents = submatch_data[i].parents;
+         if (parents != NULL)
+           for (j = 0; parents[j] >= 0; j++)
+             {
+               DPRINT(("pmatch[%d] parent %d\n", i, parents[j]));
+               if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
+                   || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
+                 pmatch[i].rm_so = pmatch[i].rm_eo = -1;
+             }
+         i++;
+       }
+    }
+
+  while (i < nmatch)
+    {
+      pmatch[i].rm_so = -1;
+      pmatch[i].rm_eo = -1;
+      i++;
+    }
+}
+
+
+/*
+  Wrapper functions for POSIX compatible regexp matching.
+*/
+
+int
+tre_have_backrefs(const regex_t *preg)
+{
+  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+  return tnfa->have_backrefs;
+}
+
+int
+tre_have_approx(const regex_t *preg)
+{
+  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+  return tnfa->have_approx;
+}
+
+static int
+tre_match(const tre_tnfa_t *tnfa, const void *string, size_t len,
+         tre_str_type_t type, size_t nmatch, regmatch_t pmatch[],
+         int eflags)
+{
+  reg_errcode_t status;
+  int *tags = NULL, eo;
+  if (tnfa->num_tags > 0 && nmatch > 0)
+    {
+#ifdef TRE_USE_ALLOCA
+      tags = alloca(sizeof(*tags) * tnfa->num_tags);
+#else /* !TRE_USE_ALLOCA */
+      tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
+#endif /* !TRE_USE_ALLOCA */
+      if (tags == NULL)
+       return REG_ESPACE;
+    }
+
+  /* Dispatch to the appropriate matcher. */
+  if (tnfa->have_backrefs || eflags & REG_BACKTRACKING_MATCHER)
+    {
+      /* The regex has back references, use the backtracking matcher. */
+      if (type == STR_USER)
+       {
+         const tre_str_source *source = string;
+         if (source->rewind == NULL || source->compare == NULL)
+           /* The backtracking matcher requires rewind and compare
+              capabilities from the input stream. */
+           return REG_BADPAT;
+       }
+      status = tre_tnfa_run_backtrack(tnfa, string, (int)len, type,
+                                     tags, eflags, &eo);
+    }
+#ifdef TRE_APPROX
+  else if (tnfa->have_approx || eflags & REG_APPROX_MATCHER)
+    {
+      /* The regex uses approximate matching, use the approximate matcher. */
+      regamatch_t match;
+      regaparams_t params;
+      tre_regaparams_default(&params);
+      params.max_err = 0;
+      params.max_cost = 0;
+      status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags,
+                                  &match, params, eflags, &eo);
+    }
+#endif /* TRE_APPROX */
+  else
+    {
+      /* Exact matching, no back references, use the parallel matcher. */
+      status = tre_tnfa_run_parallel(tnfa, string, (int)len, type,
+                                    tags, eflags, &eo);
+    }
+
+  if (status == REG_OK)
+    /* A match was found, so fill the submatch registers. */
+    tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
+#ifndef TRE_USE_ALLOCA
+  if (tags)
+    xfree(tags);
+#endif /* !TRE_USE_ALLOCA */
+  return status;
+}
+
+int
+tre_regnexec(const regex_t *preg, const char *str, size_t len,
+        size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+  tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS;
+
+  return tre_match(tnfa, str, len, type, nmatch, pmatch, eflags);
+}
+
+int
+tre_regexec(const regex_t *preg, const char *str,
+       size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  return tre_regnexec(preg, str, (unsigned)-1, nmatch, pmatch, eflags);
+}
+
+
+#ifdef TRE_WCHAR
+
+int
+tre_regwnexec(const regex_t *preg, const wchar_t *str, size_t len,
+         size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+  return tre_match(tnfa, str, len, STR_WIDE, nmatch, pmatch, eflags);
+}
+
+int
+tre_regwexec(const regex_t *preg, const wchar_t *str,
+        size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  return tre_regwnexec(preg, str, (unsigned)-1, nmatch, pmatch, eflags);
+}
+
+#endif /* TRE_WCHAR */
+
+int
+tre_reguexec(const regex_t *preg, const tre_str_source *str,
+        size_t nmatch, regmatch_t pmatch[], int eflags)
+{
+  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+  return tre_match(tnfa, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags);
+}
+
+
+#ifdef TRE_APPROX
+
+/*
+  Wrapper functions for approximate regexp matching.
+*/
+
+static int
+tre_match_approx(const tre_tnfa_t *tnfa, const void *string, size_t len,
+                tre_str_type_t type, regamatch_t *match, regaparams_t params,
+                int eflags)
+{
+  reg_errcode_t status;
+  int *tags = NULL, eo;
+
+  /* If the regexp does not use approximate matching features, the
+     maximum cost is zero, and the approximate matcher isn't forced,
+     use the exact matcher instead. */
+  if (params.max_cost == 0 && !tnfa->have_approx
+      && !(eflags & REG_APPROX_MATCHER))
+    return tre_match(tnfa, string, len, type, match->nmatch, match->pmatch,
+                    eflags);
+
+  /* Back references are not supported by the approximate matcher. */
+  if (tnfa->have_backrefs)
+    return REG_BADPAT;
+
+  if (tnfa->num_tags > 0 && match->nmatch > 0)
+    {
+#if TRE_USE_ALLOCA
+      tags = alloca(sizeof(*tags) * tnfa->num_tags);
+#else /* !TRE_USE_ALLOCA */
+      tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
+#endif /* !TRE_USE_ALLOCA */
+      if (tags == NULL)
+       return REG_ESPACE;
+    }
+  status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags,
+                              match, params, eflags, &eo);
+  if (status == REG_OK)
+    tre_fill_pmatch(match->nmatch, match->pmatch, tnfa->cflags, tnfa, tags, eo);
+#ifndef TRE_USE_ALLOCA
+  if (tags)
+    xfree(tags);
+#endif /* !TRE_USE_ALLOCA */
+  return status;
+}
+
+int
+tre_reganexec(const regex_t *preg, const char *str, size_t len,
+         regamatch_t *match, regaparams_t params, int eflags)
+{
+  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+  tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS;
+
+  return tre_match_approx(tnfa, str, len, type, match, params, eflags);
+}
+
+int
+tre_regaexec(const regex_t *preg, const char *str,
+        regamatch_t *match, regaparams_t params, int eflags)
+{
+  return tre_reganexec(preg, str, (unsigned)-1, match, params, eflags);
+}
+
+#ifdef TRE_WCHAR
+
+int
+tre_regawnexec(const regex_t *preg, const wchar_t *str, size_t len,
+          regamatch_t *match, regaparams_t params, int eflags)
+{
+  tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+  return tre_match_approx(tnfa, str, len, STR_WIDE,
+                         match, params, eflags);
+}
+
+int
+tre_regawexec(const regex_t *preg, const wchar_t *str,
+         regamatch_t *match, regaparams_t params, int eflags)
+{
+  return tre_regawnexec(preg, str, (unsigned)-1, match, params, eflags);
+}
+
+#endif /* TRE_WCHAR */
+
+void
+tre_regaparams_default(regaparams_t *params)
+{
+  memset(params, 0, sizeof(*params));
+  params->cost_ins = 1;
+  params->cost_del = 1;
+  params->cost_subst = 1;
+  params->max_cost = INT_MAX;
+  params->max_ins = INT_MAX;
+  params->max_del = INT_MAX;
+  params->max_subst = INT_MAX;
+  params->max_err = INT_MAX;
+}
+
+#endif /* TRE_APPROX */
+
+/* EOF */
diff --git a/contrib/tre/lib/tre-ast.c b/contrib/tre/lib/tre-ast.c
new file mode 100644 (file)
index 0000000..acb387a
--- /dev/null
@@ -0,0 +1,226 @@
+/*
+  tre-ast.c - Abstract syntax tree (AST) routines
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+#include <assert.h>
+
+#include "tre-ast.h"
+#include "tre-mem.h"
+
+tre_ast_node_t *
+tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
+{
+  tre_ast_node_t *node;
+
+  node = tre_mem_calloc(mem, sizeof(*node));
+  if (!node)
+    return NULL;
+  node->obj = tre_mem_calloc(mem, size);
+  if (!node->obj)
+    return NULL;
+  node->type = type;
+  node->nullable = -1;
+  node->submatch_id = -1;
+
+  return node;
+}
+
+tre_ast_node_t *
+tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
+{
+  tre_ast_node_t *node;
+  tre_literal_t *lit;
+
+  node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
+  if (!node)
+    return NULL;
+  lit = node->obj;
+  lit->code_min = code_min;
+  lit->code_max = code_max;
+  lit->position = position;
+
+  return node;
+}
+
+tre_ast_node_t *
+tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
+                int minimal)
+{
+  tre_ast_node_t *node;
+  tre_iteration_t *iter;
+
+  node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
+  if (!node)
+    return NULL;
+  iter = node->obj;
+  iter->arg = arg;
+  iter->min = min;
+  iter->max = max;
+  iter->minimal = minimal;
+  node->num_submatches = arg->num_submatches;
+
+  return node;
+}
+
+tre_ast_node_t *
+tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
+{
+  tre_ast_node_t *node;
+
+  node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
+  if (node == NULL)
+    return NULL;
+  ((tre_union_t *)node->obj)->left = left;
+  ((tre_union_t *)node->obj)->right = right;
+  node->num_submatches = left->num_submatches + right->num_submatches;
+
+  return node;
+}
+
+tre_ast_node_t *
+tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
+                      tre_ast_node_t *right)
+{
+  tre_ast_node_t *node;
+
+  node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
+  if (node == NULL)
+    return NULL;
+  ((tre_catenation_t *)node->obj)->left = left;
+  ((tre_catenation_t *)node->obj)->right = right;
+  node->num_submatches = left->num_submatches + right->num_submatches;
+
+  return node;
+}
+
+#ifdef TRE_DEBUG
+
+static void
+tre_findent(FILE *stream, int i)
+{
+  while (i-- > 0)
+    fputc(' ', stream);
+}
+
+void
+tre_print_params(int *params)
+{
+  int i;
+  if (params)
+    {
+      DPRINT(("params ["));
+      for (i = 0; i < TRE_PARAM_LAST; i++)
+       {
+         if (params[i] == TRE_PARAM_UNSET)
+           DPRINT(("unset"));
+         else if (params[i] == TRE_PARAM_DEFAULT)
+           DPRINT(("default"));
+         else
+           DPRINT(("%d", params[i]));
+         if (i < TRE_PARAM_LAST - 1)
+           DPRINT((", "));
+       }
+      DPRINT(("]"));
+    }
+}
+
+static void
+tre_do_print(FILE *stream, tre_ast_node_t *ast, int indent)
+{
+  int code_min, code_max, pos;
+  int num_tags = ast->num_tags;
+  tre_literal_t *lit;
+  tre_iteration_t *iter;
+
+  tre_findent(stream, indent);
+  switch (ast->type)
+    {
+    case LITERAL:
+      lit = ast->obj;
+      code_min = lit->code_min;
+      code_max = lit->code_max;
+      pos = lit->position;
+      if (IS_EMPTY(lit))
+       {
+         fprintf(stream, "literal empty\n");
+       }
+      else if (IS_ASSERTION(lit))
+       {
+         int i;
+         char *assertions[] = { "bol", "eol", "ctype", "!ctype",
+                                "bow", "eow", "wb", "!wb" };
+         if (code_max >= ASSERT_LAST << 1)
+           assert(0);
+         fprintf(stream, "assertions: ");
+         for (i = 0; (1 << i) <= ASSERT_LAST; i++)
+           if (code_max & (1 << i))
+             fprintf(stream, "%s ", assertions[i]);
+         fprintf(stream, "\n");
+       }
+      else if (IS_TAG(lit))
+       {
+         fprintf(stream, "tag %d\n", code_max);
+       }
+      else if (IS_BACKREF(lit))
+       {
+         fprintf(stream, "backref %d, pos %d\n", code_max, pos);
+       }
+      else if (IS_PARAMETER(lit))
+       {
+         tre_print_params(lit->u.params);
+         fprintf(stream, "\n");
+       }
+      else
+       {
+         fprintf(stream, "literal (%c, %c) (%d, %d), pos %d, sub %d, "
+                 "%d tags\n", code_min, code_max, code_min, code_max, pos,
+                 ast->submatch_id, num_tags);
+       }
+      break;
+    case ITERATION:
+      iter = ast->obj;
+      fprintf(stream, "iteration {%d, %d}, sub %d, %d tags, %s\n",
+             iter->min, iter->max, ast->submatch_id, num_tags,
+             iter->minimal ? "minimal" : "greedy");
+      tre_do_print(stream, iter->arg, indent + 2);
+      break;
+    case UNION:
+      fprintf(stream, "union, sub %d, %d tags\n", ast->submatch_id, num_tags);
+      tre_do_print(stream, ((tre_union_t *)ast->obj)->left, indent + 2);
+      tre_do_print(stream, ((tre_union_t *)ast->obj)->right, indent + 2);
+      break;
+    case CATENATION:
+      fprintf(stream, "catenation, sub %d, %d tags\n", ast->submatch_id,
+             num_tags);
+      tre_do_print(stream, ((tre_catenation_t *)ast->obj)->left, indent + 2);
+      tre_do_print(stream, ((tre_catenation_t *)ast->obj)->right, indent + 2);
+      break;
+    default:
+      assert(0);
+      break;
+    }
+}
+
+static void
+tre_ast_fprint(FILE *stream, tre_ast_node_t *ast)
+{
+  tre_do_print(stream, ast, 0);
+}
+
+void
+tre_ast_print(tre_ast_node_t *tree)
+{
+  printf("AST:\n");
+  tre_ast_fprint(stdout, tree);
+}
+
+#endif /* TRE_DEBUG */
+
+/* EOF */
diff --git a/contrib/tre/lib/tre-ast.h b/contrib/tre/lib/tre-ast.h
new file mode 100644 (file)
index 0000000..d9af376
--- /dev/null
@@ -0,0 +1,128 @@
+/*
+  tre-ast.h - Abstract syntax tree (AST) definitions
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+
+#ifndef TRE_AST_H
+#define TRE_AST_H 1
+
+#include "tre-mem.h"
+#include "tre-internal.h"
+#include "tre-compile.h"
+
+/* The different AST node types. */
+typedef enum {
+  LITERAL,
+  CATENATION,
+  ITERATION,
+  UNION
+} tre_ast_type_t;
+
+/* Special subtypes of TRE_LITERAL. */
+#define EMPTY    -1   /* Empty leaf (denotes empty string). */
+#define ASSERTION -2   /* Assertion leaf. */
+#define TAG      -3   /* Tag leaf. */
+#define BACKREF          -4   /* Back reference leaf. */
+#define PARAMETER -5   /* Parameter. */
+
+#define IS_SPECIAL(x)  ((x)->code_min < 0)
+#define IS_EMPTY(x)    ((x)->code_min == EMPTY)
+#define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
+#define IS_TAG(x)      ((x)->code_min == TAG)
+#define IS_BACKREF(x)  ((x)->code_min == BACKREF)
+#define IS_PARAMETER(x) ((x)->code_min == PARAMETER)
+
+
+/* A generic AST node.  All AST nodes consist of this node on the top
+   level with `obj' pointing to the actual content. */
+typedef struct {
+  tre_ast_type_t type;   /* Type of the node. */
+  void *obj;             /* Pointer to actual node. */
+  int nullable;
+  int submatch_id;
+  int num_submatches;
+  int num_tags;
+  tre_pos_and_tags_t *firstpos;
+  tre_pos_and_tags_t *lastpos;
+} tre_ast_node_t;
+
+
+/* A "literal" node.  These are created for assertions, back references,
+   tags, matching parameter settings, and all expressions that match one
+   character. */
+typedef struct {
+  long code_min;
+  long code_max;
+  int position;
+  union {
+    tre_ctype_t class;
+    int *params;
+  } u;
+  tre_ctype_t *neg_classes;
+} tre_literal_t;
+
+/* A "catenation" node.         These are created when two regexps are concatenated.
+   If there are more than one subexpressions in sequence, the `left' part
+   holds all but the last, and `right' part holds the last subexpression
+   (catenation is left associative). */
+typedef struct {
+  tre_ast_node_t *left;
+  tre_ast_node_t *right;
+} tre_catenation_t;
+
+/* An "iteration" node.         These are created for the "*", "+", "?", and "{m,n}"
+   operators. */
+typedef struct {
+  /* Subexpression to match. */
+  tre_ast_node_t *arg;
+  /* Minimum number of consecutive matches. */
+  int min;
+  /* Maximum number of consecutive matches. */
+  int max;
+  /* If 0, match as many characters as possible, if 1 match as few as
+     possible. Note that this does not always mean the same thing as
+     matching as many/few repetitions as possible. */
+  unsigned int minimal:1;
+  /* Approximate matching parameters (or NULL). */
+  int *params;
+} tre_iteration_t;
+
+/* An "union" node.  These are created for the "|" operator. */
+typedef struct {
+  tre_ast_node_t *left;
+  tre_ast_node_t *right;
+} tre_union_t;
+
+tre_ast_node_t *
+tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size);
+
+tre_ast_node_t *
+tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position);
+
+tre_ast_node_t *
+tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
+                int minimal);
+
+tre_ast_node_t *
+tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right);
+
+tre_ast_node_t *
+tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
+                      tre_ast_node_t *right);
+
+#ifdef TRE_DEBUG
+void
+tre_ast_print(tre_ast_node_t *tree);
+
+/* XXX - rethink AST printing API */
+void
+tre_print_params(int *params);
+#endif /* TRE_DEBUG */
+
+#endif /* TRE_AST_H */
+
+/* EOF */
diff --git a/contrib/tre/lib/tre-compile.c b/contrib/tre/lib/tre-compile.c
new file mode 100644 (file)
index 0000000..948cc46
--- /dev/null
@@ -0,0 +1,2296 @@
+/*
+  tre-compile.c - TRE regex compiler
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+/*
+  TODO:
+   - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
+     function calls.
+*/
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+
+#include "tre-internal.h"
+#include "tre-mem.h"
+#include "tre-stack.h"
+#include "tre-ast.h"
+#include "tre-parse.h"
+#include "tre-compile.h"
+#include "tre.h"
+#include "xmalloc.h"
+
+/*
+  Algorithms to setup tags so that submatch addressing can be done.
+*/
+
+
+/* Inserts a catenation node to the root of the tree given in `node'.
+   As the left child a new tag with number `tag_id' to `node' is added,
+   and the right child is the old root. */
+static reg_errcode_t
+tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
+{
+  tre_catenation_t *c;
+
+  DPRINT(("add_tag_left: tag %d\n", tag_id));
+
+  c = tre_mem_alloc(mem, sizeof(*c));
+  if (c == NULL)
+    return REG_ESPACE;
+  c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
+  if (c->left == NULL)
+    return REG_ESPACE;
+  c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
+  if (c->right == NULL)
+    return REG_ESPACE;
+
+  c->right->obj = node->obj;
+  c->right->type = node->type;
+  c->right->nullable = -1;
+  c->right->submatch_id = -1;
+  c->right->firstpos = NULL;
+  c->right->lastpos = NULL;
+  c->right->num_tags = 0;
+  node->obj = c;
+  node->type = CATENATION;
+  return REG_OK;
+}
+
+/* Inserts a catenation node to the root of the tree given in `node'.
+   As the right child a new tag with number `tag_id' to `node' is added,
+   and the left child is the old root. */
+static reg_errcode_t
+tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
+{
+  tre_catenation_t *c;
+
+  DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
+
+  c = tre_mem_alloc(mem, sizeof(*c));
+  if (c == NULL)
+    return REG_ESPACE;
+  c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
+  if (c->right == NULL)
+    return REG_ESPACE;
+  c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
+  if (c->left == NULL)
+    return REG_ESPACE;
+
+  c->left->obj = node->obj;
+  c->left->type = node->type;
+  c->left->nullable = -1;
+  c->left->submatch_id = -1;
+  c->left->firstpos = NULL;
+  c->left->lastpos = NULL;
+  c->left->num_tags = 0;
+  node->obj = c;
+  node->type = CATENATION;
+  return REG_OK;
+}
+
+typedef enum {
+  ADDTAGS_RECURSE,
+  ADDTAGS_AFTER_ITERATION,
+  ADDTAGS_AFTER_UNION_LEFT,
+  ADDTAGS_AFTER_UNION_RIGHT,
+  ADDTAGS_AFTER_CAT_LEFT,
+  ADDTAGS_AFTER_CAT_RIGHT,
+  ADDTAGS_SET_SUBMATCH_END
+} tre_addtags_symbol_t;
+
+
+typedef struct {
+  int tag;
+  int next_tag;
+} tre_tag_states_t;
+
+
+/* Go through `regset' and set submatch data for submatches that are
+   using this tag. */
+static void
+tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
+{
+  int i;
+
+  for (i = 0; regset[i] >= 0; i++)
+    {
+      int id = regset[i] / 2;
+      int start = !(regset[i] % 2);
+      DPRINT(("  Using tag %d for %s offset of "
+             "submatch %d\n", tag,
+             start ? "start" : "end", id));
+      if (start)
+       tnfa->submatch_data[id].so_tag = tag;
+      else
+       tnfa->submatch_data[id].eo_tag = tag;
+    }
+  regset[0] = -1;
+}
+
+
+/* Adds tags to appropriate locations in the parse tree in `tree', so that
+   subexpressions marked for submatch addressing can be traced. */
+static reg_errcode_t
+tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
+            tre_tnfa_t *tnfa)
+{
+  reg_errcode_t status = REG_OK;
+  tre_addtags_symbol_t symbol;
+  tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
+  int bottom = tre_stack_num_objects(stack);
+  /* True for first pass (counting number of needed tags) */
+  int first_pass = (mem == NULL || tnfa == NULL);
+  int *regset, *orig_regset;
+  int num_tags = 0; /* Total number of tags. */
+  int num_minimals = 0;         /* Number of special minimal tags. */
+  int tag = 0;     /* The tag that is to be added next. */
+  int next_tag = 1; /* Next tag to use after this one. */
+  int *parents;            /* Stack of submatches the current submatch is
+                      contained in. */
+  int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
+  tre_tag_states_t *saved_states;
+
+  tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
+  if (!first_pass)
+    {
+      tnfa->end_tag = 0;
+      tnfa->minimal_tags[0] = -1;
+    }
+
+  regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
+  if (regset == NULL)
+    return REG_ESPACE;
+  regset[0] = -1;
+  orig_regset = regset;
+
+  parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
+  if (parents == NULL)
+    {
+      xfree(regset);
+      return REG_ESPACE;
+    }
+  parents[0] = -1;
+
+  saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
+  if (saved_states == NULL)
+    {
+      xfree(regset);
+      xfree(parents);
+      return REG_ESPACE;
+    }
+  else
+    {
+      unsigned int i;
+      for (i = 0; i <= tnfa->num_submatches; i++)
+       saved_states[i].tag = -1;
+    }
+
+  STACK_PUSH(stack, voidptr, node);
+  STACK_PUSH(stack, int, ADDTAGS_RECURSE);
+
+  while (tre_stack_num_objects(stack) > bottom)
+    {
+      if (status != REG_OK)
+       break;
+
+      symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
+      switch (symbol)
+       {
+
+       case ADDTAGS_SET_SUBMATCH_END:
+         {
+           int id = tre_stack_pop_int(stack);
+           int i;
+
+           /* Add end of this submatch to regset. */
+           for (i = 0; regset[i] >= 0; i++);
+           regset[i] = id * 2 + 1;
+           regset[i + 1] = -1;
+
+           /* Pop this submatch from the parents stack. */
+           for (i = 0; parents[i] >= 0; i++);
+           parents[i - 1] = -1;
+           break;
+         }
+
+       case ADDTAGS_RECURSE:
+         node = tre_stack_pop_voidptr(stack);
+
+         if (node->submatch_id >= 0)
+           {
+             int id = node->submatch_id;
+             int i;
+
+
+             /* Add start of this submatch to regset. */
+             for (i = 0; regset[i] >= 0; i++);
+             regset[i] = id * 2;
+             regset[i + 1] = -1;
+
+             if (!first_pass)
+               {
+                 for (i = 0; parents[i] >= 0; i++);
+                 tnfa->submatch_data[id].parents = NULL;
+                 if (i > 0)
+                   {
+                     int *p = xmalloc(sizeof(*p) * (i + 1));
+                     if (p == NULL)
+                       {
+                         status = REG_ESPACE;
+                         break;
+                       }
+                     assert(tnfa->submatch_data[id].parents == NULL);
+                     tnfa->submatch_data[id].parents = p;
+                     for (i = 0; parents[i] >= 0; i++)
+                       p[i] = parents[i];
+                     p[i] = -1;
+                   }
+               }
+
+             /* Add end of this submatch to regset after processing this
+                node. */
+             STACK_PUSHX(stack, int, node->submatch_id);
+             STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
+           }
+
+         switch (node->type)
+           {
+           case LITERAL:
+             {
+               tre_literal_t *lit = node->obj;
+
+               if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
+                 {
+                   int i;
+                   DPRINT(("Literal %d-%d\n",
+                           (int)lit->code_min, (int)lit->code_max));
+                   if (regset[0] >= 0)
+                     {
+                       /* Regset is not empty, so add a tag before the
+                          literal or backref. */
+                       if (!first_pass)
+                         {
+                           status = tre_add_tag_left(mem, node, tag);
+                           tnfa->tag_directions[tag] = direction;
+                           if (minimal_tag >= 0)
+                             {
+                               DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
+                               for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
+                               tnfa->minimal_tags[i] = tag;
+                               tnfa->minimal_tags[i + 1] = minimal_tag;
+                               tnfa->minimal_tags[i + 2] = -1;
+                               minimal_tag = -1;
+                               num_minimals++;
+                             }
+                           tre_purge_regset(regset, tnfa, tag);
+                         }
+                       else
+                         {
+                           DPRINT(("  num_tags = 1\n"));
+                           node->num_tags = 1;
+                         }
+
+                       DPRINT(("  num_tags++\n"));
+                       regset[0] = -1;
+                       tag = next_tag;
+                       num_tags++;
+                       next_tag++;
+                     }
+                 }
+               else
+                 {
+                   assert(!IS_TAG(lit));
+                 }
+               break;
+             }
+           case CATENATION:
+             {
+               tre_catenation_t *cat = node->obj;
+               tre_ast_node_t *left = cat->left;
+               tre_ast_node_t *right = cat->right;
+               int reserved_tag = -1;
+               DPRINT(("Catenation, next_tag = %d\n", next_tag));
+
+
+               /* After processing right child. */
+               STACK_PUSHX(stack, voidptr, node);
+               STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
+
+               /* Process right child. */
+               STACK_PUSHX(stack, voidptr, right);
+               STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+               /* After processing left child. */
+               STACK_PUSHX(stack, int, next_tag + left->num_tags);
+               DPRINT(("  Pushing %d for after left\n",
+                       next_tag + left->num_tags));
+               if (left->num_tags > 0 && right->num_tags > 0)
+                 {
+                   /* Reserve the next tag to the right child. */
+                   DPRINT(("  Reserving next_tag %d to right child\n",
+                           next_tag));
+                   reserved_tag = next_tag;
+                   next_tag++;
+                 }
+               STACK_PUSHX(stack, int, reserved_tag);
+               STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
+
+               /* Process left child. */
+               STACK_PUSHX(stack, voidptr, left);
+               STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+               }
+             break;
+           case ITERATION:
+             {
+               tre_iteration_t *iter = node->obj;
+               DPRINT(("Iteration\n"));
+
+               if (first_pass)
+                 {
+                   STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
+                 }
+               else
+                 {
+                   STACK_PUSHX(stack, int, tag);
+                   STACK_PUSHX(stack, int, iter->minimal);
+                 }
+               STACK_PUSHX(stack, voidptr, node);
+               STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
+
+               STACK_PUSHX(stack, voidptr, iter->arg);
+               STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+               /* Regset is not empty, so add a tag here. */
+               if (regset[0] >= 0 || iter->minimal)
+                 {
+                   if (!first_pass)
+                     {
+                       int i;
+                       status = tre_add_tag_left(mem, node, tag);
+                       if (iter->minimal)
+                         tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
+                       else
+                         tnfa->tag_directions[tag] = direction;
+                       if (minimal_tag >= 0)
+                         {
+                           DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
+                           for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
+                           tnfa->minimal_tags[i] = tag;
+                           tnfa->minimal_tags[i + 1] = minimal_tag;
+                           tnfa->minimal_tags[i + 2] = -1;
+                           minimal_tag = -1;
+                           num_minimals++;
+                         }
+                       tre_purge_regset(regset, tnfa, tag);
+                     }
+
+                   DPRINT(("  num_tags++\n"));
+                   regset[0] = -1;
+                   tag = next_tag;
+                   num_tags++;
+                   next_tag++;
+                 }
+               direction = TRE_TAG_MINIMIZE;
+             }
+             break;
+           case UNION:
+             {
+               tre_union_t *uni = node->obj;
+               tre_ast_node_t *left = uni->left;
+               tre_ast_node_t *right = uni->right;
+               int left_tag;
+               int right_tag;
+
+               if (regset[0] >= 0)
+                 {
+                   left_tag = next_tag;
+                   right_tag = next_tag + 1;
+                 }
+               else
+                 {
+                   left_tag = tag;
+                   right_tag = next_tag;
+                 }
+
+               DPRINT(("Union\n"));
+
+               /* After processing right child. */
+               STACK_PUSHX(stack, int, right_tag);
+               STACK_PUSHX(stack, int, left_tag);
+               STACK_PUSHX(stack, voidptr, regset);
+               STACK_PUSHX(stack, int, regset[0] >= 0);
+               STACK_PUSHX(stack, voidptr, node);
+               STACK_PUSHX(stack, voidptr, right);
+               STACK_PUSHX(stack, voidptr, left);
+               STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
+
+               /* Process right child. */
+               STACK_PUSHX(stack, voidptr, right);
+               STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+               /* After processing left child. */
+               STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
+
+               /* Process left child. */
+               STACK_PUSHX(stack, voidptr, left);
+               STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+               /* Regset is not empty, so add a tag here. */
+               if (regset[0] >= 0)
+                 {
+                   if (!first_pass)
+                     {
+                       int i;
+                       status = tre_add_tag_left(mem, node, tag);
+                       tnfa->tag_directions[tag] = direction;
+                       if (minimal_tag >= 0)
+                         {
+                           DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
+                           for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
+                           tnfa->minimal_tags[i] = tag;
+                           tnfa->minimal_tags[i + 1] = minimal_tag;
+                           tnfa->minimal_tags[i + 2] = -1;
+                           minimal_tag = -1;
+                           num_minimals++;
+                         }
+                       tre_purge_regset(regset, tnfa, tag);
+                     }
+
+                   DPRINT(("  num_tags++\n"));
+                   regset[0] = -1;
+                   tag = next_tag;
+                   num_tags++;
+                   next_tag++;
+                 }
+
+               if (node->num_submatches > 0)
+                 {
+                   /* The next two tags are reserved for markers. */
+                   next_tag++;
+                   tag = next_tag;
+                   next_tag++;
+                 }
+
+               break;
+             }
+           }
+
+         if (node->submatch_id >= 0)
+           {
+             int i;
+             /* Push this submatch on the parents stack. */
+             for (i = 0; parents[i] >= 0; i++);
+             parents[i] = node->submatch_id;
+             parents[i + 1] = -1;
+           }
+
+         break; /* end case: ADDTAGS_RECURSE */
+
+       case ADDTAGS_AFTER_ITERATION:
+         {
+           int minimal = 0;
+           int enter_tag;
+           node = tre_stack_pop_voidptr(stack);
+           if (first_pass)
+             {
+               node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
+                 + tre_stack_pop_int(stack);
+               minimal_tag = -1;
+             }
+           else
+             {
+               minimal = tre_stack_pop_int(stack);
+               enter_tag = tre_stack_pop_int(stack);
+               if (minimal)
+                 minimal_tag = enter_tag;
+             }
+
+           DPRINT(("After iteration\n"));
+           if (!first_pass)
+             {
+               DPRINT(("  Setting direction to %s\n",
+                       minimal ? "minimize" : "maximize"));
+               if (minimal)
+                 direction = TRE_TAG_MINIMIZE;
+               else
+                 direction = TRE_TAG_MAXIMIZE;
+             }
+           break;
+         }
+
+       case ADDTAGS_AFTER_CAT_LEFT:
+         {
+           int new_tag = tre_stack_pop_int(stack);
+           next_tag = tre_stack_pop_int(stack);
+           DPRINT(("After cat left, tag = %d, next_tag = %d\n",
+                   tag, next_tag));
+           if (new_tag >= 0)
+             {
+               DPRINT(("  Setting tag to %d\n", new_tag));
+               tag = new_tag;
+             }
+           break;
+         }
+
+       case ADDTAGS_AFTER_CAT_RIGHT:
+         DPRINT(("After cat right\n"));
+         node = tre_stack_pop_voidptr(stack);
+         if (first_pass)
+           node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
+             + ((tre_catenation_t *)node->obj)->right->num_tags;
+         break;
+
+       case ADDTAGS_AFTER_UNION_LEFT:
+         DPRINT(("After union left\n"));
+         /* Lift the bottom of the `regset' array so that when processing
+            the right operand the items currently in the array are
+            invisible.  The original bottom was saved at ADDTAGS_UNION and
+            will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
+         while (*regset >= 0)
+           regset++;
+         break;
+
+       case ADDTAGS_AFTER_UNION_RIGHT:
+         {
+           int added_tags, tag_left, tag_right;
+           tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
+           tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
+           DPRINT(("After union right\n"));
+           node = tre_stack_pop_voidptr(stack);
+           added_tags = tre_stack_pop_int(stack);
+           if (first_pass)
+             {
+               node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
+                 + ((tre_union_t *)node->obj)->right->num_tags + added_tags
+                 + ((node->num_submatches > 0) ? 2 : 0);
+             }
+           regset = tre_stack_pop_voidptr(stack);
+           tag_left = tre_stack_pop_int(stack);
+           tag_right = tre_stack_pop_int(stack);
+
+           /* Add tags after both children, the left child gets a smaller
+              tag than the right child.  This guarantees that we prefer
+              the left child over the right child. */
+           /* XXX - This is not always necessary (if the children have
+              tags which must be seen for every match of that child). */
+           /* XXX - Check if this is the only place where tre_add_tag_right
+              is used.  If so, use tre_add_tag_left (putting the tag before
+              the child as opposed after the child) and throw away
+              tre_add_tag_right. */
+           if (node->num_submatches > 0)
+             {
+               if (!first_pass)
+                 {
+                   status = tre_add_tag_right(mem, left, tag_left);
+                   tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
+                   status = tre_add_tag_right(mem, right, tag_right);
+                   tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
+                 }
+               DPRINT(("  num_tags += 2\n"));
+               num_tags += 2;
+             }
+           direction = TRE_TAG_MAXIMIZE;
+           break;
+         }
+
+       default:
+         assert(0);
+         break;
+
+       } /* end switch(symbol) */
+    } /* end while(tre_stack_num_objects(stack) > bottom) */
+
+  if (!first_pass)
+    tre_purge_regset(regset, tnfa, tag);
+
+  if (!first_pass && minimal_tag >= 0)
+    {
+      int i;
+      DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
+      for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
+      tnfa->minimal_tags[i] = tag;
+      tnfa->minimal_tags[i + 1] = minimal_tag;
+      tnfa->minimal_tags[i + 2] = -1;
+      minimal_tag = -1;
+      num_minimals++;
+    }
+
+  DPRINT(("tre_add_tags: %s complete.  Number of tags %d.\n",
+         first_pass? "First pass" : "Second pass", num_tags));
+
+  assert(tree->num_tags == num_tags);
+  tnfa->end_tag = num_tags;
+  tnfa->num_tags = num_tags;
+  tnfa->num_minimals = num_minimals;
+  xfree(orig_regset);
+  xfree(parents);
+  xfree(saved_states);
+  return status;
+}
+
+
+
+/*
+  AST to TNFA compilation routines.
+*/
+
+typedef enum {
+  COPY_RECURSE,
+  COPY_SET_RESULT_PTR
+} tre_copyast_symbol_t;
+
+/* Flags for tre_copy_ast(). */
+#define COPY_REMOVE_TAGS        1
+#define COPY_MAXIMIZE_FIRST_TAG         2
+
+static reg_errcode_t
+tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
+            int flags, int *pos_add, tre_tag_direction_t *tag_directions,
+            tre_ast_node_t **copy, int *max_pos)
+{
+  reg_errcode_t status = REG_OK;
+  int bottom = tre_stack_num_objects(stack);
+  int num_copied = 0;
+  int first_tag = 1;
+  tre_ast_node_t **result = copy;
+  tre_copyast_symbol_t symbol;
+
+  STACK_PUSH(stack, voidptr, ast);
+  STACK_PUSH(stack, int, COPY_RECURSE);
+
+  while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
+    {
+      tre_ast_node_t *node;
+      if (status != REG_OK)
+       break;
+
+      symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
+      switch (symbol)
+       {
+       case COPY_SET_RESULT_PTR:
+         result = tre_stack_pop_voidptr(stack);
+         break;
+       case COPY_RECURSE:
+         node = tre_stack_pop_voidptr(stack);
+         switch (node->type)
+           {
+           case LITERAL:
+             {
+               tre_literal_t *lit = node->obj;
+               int pos = lit->position;
+               int min = lit->code_min;
+               int max = lit->code_max;
+               if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
+                 {
+                   /* XXX - e.g. [ab] has only one position but two
+                      nodes, so we are creating holes in the state space
+                      here.  Not fatal, just wastes memory. */
+                   pos += *pos_add;
+                   num_copied++;
+                 }
+               else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
+                 {
+                   /* Change this tag to empty. */
+                   min = EMPTY;
+                   max = pos = -1;
+                 }
+               else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
+                        && first_tag)
+                 {
+                   /* Maximize the first tag. */
+                   tag_directions[max] = TRE_TAG_MAXIMIZE;
+                   first_tag = 0;
+                 }
+               *result = tre_ast_new_literal(mem, min, max, pos);
+               if (*result == NULL)
+                 status = REG_ESPACE;
+
+               if (pos > *max_pos)
+                 *max_pos = pos;
+               break;
+             }
+           case UNION:
+             {
+               tre_union_t *uni = node->obj;
+               tre_union_t *tmp;
+               *result = tre_ast_new_union(mem, uni->left, uni->right);
+               if (*result == NULL)
+                 {
+                   status = REG_ESPACE;
+                   break;
+                 }
+               tmp = (*result)->obj;
+               result = &tmp->left;
+               STACK_PUSHX(stack, voidptr, uni->right);
+               STACK_PUSHX(stack, int, COPY_RECURSE);
+               STACK_PUSHX(stack, voidptr, &tmp->right);
+               STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
+               STACK_PUSHX(stack, voidptr, uni->left);
+               STACK_PUSHX(stack, int, COPY_RECURSE);
+               break;
+             }
+           case CATENATION:
+             {
+               tre_catenation_t *cat = node->obj;
+               tre_catenation_t *tmp;
+               *result = tre_ast_new_catenation(mem, cat->left, cat->right);
+               if (*result == NULL)
+                 {
+                   status = REG_ESPACE;
+                   break;
+                 }
+               tmp = (*result)->obj;
+               tmp->left = NULL;
+               tmp->right = NULL;
+               result = &tmp->left;
+
+               STACK_PUSHX(stack, voidptr, cat->right);
+               STACK_PUSHX(stack, int, COPY_RECURSE);
+               STACK_PUSHX(stack, voidptr, &tmp->right);
+               STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
+               STACK_PUSHX(stack, voidptr, cat->left);
+               STACK_PUSHX(stack, int, COPY_RECURSE);
+               break;
+             }
+           case ITERATION:
+             {
+               tre_iteration_t *iter = node->obj;
+               STACK_PUSHX(stack, voidptr, iter->arg);
+               STACK_PUSHX(stack, int, COPY_RECURSE);
+               *result = tre_ast_new_iter(mem, iter->arg, iter->min,
+                                          iter->max, iter->minimal);
+               if (*result == NULL)
+                 {
+                   status = REG_ESPACE;
+                   break;
+                 }
+               iter = (*result)->obj;
+               result = &iter->arg;
+               break;
+             }
+           default:
+             assert(0);
+             break;
+           }
+         break;
+       }
+    }
+  *pos_add += num_copied;
+  return status;
+}
+
+typedef enum {
+  EXPAND_RECURSE,
+  EXPAND_AFTER_ITER
+} tre_expand_ast_symbol_t;
+
+/* Expands each iteration node that has a finite nonzero minimum or maximum
+   iteration count to a catenated sequence of copies of the node. */
+static reg_errcode_t
+tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
+              int *position, tre_tag_direction_t *tag_directions,
+              int *max_depth)
+{
+  reg_errcode_t status = REG_OK;
+  int bottom = tre_stack_num_objects(stack);
+  int pos_add = 0;
+  int pos_add_total = 0;
+  int max_pos = 0;
+  /* Current approximate matching parameters. */
+  int params[TRE_PARAM_LAST];
+  /* Approximate parameter nesting level. */
+  int params_depth = 0;
+  int iter_depth = 0;
+  int i;
+
+  for (i = 0; i < TRE_PARAM_LAST; i++)
+    params[i] = TRE_PARAM_DEFAULT;
+
+  STACK_PUSHR(stack, voidptr, ast);
+  STACK_PUSHR(stack, int, EXPAND_RECURSE);
+  while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
+    {
+      tre_ast_node_t *node;
+      tre_expand_ast_symbol_t symbol;
+
+      if (status != REG_OK)
+       break;
+
+      DPRINT(("pos_add %d\n", pos_add));
+
+      symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
+      node = tre_stack_pop_voidptr(stack);
+      switch (symbol)
+       {
+       case EXPAND_RECURSE:
+         switch (node->type)
+           {
+           case LITERAL:
+             {
+               tre_literal_t *lit= node->obj;
+               if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
+                 {
+                   lit->position += pos_add;
+                   if (lit->position > max_pos)
+                     max_pos = lit->position;
+                 }
+               break;
+             }
+           case UNION:
+             {
+               tre_union_t *uni = node->obj;
+               STACK_PUSHX(stack, voidptr, uni->right);
+               STACK_PUSHX(stack, int, EXPAND_RECURSE);
+               STACK_PUSHX(stack, voidptr, uni->left);
+               STACK_PUSHX(stack, int, EXPAND_RECURSE);
+               break;
+             }
+           case CATENATION:
+             {
+               tre_catenation_t *cat = node->obj;
+               STACK_PUSHX(stack, voidptr, cat->right);
+               STACK_PUSHX(stack, int, EXPAND_RECURSE);
+               STACK_PUSHX(stack, voidptr, cat->left);
+               STACK_PUSHX(stack, int, EXPAND_RECURSE);
+               break;
+             }
+           case ITERATION:
+             {
+               tre_iteration_t *iter = node->obj;
+               STACK_PUSHX(stack, int, pos_add);
+               STACK_PUSHX(stack, voidptr, node);
+               STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
+               STACK_PUSHX(stack, voidptr, iter->arg);
+               STACK_PUSHX(stack, int, EXPAND_RECURSE);
+               /* If we are going to expand this node at EXPAND_AFTER_ITER
+                  then don't increase the `pos' fields of the nodes now, it
+                  will get done when expanding. */
+               if (iter->min > 1 || iter->max > 1)
+                 pos_add = 0;
+               iter_depth++;
+               DPRINT(("iter\n"));
+               break;
+             }
+           default:
+             assert(0);
+             break;
+           }
+         break;
+       case EXPAND_AFTER_ITER:
+         {
+           tre_iteration_t *iter = node->obj;
+           int pos_add_last;
+           pos_add = tre_stack_pop_int(stack);
+           pos_add_last = pos_add;
+           if (iter->min > 1 || iter->max > 1)
+             {
+               tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
+               int j;
+               int pos_add_save = pos_add;
+
+               /* Create a catenated sequence of copies of the node. */
+               for (j = 0; j < iter->min; j++)
+                 {
+                   tre_ast_node_t *copy;
+                   /* Remove tags from all but the last copy. */
+                   int flags = ((j + 1 < iter->min)
+                                ? COPY_REMOVE_TAGS
+                                : COPY_MAXIMIZE_FIRST_TAG);
+                   DPRINT(("  pos_add %d\n", pos_add));
+                   pos_add_save = pos_add;
+                   status = tre_copy_ast(mem, stack, iter->arg, flags,
+                                         &pos_add, tag_directions, &copy,
+                                         &max_pos);
+                   if (status != REG_OK)
+                     return status;
+                   if (seq1 != NULL)
+                     seq1 = tre_ast_new_catenation(mem, seq1, copy);
+                   else
+                     seq1 = copy;
+                   if (seq1 == NULL)
+                     return REG_ESPACE;
+                 }
+
+               if (iter->max == -1)
+                 {
+                   /* No upper limit. */
+                   pos_add_save = pos_add;
+                   status = tre_copy_ast(mem, stack, iter->arg, 0,
+                                         &pos_add, NULL, &seq2, &max_pos);
+                   if (status != REG_OK)
+                     return status;
+                   seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
+                   if (seq2 == NULL)
+                     return REG_ESPACE;
+                 }
+               else
+                 {
+                   for (j = iter->min; j < iter->max; j++)
+                     {
+                       tre_ast_node_t *tmp, *copy;
+                       pos_add_save = pos_add;
+                       status = tre_copy_ast(mem, stack, iter->arg, 0,
+                                             &pos_add, NULL, &copy, &max_pos);
+                       if (status != REG_OK)
+                         return status;
+                       if (seq2 != NULL)
+                         seq2 = tre_ast_new_catenation(mem, copy, seq2);
+                       else
+                         seq2 = copy;
+                       if (seq2 == NULL)
+                         return REG_ESPACE;
+                       tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
+                       if (tmp == NULL)
+                         return REG_ESPACE;
+                       seq2 = tre_ast_new_union(mem, tmp, seq2);
+                       if (seq2 == NULL)
+                         return REG_ESPACE;
+                     }
+                 }
+
+               pos_add = pos_add_save;
+               if (seq1 == NULL)
+                 seq1 = seq2;
+               else if (seq2 != NULL)
+                 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
+               if (seq1 == NULL)
+                 return REG_ESPACE;
+               node->obj = seq1->obj;
+               node->type = seq1->type;
+             }
+
+           iter_depth--;
+           pos_add_total += pos_add - pos_add_last;
+           if (iter_depth == 0)
+             pos_add = pos_add_total;
+
+           /* If approximate parameters are specified, surround the result
+              with two parameter setting nodes.  The one on the left sets
+              the specified parameters, and the one on the right restores
+              the old parameters. */
+           if (iter->params)
+             {
+               tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
+               int *old_params;
+
+               tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
+               if (!tmp_l)
+                 return REG_ESPACE;
+               ((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
+               iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
+               tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
+               if (!tmp_r)
+                 return REG_ESPACE;
+               old_params = tre_mem_alloc(mem, sizeof(*old_params)
+                                          * TRE_PARAM_LAST);
+               if (!old_params)
+                 return REG_ESPACE;
+               for (i = 0; i < TRE_PARAM_LAST; i++)
+                 old_params[i] = params[i];
+               ((tre_literal_t *)tmp_r->obj)->u.params = old_params;
+               old_params[TRE_PARAM_DEPTH] = params_depth;
+               /* XXX - this is the only place where ast_new_node is
+                  needed -- should be moved inside AST module. */
+               node_copy = tre_ast_new_node(mem, ITERATION,
+                                            sizeof(tre_iteration_t));
+               if (!node_copy)
+                 return REG_ESPACE;
+               node_copy->obj = node->obj;
+               tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
+               if (!tmp_node)
+                 return REG_ESPACE;
+               tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
+               if (!tmp_node)
+                 return REG_ESPACE;
+               /* Replace the contents of `node' with `tmp_node'. */
+               memcpy(node, tmp_node, sizeof(*node));
+               node->obj = tmp_node->obj;
+               node->type = tmp_node->type;
+               params_depth++;
+               if (params_depth > *max_depth)
+                 *max_depth = params_depth;
+             }
+           break;
+         }
+       default:
+         assert(0);
+         break;
+       }
+    }
+
+  *position += pos_add_total;
+
+  /* `max_pos' should never be larger than `*position' if the above
+     code works, but just an extra safeguard let's make sure
+     `*position' is set large enough so enough memory will be
+     allocated for the transition table. */
+  if (max_pos > *position)
+    *position = max_pos;
+
+#ifdef TRE_DEBUG
+  DPRINT(("Expanded AST:\n"));
+  tre_ast_print(ast);
+  DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
+#endif
+
+  return status;
+}
+
+static tre_pos_and_tags_t *
+tre_set_empty(tre_mem_t mem)
+{
+  tre_pos_and_tags_t *new_set;
+
+  new_set = tre_mem_calloc(mem, sizeof(*new_set));
+  if (new_set == NULL)
+    return NULL;
+
+  new_set[0].position = -1;
+  new_set[0].code_min = -1;
+  new_set[0].code_max = -1;
+
+  return new_set;
+}
+
+static tre_pos_and_tags_t *
+tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
+           tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
+{
+  tre_pos_and_tags_t *new_set;
+
+  new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
+  if (new_set == NULL)
+    return NULL;
+
+  new_set[0].position = position;
+  new_set[0].code_min = code_min;
+  new_set[0].code_max = code_max;
+  new_set[0].class = class;
+  new_set[0].neg_classes = neg_classes;
+  new_set[0].backref = backref;
+  new_set[1].position = -1;
+  new_set[1].code_min = -1;
+  new_set[1].code_max = -1;
+
+  return new_set;
+}
+
+static tre_pos_and_tags_t *
+tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
+             int *tags, int assertions, int *params)
+{
+  int s1, s2, i, j;
+  tre_pos_and_tags_t *new_set;
+  int *new_tags;
+  int num_tags;
+
+  for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
+  for (s1 = 0; set1[s1].position >= 0; s1++);
+  for (s2 = 0; set2[s2].position >= 0; s2++);
+  new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
+  if (!new_set )
+    return NULL;
+
+  for (s1 = 0; set1[s1].position >= 0; s1++)
+    {
+      new_set[s1].position = set1[s1].position;
+      new_set[s1].code_min = set1[s1].code_min;
+      new_set[s1].code_max = set1[s1].code_max;
+      new_set[s1].assertions = set1[s1].assertions | assertions;
+      new_set[s1].class = set1[s1].class;
+      new_set[s1].neg_classes = set1[s1].neg_classes;
+      new_set[s1].backref = set1[s1].backref;
+      if (set1[s1].tags == NULL && tags == NULL)
+       new_set[s1].tags = NULL;
+      else
+       {
+         for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
+         new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
+                                        * (i + num_tags + 1)));
+         if (new_tags == NULL)
+           return NULL;
+         for (j = 0; j < i; j++)
+           new_tags[j] = set1[s1].tags[j];
+         for (i = 0; i < num_tags; i++)
+           new_tags[j + i] = tags[i];
+         new_tags[j + i] = -1;
+         new_set[s1].tags = new_tags;
+       }
+      if (set1[s1].params)
+       new_set[s1].params = set1[s1].params;
+      if (params)
+       {
+         if (!new_set[s1].params)
+           new_set[s1].params = params;
+         else
+           {
+             new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
+                                                TRE_PARAM_LAST);
+             if (!new_set[s1].params)
+               return NULL;
+             for (i = 0; i < TRE_PARAM_LAST; i++)
+               if (params[i] != TRE_PARAM_UNSET)
+                 new_set[s1].params[i] = params[i];
+           }
+       }
+    }
+
+  for (s2 = 0; set2[s2].position >= 0; s2++)
+    {
+      new_set[s1 + s2].position = set2[s2].position;
+      new_set[s1 + s2].code_min = set2[s2].code_min;
+      new_set[s1 + s2].code_max = set2[s2].code_max;
+      /* XXX - why not | assertions here as well? */
+      new_set[s1 + s2].assertions = set2[s2].assertions;
+      new_set[s1 + s2].class = set2[s2].class;
+      new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
+      new_set[s1 + s2].backref = set2[s2].backref;
+      if (set2[s2].tags == NULL)
+       new_set[s1 + s2].tags = NULL;
+      else
+       {
+         for (i = 0; set2[s2].tags[i] >= 0; i++);
+         new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
+         if (new_tags == NULL)
+           return NULL;
+         for (j = 0; j < i; j++)
+           new_tags[j] = set2[s2].tags[j];
+         new_tags[j] = -1;
+         new_set[s1 + s2].tags = new_tags;
+       }
+      if (set2[s2].params)
+       new_set[s1 + s2].params = set2[s2].params;
+      if (params)
+       {
+         if (!new_set[s1 + s2].params)
+           new_set[s1 + s2].params = params;
+         else
+           {
+             new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
+                                                     TRE_PARAM_LAST);
+             if (!new_set[s1 + s2].params)
+               return NULL;
+             for (i = 0; i < TRE_PARAM_LAST; i++)
+               if (params[i] != TRE_PARAM_UNSET)
+                 new_set[s1 + s2].params[i] = params[i];
+           }
+       }
+    }
+  new_set[s1 + s2].position = -1;
+  return new_set;
+}
+
+/* Finds the empty path through `node' which is the one that should be
+   taken according to POSIX.2 rules, and adds the tags on that path to
+   `tags'.   `tags' may be NULL.  If `num_tags_seen' is not NULL, it is
+   set to the number of tags seen on the path. */
+static reg_errcode_t
+tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
+               int *assertions, int *params, int *num_tags_seen,
+               int *params_seen)
+{
+  tre_literal_t *lit;
+  tre_union_t *uni;
+  tre_catenation_t *cat;
+  tre_iteration_t *iter;
+  int i;
+  int bottom = tre_stack_num_objects(stack);
+  reg_errcode_t status = REG_OK;
+  if (num_tags_seen)
+    *num_tags_seen = 0;
+  if (params_seen)
+    *params_seen = 0;
+
+  status = tre_stack_push_voidptr(stack, node);
+
+  /* Walk through the tree recursively. */
+  while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
+    {
+      node = tre_stack_pop_voidptr(stack);
+
+      switch (node->type)
+       {
+       case LITERAL:
+         lit = (tre_literal_t *)node->obj;
+         switch (lit->code_min)
+           {
+           case TAG:
+             if (lit->code_max >= 0)
+               {
+                 if (tags != NULL)
+                   {
+                     /* Add the tag to `tags'. */
+                     for (i = 0; tags[i] >= 0; i++)
+                       if (tags[i] == lit->code_max)
+                         break;
+                     if (tags[i] < 0)
+                       {
+                         tags[i] = lit->code_max;
+                         tags[i + 1] = -1;
+                       }
+                   }
+                 if (num_tags_seen)
+                   (*num_tags_seen)++;
+               }
+             break;
+           case ASSERTION:
+             assert(lit->code_max >= 1
+                    || lit->code_max <= ASSERT_LAST);
+             if (assertions != NULL)
+               *assertions |= lit->code_max;
+             break;
+           case PARAMETER:
+             if (params != NULL)
+               for (i = 0; i < TRE_PARAM_LAST; i++)
+                 params[i] = lit->u.params[i];
+             if (params_seen != NULL)
+               *params_seen = 1;
+             break;
+           case EMPTY:
+             break;
+           default:
+             assert(0);
+             break;
+           }
+         break;
+
+       case UNION:
+         /* Subexpressions starting earlier take priority over ones
+            starting later, so we prefer the left subexpression over the
+            right subexpression. */
+         uni = (tre_union_t *)node->obj;
+         if (uni->left->nullable)
+           STACK_PUSHX(stack, voidptr, uni->left)
+         else if (uni->right->nullable)
+           STACK_PUSHX(stack, voidptr, uni->right)
+         else
+           assert(0);
+         break;
+
+       case CATENATION:
+         /* The path must go through both children. */
+         cat = (tre_catenation_t *)node->obj;
+         assert(cat->left->nullable);
+         assert(cat->right->nullable);
+         STACK_PUSHX(stack, voidptr, cat->left);
+         STACK_PUSHX(stack, voidptr, cat->right);
+         break;
+
+       case ITERATION:
+         /* A match with an empty string is preferred over no match at
+            all, so we go through the argument if possible. */
+         iter = (tre_iteration_t *)node->obj;
+         if (iter->arg->nullable)
+           STACK_PUSHX(stack, voidptr, iter->arg);
+         break;
+
+       default:
+         assert(0);
+         break;
+       }
+    }
+
+  return status;
+}
+
+
+typedef enum {
+  NFL_RECURSE,
+  NFL_POST_UNION,
+  NFL_POST_CATENATION,
+  NFL_POST_ITERATION
+} tre_nfl_stack_symbol_t;
+
+
+/* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
+   the nodes of the AST `tree'. */
+static reg_errcode_t
+tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
+{
+  int bottom = tre_stack_num_objects(stack);
+
+  STACK_PUSHR(stack, voidptr, tree);
+  STACK_PUSHR(stack, int, NFL_RECURSE);
+
+  while (tre_stack_num_objects(stack) > bottom)
+    {
+      tre_nfl_stack_symbol_t symbol;
+      tre_ast_node_t *node;
+
+      symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
+      node = tre_stack_pop_voidptr(stack);
+      switch (symbol)
+       {
+       case NFL_RECURSE:
+         switch (node->type)
+           {
+           case LITERAL:
+             {
+               tre_literal_t *lit = (tre_literal_t *)node->obj;
+               if (IS_BACKREF(lit))
+                 {
+                   /* Back references: nullable = false, firstpos = {i},
+                      lastpos = {i}. */
+                   node->nullable = 0;
+                   node->firstpos = tre_set_one(mem, lit->position, 0,
+                                            TRE_CHAR_MAX, 0, NULL, -1);
+                   if (!node->firstpos)
+                     return REG_ESPACE;
+                   node->lastpos = tre_set_one(mem, lit->position, 0,
+                                               TRE_CHAR_MAX, 0, NULL,
+                                               (int)lit->code_max);
+                   if (!node->lastpos)
+                     return REG_ESPACE;
+                 }
+               else if (lit->code_min < 0)
+                 {
+                   /* Tags, empty strings, params, and zero width assertions:
+                      nullable = true, firstpos = {}, and lastpos = {}. */
+                   node->nullable = 1;
+                   node->firstpos = tre_set_empty(mem);
+                   if (!node->firstpos)
+                     return REG_ESPACE;
+                   node->lastpos = tre_set_empty(mem);
+                   if (!node->lastpos)
+                     return REG_ESPACE;
+                 }
+               else
+                 {
+                   /* Literal at position i: nullable = false, firstpos = {i},
+                      lastpos = {i}. */
+                   node->nullable = 0;
+                   node->firstpos =
+                     tre_set_one(mem, lit->position, (int)lit->code_min,
+                                 (int)lit->code_max, 0, NULL, -1);
+                   if (!node->firstpos)
+                     return REG_ESPACE;
+                   node->lastpos = tre_set_one(mem, lit->position,
+                                               (int)lit->code_min,
+                                               (int)lit->code_max,
+                                               lit->u.class, lit->neg_classes,
+                                               -1);
+                   if (!node->lastpos)
+                     return REG_ESPACE;
+                 }
+               break;
+             }
+
+           case UNION:
+             /* Compute the attributes for the two subtrees, and after that
+                for this node. */
+             STACK_PUSHR(stack, voidptr, node);
+             STACK_PUSHR(stack, int, NFL_POST_UNION);
+             STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
+             STACK_PUSHR(stack, int, NFL_RECURSE);
+             STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
+             STACK_PUSHR(stack, int, NFL_RECURSE);
+             break;
+
+           case CATENATION:
+             /* Compute the attributes for the two subtrees, and after that
+                for this node. */
+             STACK_PUSHR(stack, voidptr, node);
+             STACK_PUSHR(stack, int, NFL_POST_CATENATION);
+             STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
+             STACK_PUSHR(stack, int, NFL_RECURSE);
+             STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
+             STACK_PUSHR(stack, int, NFL_RECURSE);
+             break;
+
+           case ITERATION:
+             /* Compute the attributes for the subtree, and after that for
+                this node. */
+             STACK_PUSHR(stack, voidptr, node);
+             STACK_PUSHR(stack, int, NFL_POST_ITERATION);
+             STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
+             STACK_PUSHR(stack, int, NFL_RECURSE);
+             break;
+           }
+         break; /* end case: NFL_RECURSE */
+
+       case NFL_POST_UNION:
+         {
+           tre_union_t *uni = (tre_union_t *)node->obj;
+           node->nullable = uni->left->nullable || uni->right->nullable;
+           node->firstpos = tre_set_union(mem, uni->left->firstpos,
+                                          uni->right->firstpos, NULL, 0, NULL);
+           if (!node->firstpos)
+             return REG_ESPACE;
+           node->lastpos = tre_set_union(mem, uni->left->lastpos,
+                                         uni->right->lastpos, NULL, 0, NULL);
+           if (!node->lastpos)
+             return REG_ESPACE;
+           break;
+         }
+
+       case NFL_POST_ITERATION:
+         {
+           tre_iteration_t *iter = (tre_iteration_t *)node->obj;
+
+           if (iter->min == 0 || iter->arg->nullable)
+             node->nullable = 1;
+           else
+             node->nullable = 0;
+           node->firstpos = iter->arg->firstpos;
+           node->lastpos = iter->arg->lastpos;
+           break;
+         }
+
+       case NFL_POST_CATENATION:
+         {
+           int num_tags, *tags, assertions, params_seen;
+           int *params;
+           reg_errcode_t status;
+           tre_catenation_t *cat = node->obj;
+           node->nullable = cat->left->nullable && cat->right->nullable;
+
+           /* Compute firstpos. */
+           if (cat->left->nullable)
+             {
+               /* The left side matches the empty string.  Make a first pass
+                  with tre_match_empty() to get the number of tags and
+                  parameters. */
+               status = tre_match_empty(stack, cat->left,
+                                        NULL, NULL, NULL, &num_tags,
+                                        &params_seen);
+               if (status != REG_OK)
+                 return status;
+               /* Allocate arrays for the tags and parameters. */
+               tags = xmalloc(sizeof(*tags) * (num_tags + 1));
+               if (!tags)
+                 return REG_ESPACE;
+               tags[0] = -1;
+               assertions = 0;
+               params = NULL;
+               if (params_seen)
+                 {
+                   params = tre_mem_alloc(mem, sizeof(*params)
+                                          * TRE_PARAM_LAST);
+                   if (!params)
+                     {
+                       xfree(tags);
+                       return REG_ESPACE;
+                     }
+                 }
+               /* Second pass with tre_mach_empty() to get the list of
+                  tags and parameters. */
+               status = tre_match_empty(stack, cat->left, tags,
+                                        &assertions, params, NULL, NULL);
+               if (status != REG_OK)
+                 {
+                   xfree(tags);
+                   return status;
+                 }
+               node->firstpos =
+                 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
+                               tags, assertions, params);
+               xfree(tags);
+               if (!node->firstpos)
+                 return REG_ESPACE;
+             }
+           else
+             {
+               node->firstpos = cat->left->firstpos;
+             }
+
+           /* Compute lastpos. */
+           if (cat->right->nullable)
+             {
+               /* The right side matches the empty string.  Make a first pass
+                  with tre_match_empty() to get the number of tags and
+                  parameters. */
+               status = tre_match_empty(stack, cat->right,
+                                        NULL, NULL, NULL, &num_tags,
+                                        &params_seen);
+               if (status != REG_OK)
+                 return status;
+               /* Allocate arrays for the tags and parameters. */
+               tags = xmalloc(sizeof(int) * (num_tags + 1));
+               if (!tags)
+                 return REG_ESPACE;
+               tags[0] = -1;
+               assertions = 0;
+               params = NULL;
+               if (params_seen)
+                 {
+                   params = tre_mem_alloc(mem, sizeof(*params)
+                                          * TRE_PARAM_LAST);
+                   if (!params)
+                     {
+                       xfree(tags);
+                       return REG_ESPACE;
+                     }
+                 }
+               /* Second pass with tre_mach_empty() to get the list of
+                  tags and parameters. */
+               status = tre_match_empty(stack, cat->right, tags,
+                                        &assertions, params, NULL, NULL);
+               if (status != REG_OK)
+                 {
+                   xfree(tags);
+                   return status;
+                 }
+               node->lastpos =
+                 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
+                               tags, assertions, params);
+               xfree(tags);
+               if (!node->lastpos)
+                 return REG_ESPACE;
+             }
+           else
+             {
+               node->lastpos = cat->right->lastpos;
+             }
+           break;
+         }
+
+       default:
+         assert(0);
+         break;
+       }
+    }
+
+  return REG_OK;
+}
+
+
+/* Adds a transition from each position in `p1' to each position in `p2'. */
+static reg_errcode_t
+tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
+              tre_tnfa_transition_t *transitions,
+              int *counts, int *offs)
+{
+  tre_pos_and_tags_t *orig_p2 = p2;
+  tre_tnfa_transition_t *trans;
+  int i, j, k, l, dup, prev_p2_pos;
+
+  if (transitions != NULL)
+    while (p1->position >= 0)
+      {
+       p2 = orig_p2;
+       prev_p2_pos = -1;
+       while (p2->position >= 0)
+         {
+           /* Optimization: if this position was already handled, skip it. */
+           if (p2->position == prev_p2_pos)
+             {
+               p2++;
+               continue;
+             }
+           prev_p2_pos = p2->position;
+           /* Set `trans' to point to the next unused transition from
+              position `p1->position'. */
+           trans = transitions + offs[p1->position];
+           while (trans->state != NULL)
+             {
+#if 0
+               /* If we find a previous transition from `p1->position' to
+                  `p2->position', it is overwritten.  This can happen only
+                  if there are nested loops in the regexp, like in "((a)*)*".
+                  In POSIX.2 repetition using the outer loop is always
+                  preferred over using the inner loop.  Therefore the
+                  transition for the inner loop is useless and can be thrown
+                  away. */
+               /* XXX - The same position is used for all nodes in a bracket
+                  expression, so this optimization cannot be used (it will
+                  break bracket expressions) unless I figure out a way to
+                  detect it here. */
+               if (trans->state_id == p2->position)
+                 {
+                   DPRINT(("*"));
+                   break;
+                 }
+#endif
+               trans++;
+             }
+
+           if (trans->state == NULL)
+             (trans + 1)->state = NULL;
+           /* Use the character ranges, assertions, etc. from `p1' for
+              the transition from `p1' to `p2'. */
+           trans->code_min = p1->code_min;
+           trans->code_max = p1->code_max;
+           trans->state = transitions + offs[p2->position];
+           trans->state_id = p2->position;
+           trans->assertions = p1->assertions | p2->assertions
+             | (p1->class ? ASSERT_CHAR_CLASS : 0)
+             | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
+           if (p1->backref >= 0)
+             {
+               assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
+               assert(p2->backref < 0);
+               trans->u.backref = p1->backref;
+               trans->assertions |= ASSERT_BACKREF;
+             }
+           else
+             trans->u.class = p1->class;
+           if (p1->neg_classes != NULL)
+             {
+               for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
+               trans->neg_classes =
+                 xmalloc(sizeof(*trans->neg_classes) * (i + 1));
+               if (trans->neg_classes == NULL)
+                 return REG_ESPACE;
+               for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
+                 trans->neg_classes[i] = p1->neg_classes[i];
+               trans->neg_classes[i] = (tre_ctype_t)0;
+             }
+           else
+             trans->neg_classes = NULL;
+
+           /* Find out how many tags this transition has. */
+           i = 0;
+           if (p1->tags != NULL)
+             while(p1->tags[i] >= 0)
+               i++;
+           j = 0;
+           if (p2->tags != NULL)
+             while(p2->tags[j] >= 0)
+               j++;
+
+           /* If we are overwriting a transition, free the old tag array. */
+           if (trans->tags != NULL)
+             xfree(trans->tags);
+           trans->tags = NULL;
+
+           /* If there were any tags, allocate an array and fill it. */
+           if (i + j > 0)
+             {
+               trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
+               if (!trans->tags)
+                 return REG_ESPACE;
+               i = 0;
+               if (p1->tags != NULL)
+                 while(p1->tags[i] >= 0)
+                   {
+                     trans->tags[i] = p1->tags[i];
+                     i++;
+                   }
+               l = i;
+               j = 0;
+               if (p2->tags != NULL)
+                 while (p2->tags[j] >= 0)
+                   {
+                     /* Don't add duplicates. */
+                     dup = 0;
+                     for (k = 0; k < i; k++)
+                       if (trans->tags[k] == p2->tags[j])
+                         {
+                           dup = 1;
+                           break;
+                         }
+                     if (!dup)
+                       trans->tags[l++] = p2->tags[j];
+                     j++;
+                   }
+               trans->tags[l] = -1;
+             }
+
+           /* Set the parameter array.  If both `p2' and `p1' have same
+              parameters, the values in `p2' override those in `p1'. */
+           if (p1->params || p2->params)
+             {
+               if (!trans->params)
+                 trans->params = xmalloc(sizeof(*trans->params)
+                                         * TRE_PARAM_LAST);
+               if (!trans->params)
+                 return REG_ESPACE;
+               for (i = 0; i < TRE_PARAM_LAST; i++)
+                 {
+                   trans->params[i] = TRE_PARAM_UNSET;
+                   if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
+                     trans->params[i] = p1->params[i];
+                   if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
+                     trans->params[i] = p2->params[i];
+                 }
+             }
+           else
+             {
+               if (trans->params)
+                 xfree(trans->params);
+               trans->params = NULL;
+             }
+
+
+#ifdef TRE_DEBUG
+           {
+             int *tags;
+
+             DPRINT(("  %2d -> %2d on %3d", p1->position, p2->position,
+                     p1->code_min));
+             if (p1->code_max != p1->code_min)
+               DPRINT(("-%3d", p1->code_max));
+             tags = trans->tags;
+             if (tags)
+               {
+                 DPRINT((", tags ["));
+                 while (*tags >= 0)
+                   {
+                     DPRINT(("%d", *tags));
+                     tags++;
+                     if (*tags >= 0)
+                       DPRINT((","));
+                   }
+                 DPRINT(("]"));
+               }
+             if (trans->assertions)
+               DPRINT((", assert %d", trans->assertions));
+             if (trans->assertions & ASSERT_BACKREF)
+               DPRINT((", backref %d", trans->u.backref));
+             else if (trans->u.class)
+               DPRINT((", class %ld", (long)trans->u.class));
+             if (trans->neg_classes)
+               DPRINT((", neg_classes %p", trans->neg_classes));
+             if (trans->params)
+               {
+                 DPRINT((", "));
+                 tre_print_params(trans->params);
+               }
+             DPRINT(("\n"));
+           }
+#endif /* TRE_DEBUG */
+           p2++;
+         }
+       p1++;
+      }
+  else
+    /* Compute a maximum limit for the number of transitions leaving
+       from each state. */
+    while (p1->position >= 0)
+      {
+       p2 = orig_p2;
+       while (p2->position >= 0)
+         {
+           counts[p1->position]++;
+           p2++;
+         }
+       p1++;
+      }
+  return REG_OK;
+}
+
+/* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
+   labelled with one character range (there are no transitions on empty
+   strings).  The TNFA takes O(n^2) space in the worst case, `n' is size of
+   the regexp. */
+static reg_errcode_t
+tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
+               int *counts, int *offs)
+{
+  tre_union_t *uni;
+  tre_catenation_t *cat;
+  tre_iteration_t *iter;
+  reg_errcode_t errcode = REG_OK;
+
+  /* XXX - recurse using a stack!. */
+  switch (node->type)
+    {
+    case LITERAL:
+      break;
+    case UNION:
+      uni = (tre_union_t *)node->obj;
+      errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
+      if (errcode != REG_OK)
+       return errcode;
+      errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
+      break;
+
+    case CATENATION:
+      cat = (tre_catenation_t *)node->obj;
+      /* Add a transition from each position in cat->left->lastpos
+        to each position in cat->right->firstpos. */
+      errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
+                              transitions, counts, offs);
+      if (errcode != REG_OK)
+       return errcode;
+      errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
+      if (errcode != REG_OK)
+       return errcode;
+      errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
+      break;
+
+    case ITERATION:
+      iter = (tre_iteration_t *)node->obj;
+      assert(iter->max == -1 || iter->max == 1);
+
+      if (iter->max == -1)
+       {
+         assert(iter->min == 0 || iter->min == 1);
+         /* Add a transition from each last position in the iterated
+            expression to each first position. */
+         errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
+                                  transitions, counts, offs);
+         if (errcode != REG_OK)
+           return errcode;
+       }
+      errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
+      break;
+    }
+  return errcode;
+}
+
+
+#define ERROR_EXIT(err)                  \
+  do                             \
+    {                            \
+      errcode = err;             \
+      if (/*CONSTCOND*/1)        \
+       goto error_exit;          \
+    }                            \
+ while (/*CONSTCOND*/0)
+
+
+int
+tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
+{
+  tre_stack_t *stack;
+  tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
+  tre_pos_and_tags_t *p;
+  int *counts = NULL, *offs = NULL;
+  int i, add = 0;
+  tre_tnfa_transition_t *transitions, *initial;
+  tre_tnfa_t *tnfa = NULL;
+  tre_submatch_data_t *submatch_data;
+  tre_tag_direction_t *tag_directions = NULL;
+  reg_errcode_t errcode;
+  tre_mem_t mem;
+
+  /* Parse context. */
+  tre_parse_ctx_t parse_ctx;
+
+  /* Allocate a stack used throughout the compilation process for various
+     purposes. */
+  stack = tre_stack_new(512, 10240, 128);
+  if (!stack)
+    return REG_ESPACE;
+  /* Allocate a fast memory allocator. */
+  mem = tre_mem_new();
+  if (!mem)
+    {
+      tre_stack_destroy(stack);
+      return REG_ESPACE;
+    }
+
+  /* Parse the regexp. */
+  memset(&parse_ctx, 0, sizeof(parse_ctx));
+  parse_ctx.mem = mem;
+  parse_ctx.stack = stack;
+  parse_ctx.re = regex;
+  parse_ctx.len = n;
+  parse_ctx.cflags = cflags;
+  parse_ctx.max_backref = -1;
+  DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
+  errcode = tre_parse(&parse_ctx);
+  if (errcode != REG_OK)
+    ERROR_EXIT(errcode);
+  preg->re_nsub = parse_ctx.submatch_id - 1;
+  tree = parse_ctx.result;
+
+  /* Back references and approximate matching cannot currently be used
+     in the same regexp. */
+  if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
+    ERROR_EXIT(REG_BADPAT);
+
+#ifdef TRE_DEBUG
+  tre_ast_print(tree);
+#endif /* TRE_DEBUG */
+
+  /* Referring to nonexistent subexpressions is illegal. */
+  if (parse_ctx.max_backref > (int)preg->re_nsub)
+    ERROR_EXIT(REG_ESUBREG);
+
+  /* Allocate the TNFA struct. */
+  tnfa = xcalloc(1, sizeof(tre_tnfa_t));
+  if (tnfa == NULL)
+    ERROR_EXIT(REG_ESPACE);
+  tnfa->have_backrefs = parse_ctx.max_backref >= 0;
+  tnfa->have_approx = parse_ctx.have_approx;
+  tnfa->num_submatches = parse_ctx.submatch_id;
+
+  /* Set up tags for submatch addressing.  If REG_NOSUB is set and the
+     regexp does not have back references, this can be skipped. */
+  if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
+    {
+      DPRINT(("tre_compile: setting up tags\n"));
+
+      /* Figure out how many tags we will need. */
+      errcode = tre_add_tags(NULL, stack, tree, tnfa);
+      if (errcode != REG_OK)
+       ERROR_EXIT(errcode);
+#ifdef TRE_DEBUG
+      tre_ast_print(tree);
+#endif /* TRE_DEBUG */
+
+      if (tnfa->num_tags > 0)
+       {
+         tag_directions = xmalloc(sizeof(*tag_directions)
+                                  * (tnfa->num_tags + 1));
+         if (tag_directions == NULL)
+           ERROR_EXIT(REG_ESPACE);
+         tnfa->tag_directions = tag_directions;
+         memset(tag_directions, -1,
+                sizeof(*tag_directions) * (tnfa->num_tags + 1));
+       }
+      tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
+                                  sizeof(tnfa->minimal_tags));
+      if (tnfa->minimal_tags == NULL)
+       ERROR_EXIT(REG_ESPACE);
+
+      submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
+                             sizeof(*submatch_data));
+      if (submatch_data == NULL)
+       ERROR_EXIT(REG_ESPACE);
+      tnfa->submatch_data = submatch_data;
+
+      errcode = tre_add_tags(mem, stack, tree, tnfa);
+      if (errcode != REG_OK)
+       ERROR_EXIT(errcode);
+
+#ifdef TRE_DEBUG
+      for (i = 0; i < parse_ctx.submatch_id; i++)
+       DPRINT(("pmatch[%d] = {t%d, t%d}\n",
+               i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
+      for (i = 0; i < tnfa->num_tags; i++)
+       DPRINT(("t%d is %s\n", i,
+               tag_directions[i] == TRE_TAG_MINIMIZE ?
+               "minimized" : "maximized"));
+#endif /* TRE_DEBUG */
+    }
+
+  /* Expand iteration nodes. */
+  errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
+                          tag_directions, &tnfa->params_depth);
+  if (errcode != REG_OK)
+    ERROR_EXIT(errcode);
+
+  /* Add a dummy node for the final state.
+     XXX - For certain patterns this dummy node can be optimized away,
+          for example "a*" or "ab*".   Figure out a simple way to detect
+          this possibility. */
+  tmp_ast_l = tree;
+  tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
+  if (tmp_ast_r == NULL)
+    ERROR_EXIT(REG_ESPACE);
+
+  tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
+  if (tree == NULL)
+    ERROR_EXIT(REG_ESPACE);
+
+#ifdef TRE_DEBUG
+  tre_ast_print(tree);
+  DPRINT(("Number of states: %d\n", parse_ctx.position));
+#endif /* TRE_DEBUG */
+
+  errcode = tre_compute_nfl(mem, stack, tree);
+  if (errcode != REG_OK)
+    ERROR_EXIT(errcode);
+
+  counts = xmalloc(sizeof(int) * parse_ctx.position);
+  if (counts == NULL)
+    ERROR_EXIT(REG_ESPACE);
+
+  offs = xmalloc(sizeof(int) * parse_ctx.position);
+  if (offs == NULL)
+    ERROR_EXIT(REG_ESPACE);
+
+  for (i = 0; i < parse_ctx.position; i++)
+    counts[i] = 0;
+  tre_ast_to_tnfa(tree, NULL, counts, NULL);
+
+  add = 0;
+  for (i = 0; i < parse_ctx.position; i++)
+    {
+      offs[i] = add;
+      add += counts[i] + 1;
+      counts[i] = 0;
+    }
+  transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
+  if (transitions == NULL)
+    ERROR_EXIT(REG_ESPACE);
+  tnfa->transitions = transitions;
+  tnfa->num_transitions = add;
+
+  DPRINT(("Converting to TNFA:\n"));
+  errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
+  if (errcode != REG_OK)
+    ERROR_EXIT(errcode);
+
+  /* If in eight bit mode, compute a table of characters that can be the
+     first character of a match. */
+  tnfa->first_char = -1;
+  if (TRE_MB_CUR_MAX == 1 && !tmp_ast_l->nullable)
+    {
+      int count = 0;
+      tre_cint_t k;
+      DPRINT(("Characters that can start a match:"));
+      tnfa->firstpos_chars = xcalloc(256, sizeof(char));
+      if (tnfa->firstpos_chars == NULL)
+       ERROR_EXIT(REG_ESPACE);
+      for (p = tree->firstpos; p->position >= 0; p++)
+       {
+         tre_tnfa_transition_t *j = transitions + offs[p->position];
+         while (j->state != NULL)
+           {
+             for (k = j->code_min; k <= j->code_max && k < 256; k++)
+               {
+                 DPRINT((" %d", k));
+                 tnfa->firstpos_chars[k] = 1;
+                 count++;
+               }
+             j++;
+           }
+       }
+      DPRINT(("\n"));
+#define TRE_OPTIMIZE_FIRST_CHAR 1
+#if TRE_OPTIMIZE_FIRST_CHAR
+      if (count == 1)
+       {
+         for (k = 0; k < 256; k++)
+           if (tnfa->firstpos_chars[k])
+             {
+               DPRINT(("first char must be %d\n", k));
+               tnfa->first_char = k;
+               xfree(tnfa->firstpos_chars);
+               tnfa->firstpos_chars = NULL;
+               break;
+             }
+       }
+#endif
+
+    }
+  else
+    tnfa->firstpos_chars = NULL;
+
+
+  p = tree->firstpos;
+  i = 0;
+  while (p->position >= 0)
+    {
+      i++;
+
+#ifdef TRE_DEBUG
+      {
+       int *tags;
+       DPRINT(("initial: %d", p->position));
+       tags = p->tags;
+       if (tags != NULL)
+         {
+           if (*tags >= 0)
+             DPRINT(("/"));
+           while (*tags >= 0)
+             {
+               DPRINT(("%d", *tags));
+               tags++;
+               if (*tags >= 0)
+                 DPRINT((","));
+             }
+         }
+       DPRINT((", assert %d", p->assertions));
+       if (p->params)
+         {
+           DPRINT((", "));
+           tre_print_params(p->params);
+         }
+       DPRINT(("\n"));
+      }
+#endif /* TRE_DEBUG */
+
+      p++;
+    }
+
+  initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
+  if (initial == NULL)
+    ERROR_EXIT(REG_ESPACE);
+  tnfa->initial = initial;
+
+  i = 0;
+  for (p = tree->firstpos; p->position >= 0; p++)
+    {
+      initial[i].state = transitions + offs[p->position];
+      initial[i].state_id = p->position;
+      initial[i].tags = NULL;
+      /* Copy the arrays p->tags, and p->params, they are allocated
+        from a tre_mem object. */
+      if (p->tags)
+       {
+         int j;
+         for (j = 0; p->tags[j] >= 0; j++);
+         initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
+         if (!initial[i].tags)
+           ERROR_EXIT(REG_ESPACE);
+         memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
+       }
+      initial[i].params = NULL;
+      if (p->params)
+       {
+         initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
+         if (!initial[i].params)
+           ERROR_EXIT(REG_ESPACE);
+         memcpy(initial[i].params, p->params,
+                sizeof(*p->params) * TRE_PARAM_LAST);
+       }
+      initial[i].assertions = p->assertions;
+      i++;
+    }
+  initial[i].state = NULL;
+
+  tnfa->num_transitions = add;
+  tnfa->final = transitions + offs[tree->lastpos[0].position];
+  tnfa->num_states = parse_ctx.position;
+  tnfa->cflags = cflags;
+
+  DPRINT(("final state %p\n", (void *)tnfa->final));
+
+  tre_mem_destroy(mem);
+  tre_stack_destroy(stack);
+  xfree(counts);
+  xfree(offs);
+
+  preg->TRE_REGEX_T_FIELD = (void *)tnfa;
+  return REG_OK;
+
+ error_exit:
+  /* Free everything that was allocated and return the error code. */
+  tre_mem_destroy(mem);
+  if (stack != NULL)
+    tre_stack_destroy(stack);
+  if (counts != NULL)
+    xfree(counts);
+  if (offs != NULL)
+    xfree(offs);
+  preg->TRE_REGEX_T_FIELD = (void *)tnfa;
+  tre_free(preg);
+  return errcode;
+}
+
+
+
+
+void
+tre_free(regex_t *preg)
+{
+  tre_tnfa_t *tnfa;
+  unsigned int i;
+  tre_tnfa_transition_t *trans;
+
+  tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+  if (!tnfa)
+    return;
+
+  for (i = 0; i < tnfa->num_transitions; i++)
+    if (tnfa->transitions[i].state)
+      {
+       if (tnfa->transitions[i].tags)
+         xfree(tnfa->transitions[i].tags);
+       if (tnfa->transitions[i].neg_classes)
+         xfree(tnfa->transitions[i].neg_classes);
+       if (tnfa->transitions[i].params)
+         xfree(tnfa->transitions[i].params);
+      }
+  if (tnfa->transitions)
+    xfree(tnfa->transitions);
+
+  if (tnfa->initial)
+    {
+      for (trans = tnfa->initial; trans->state; trans++)
+       {
+         if (trans->tags)
+           xfree(trans->tags);
+         if (trans->params)
+           xfree(trans->params);
+       }
+      xfree(tnfa->initial);
+    }
+
+  if (tnfa->submatch_data)
+    {
+      for (i = 0; i < tnfa->num_submatches; i++)
+       if (tnfa->submatch_data[i].parents)
+         xfree(tnfa->submatch_data[i].parents);
+      xfree(tnfa->submatch_data);
+    }
+
+  if (tnfa->tag_directions)
+    xfree(tnfa->tag_directions);
+  if (tnfa->firstpos_chars)
+    xfree(tnfa->firstpos_chars);
+  if (tnfa->minimal_tags)
+    xfree(tnfa->minimal_tags);
+  xfree(tnfa);
+}
+
+char *
+tre_version(void)
+{
+  static char str[256];
+  char *version;
+
+  if (str[0] == 0)
+    {
+      (void) tre_config(TRE_CONFIG_VERSION, &version);
+      (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
+    }
+  return str;
+}
+
+int
+tre_config(int query, void *result)
+{
+  int *int_result = result;
+  const char **string_result = result;
+
+  switch (query)
+    {
+    case TRE_CONFIG_APPROX:
+#ifdef TRE_APPROX
+      *int_result = 1;
+#else /* !TRE_APPROX */
+      *int_result = 0;
+#endif /* !TRE_APPROX */
+      return REG_OK;
+
+    case TRE_CONFIG_WCHAR:
+#ifdef TRE_WCHAR
+      *int_result = 1;
+#else /* !TRE_WCHAR */
+      *int_result = 0;
+#endif /* !TRE_WCHAR */
+      return REG_OK;
+
+    case TRE_CONFIG_MULTIBYTE:
+#ifdef TRE_MULTIBYTE
+      *int_result = 1;
+#else /* !TRE_MULTIBYTE */
+      *int_result = 0;
+#endif /* !TRE_MULTIBYTE */
+      return REG_OK;
+
+    case TRE_CONFIG_SYSTEM_ABI:
+#ifdef TRE_CONFIG_SYSTEM_ABI
+      *int_result = 1;
+#else /* !TRE_CONFIG_SYSTEM_ABI */
+      *int_result = 0;
+#endif /* !TRE_CONFIG_SYSTEM_ABI */
+      return REG_OK;
+
+    case TRE_CONFIG_VERSION:
+      *string_result = TRE_VERSION;
+      return REG_OK;
+    }
+
+  return REG_NOMATCH;
+}
+
+
+/* EOF */
diff --git a/contrib/tre/lib/tre-compile.h b/contrib/tre/lib/tre-compile.h
new file mode 100644 (file)
index 0000000..51d5ac9
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+  tre-compile.h: Regex compilation definitions
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+
+#ifndef TRE_COMPILE_H
+#define TRE_COMPILE_H 1
+
+typedef struct {
+  int position;
+  int code_min;
+  int code_max;
+  int *tags;
+  int assertions;
+  tre_ctype_t class;
+  tre_ctype_t *neg_classes;
+  int backref;
+  int *params;
+} tre_pos_and_tags_t;
+
+#endif /* TRE_COMPILE_H */
+
+/* EOF */
diff --git a/contrib/tre/lib/tre-internal.h b/contrib/tre/lib/tre-internal.h
new file mode 100644 (file)
index 0000000..dacde8c
--- /dev/null
@@ -0,0 +1,287 @@
+/*
+  tre-internal.h - TRE internal definitions
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+#ifndef TRE_INTERNAL_H
+#define TRE_INTERNAL_H 1
+
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
+
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif /* !HAVE_WCTYPE_H */
+
+#include <ctype.h>
+#include "tre.h"
+
+#ifdef TRE_DEBUG
+#include <stdio.h>
+#define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/0)
+#else /* !TRE_DEBUG */
+#define DPRINT(msg) do { } while(/*CONSTCOND*/0)
+#endif /* !TRE_DEBUG */
+
+#define elementsof(x)  ( sizeof(x) / sizeof(x[0]) )
+
+#ifdef HAVE_MBRTOWC
+#define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps)))
+#else /* !HAVE_MBRTOWC */
+#ifdef HAVE_MBTOWC
+#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
+#endif /* HAVE_MBTOWC */
+#endif /* !HAVE_MBRTOWC */
+
+#ifdef TRE_MULTIBYTE
+#ifdef HAVE_MBSTATE_T
+#define TRE_MBSTATE
+#endif /* TRE_MULTIBYTE */
+#endif /* HAVE_MBSTATE_T */
+
+/* Define the character types and functions. */
+#ifdef TRE_WCHAR
+
+/* Wide characters. */
+typedef wint_t tre_cint_t;
+#define TRE_CHAR_MAX WCHAR_MAX
+
+#ifdef TRE_MULTIBYTE
+#define TRE_MB_CUR_MAX MB_CUR_MAX
+#else /* !TRE_MULTIBYTE */
+#define TRE_MB_CUR_MAX 1
+#endif /* !TRE_MULTIBYTE */
+
+#define tre_isalnum iswalnum
+#define tre_isalpha iswalpha
+#ifdef HAVE_ISWBLANK
+#define tre_isblank iswblank
+#endif /* HAVE_ISWBLANK */
+#define tre_iscntrl iswcntrl
+#define tre_isdigit iswdigit
+#define tre_isgraph iswgraph
+#define tre_islower iswlower
+#define tre_isprint iswprint
+#define tre_ispunct iswpunct
+#define tre_isspace iswspace
+#define tre_isupper iswupper
+#define tre_isxdigit iswxdigit
+
+#define tre_tolower towlower
+#define tre_toupper towupper
+#define tre_strlen  wcslen
+
+#else /* !TRE_WCHAR */
+
+/* 8 bit characters. */
+typedef short tre_cint_t;
+#define TRE_CHAR_MAX 255
+#define TRE_MB_CUR_MAX 1
+
+#define tre_isalnum isalnum
+#define tre_isalpha isalpha
+#ifdef HAVE_ISASCII
+#define tre_isascii isascii
+#endif /* HAVE_ISASCII */
+#ifdef HAVE_ISBLANK
+#define tre_isblank isblank
+#endif /* HAVE_ISBLANK */
+#define tre_iscntrl iscntrl
+#define tre_isdigit isdigit
+#define tre_isgraph isgraph
+#define tre_islower islower
+#define tre_isprint isprint
+#define tre_ispunct ispunct
+#define tre_isspace isspace
+#define tre_isupper isupper
+#define tre_isxdigit isxdigit
+
+#define tre_tolower(c) (tre_cint_t)(tolower(c))
+#define tre_toupper(c) (tre_cint_t)(toupper(c))
+#define tre_strlen(s)  (strlen((const char*)s))
+
+#endif /* !TRE_WCHAR */
+
+#if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE)
+#define TRE_USE_SYSTEM_WCTYPE 1
+#endif
+
+#ifdef TRE_USE_SYSTEM_WCTYPE
+/* Use system provided iswctype() and wctype(). */
+typedef wctype_t tre_ctype_t;
+#define tre_isctype iswctype
+#define tre_ctype   wctype
+#else /* !TRE_USE_SYSTEM_WCTYPE */
+/* Define our own versions of iswctype() and wctype(). */
+typedef int (*tre_ctype_t)(tre_cint_t);
+#define tre_isctype(c, type) ( (type)(c) )
+tre_ctype_t tre_ctype(const char *name);
+#endif /* !TRE_USE_SYSTEM_WCTYPE */
+
+typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t;
+
+/* Returns number of bytes to add to (char *)ptr to make it
+   properly aligned for the type. */
+#define ALIGN(ptr, type) \
+  ((((long)ptr) % sizeof(type)) \
+   ? (sizeof(type) - (((long)ptr) % sizeof(type))) \
+   : 0)
+
+#undef MAX
+#undef MIN
+#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
+#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
+
+/* Define STRF to the correct printf formatter for strings. */
+#ifdef TRE_WCHAR
+#define STRF "ls"
+#else /* !TRE_WCHAR */
+#define STRF "s"
+#endif /* !TRE_WCHAR */
+
+/* TNFA transition type. A TNFA state is an array of transitions,
+   the terminator is a transition with NULL `state'. */
+typedef struct tnfa_transition tre_tnfa_transition_t;
+
+struct tnfa_transition {
+  /* Range of accepted characters. */
+  tre_cint_t code_min;
+  tre_cint_t code_max;
+  /* Pointer to the destination state. */
+  tre_tnfa_transition_t *state;
+  /* ID number of the destination state. */
+  int state_id;
+  /* -1 terminated array of tags (or NULL). */
+  int *tags;
+  /* Matching parameters settings (or NULL). */
+  int *params;
+  /* Assertion bitmap. */
+  int assertions;
+  /* Assertion parameters. */
+  union {
+    /* Character class assertion. */
+    tre_ctype_t class;
+    /* Back reference assertion. */
+    int backref;
+  } u;
+  /* Negative character class assertions. */
+  tre_ctype_t *neg_classes;
+};
+
+
+/* Assertions. */
+#define ASSERT_AT_BOL            1   /* Beginning of line. */
+#define ASSERT_AT_EOL            2   /* End of line. */
+#define ASSERT_CHAR_CLASS        4   /* Character class in `class'. */
+#define ASSERT_CHAR_CLASS_NEG    8   /* Character classes in `neg_classes'. */
+#define ASSERT_AT_BOW           16   /* Beginning of word. */
+#define ASSERT_AT_EOW           32   /* End of word. */
+#define ASSERT_AT_WB            64   /* Word boundary. */
+#define ASSERT_AT_WB_NEG       128   /* Not a word boundary. */
+#define ASSERT_BACKREF         256   /* A back reference in `backref'. */
+#define ASSERT_LAST            256
+
+/* Tag directions. */
+typedef enum {
+  TRE_TAG_MINIMIZE = 0,
+  TRE_TAG_MAXIMIZE = 1
+} tre_tag_direction_t;
+
+/* Parameters that can be changed dynamically while matching. */
+typedef enum {
+  TRE_PARAM_COST_INS       = 0,
+  TRE_PARAM_COST_DEL       = 1,
+  TRE_PARAM_COST_SUBST     = 2,
+  TRE_PARAM_COST_MAX       = 3,
+  TRE_PARAM_MAX_INS        = 4,
+  TRE_PARAM_MAX_DEL        = 5,
+  TRE_PARAM_MAX_SUBST      = 6,
+  TRE_PARAM_MAX_ERR        = 7,
+  TRE_PARAM_DEPTH          = 8,
+  TRE_PARAM_LAST           = 9
+} tre_param_t;
+
+/* Unset matching parameter */
+#define TRE_PARAM_UNSET -1
+
+/* Signifies the default matching parameter value. */
+#define TRE_PARAM_DEFAULT -2
+
+/* Instructions to compute submatch register values from tag values
+   after a successful match.  */
+struct tre_submatch_data {
+  /* Tag that gives the value for rm_so (submatch start offset). */
+  int so_tag;
+  /* Tag that gives the value for rm_eo (submatch end offset). */
+  int eo_tag;
+  /* List of submatches this submatch is contained in. */
+  int *parents;
+};
+
+typedef struct tre_submatch_data tre_submatch_data_t;
+
+
+/* TNFA definition. */
+typedef struct tnfa tre_tnfa_t;
+
+struct tnfa {
+  tre_tnfa_transition_t *transitions;
+  unsigned int num_transitions;
+  tre_tnfa_transition_t *initial;
+  tre_tnfa_transition_t *final;
+  tre_submatch_data_t *submatch_data;
+  char *firstpos_chars;
+  int first_char;
+  unsigned int num_submatches;
+  tre_tag_direction_t *tag_directions;
+  int *minimal_tags;
+  int num_tags;
+  int num_minimals;
+  int end_tag;
+  int num_states;
+  int cflags;
+  int have_backrefs;
+  int have_approx;
+  int params_depth;
+};
+
+int
+tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags);
+
+void
+tre_free(regex_t *preg);
+
+void
+tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
+               const tre_tnfa_t *tnfa, int *tags, int match_eo);
+
+reg_errcode_t
+tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
+                     tre_str_type_t type, int *match_tags, int eflags,
+                     int *match_end_ofs);
+
+reg_errcode_t
+tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
+                     tre_str_type_t type, int *match_tags, int eflags,
+                     int *match_end_ofs);
+
+reg_errcode_t
+tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
+                      int len, tre_str_type_t type, int *match_tags,
+                      int eflags, int *match_end_ofs);
+
+#ifdef TRE_APPROX
+reg_errcode_t
+tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
+                   tre_str_type_t type, int *match_tags,
+                   regamatch_t *match, regaparams_t params,
+                   int eflags, int *match_end_ofs);
+#endif /* TRE_APPROX */
+
+#endif /* TRE_INTERNAL_H */
+
+/* EOF */
diff --git a/contrib/tre/lib/tre-match-backtrack.c b/contrib/tre/lib/tre-match-backtrack.c
new file mode 100644 (file)
index 0000000..eab13a7
--- /dev/null
@@ -0,0 +1,666 @@
+/*
+  tre-match-backtrack.c - TRE backtracking regex matching engine
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+/*
+  This matcher is for regexps that use back referencing.  Regexp matching
+  with back referencing is an NP-complete problem on the number of back
+  references.  The easiest way to match them is to use a backtracking
+  routine which basically goes through all possible paths in the TNFA
+  and chooses the one which results in the best (leftmost and longest)
+  match.  This can be spectacularly expensive and may run out of stack
+  space, but there really is no better known generic algorithm.         Quoting
+  Henry Spencer from comp.compilers:
+  <URL: http://compilers.iecc.com/comparch/article/93-03-102>
+
+    POSIX.2 REs require longest match, which is really exciting to
+    implement since the obsolete ("basic") variant also includes
+    \<digit>.  I haven't found a better way of tackling this than doing
+    a preliminary match using a DFA (or simulation) on a modified RE
+    that just replicates subREs for \<digit>, and then doing a
+    backtracking match to determine whether the subRE matches were
+    right.  This can be rather slow, but I console myself with the
+    thought that people who use \<digit> deserve very slow execution.
+    (Pun unintentional but very appropriate.)
+
+*/
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#ifdef TRE_USE_ALLOCA
+/* AIX requires this to be the first thing in the file.         */
+#ifndef __GNUC__
+# if HAVE_ALLOCA_H
+#  include <alloca.h>
+# else
+#  ifdef _AIX
+ #pragma alloca
+#  else
+#   ifndef alloca /* predefined by HP cc +Olibcalls */
+char *alloca ();
+#   endif
+#  endif
+# endif
+#endif
+#endif /* TRE_USE_ALLOCA */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif /* HAVE_WCTYPE_H */
+#ifndef TRE_WCHAR
+#include <ctype.h>
+#endif /* !TRE_WCHAR */
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif /* HAVE_MALLOC_H */
+
+#include "tre-internal.h"
+#include "tre-mem.h"
+#include "tre-match-utils.h"
+#include "tre.h"
+#include "xmalloc.h"
+
+typedef struct {
+  int pos;
+  const char *str_byte;
+#ifdef TRE_WCHAR
+  const wchar_t *str_wide;
+#endif /* TRE_WCHAR */
+  tre_tnfa_transition_t *state;
+  int state_id;
+  int next_c;
+  int *tags;
+#ifdef TRE_MBSTATE
+  mbstate_t mbstate;
+#endif /* TRE_MBSTATE */
+} tre_backtrack_item_t;
+
+typedef struct tre_backtrack_struct {
+  tre_backtrack_item_t item;
+  struct tre_backtrack_struct *prev;
+  struct tre_backtrack_struct *next;
+} *tre_backtrack_t;
+
+#ifdef TRE_WCHAR
+#define BT_STACK_WIDE_IN(_str_wide)    stack->item.str_wide = (_str_wide)
+#define BT_STACK_WIDE_OUT              (str_wide) = stack->item.str_wide
+#else /* !TRE_WCHAR */
+#define BT_STACK_WIDE_IN(_str_wide)
+#define BT_STACK_WIDE_OUT
+#endif /* !TRE_WCHAR */
+
+#ifdef TRE_MBSTATE
+#define BT_STACK_MBSTATE_IN  stack->item.mbstate = (mbstate)
+#define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate
+#else /* !TRE_MBSTATE */
+#define BT_STACK_MBSTATE_IN
+#define BT_STACK_MBSTATE_OUT
+#endif /* !TRE_MBSTATE */
+
+
+#ifdef TRE_USE_ALLOCA
+#define tre_bt_mem_new           tre_mem_newa
+#define tre_bt_mem_alloc         tre_mem_alloca
+#define tre_bt_mem_destroy(obj)          do { } while (0)
+#else /* !TRE_USE_ALLOCA */
+#define tre_bt_mem_new           tre_mem_new
+#define tre_bt_mem_alloc         tre_mem_alloc
+#define tre_bt_mem_destroy       tre_mem_destroy
+#endif /* !TRE_USE_ALLOCA */
+
+
+#define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \
+  do                                                                         \
+    {                                                                        \
+      int i;                                                                 \
+      if (!stack->next)                                                              \
+       {                                                                     \
+         tre_backtrack_t s;                                                  \
+         s = tre_bt_mem_alloc(mem, sizeof(*s));                              \
+         if (!s)                                                             \
+           {                                                                 \
+             tre_bt_mem_destroy(mem);                                        \
+             if (tags)                                                       \
+               xfree(tags);                                                  \
+             if (pmatch)                                                     \
+               xfree(pmatch);                                                \
+             if (states_seen)                                                \
+               xfree(states_seen);                                           \
+             return REG_ESPACE;                                              \
+           }                                                                 \
+         s->prev = stack;                                                    \
+         s->next = NULL;                                                     \
+         s->item.tags = tre_bt_mem_alloc(mem,                                \
+                                         sizeof(*tags) * tnfa->num_tags);    \
+         if (!s->item.tags)                                                  \
+           {                                                                 \
+             tre_bt_mem_destroy(mem);                                        \
+             if (tags)                                                       \
+               xfree(tags);                                                  \
+             if (pmatch)                                                     \
+               xfree(pmatch);                                                \
+             if (states_seen)                                                \
+               xfree(states_seen);                                           \
+             return REG_ESPACE;                                              \
+           }                                                                 \
+         stack->next = s;                                                    \
+         stack = s;                                                          \
+       }                                                                     \
+      else                                                                   \
+       stack = stack->next;                                                  \
+      stack->item.pos = (_pos);                                                      \
+      stack->item.str_byte = (_str_byte);                                    \
+      BT_STACK_WIDE_IN(_str_wide);                                           \
+      stack->item.state = (_state);                                          \
+      stack->item.state_id = (_state_id);                                    \
+      stack->item.next_c = (_next_c);                                        \
+      for (i = 0; i < tnfa->num_tags; i++)                                   \
+       stack->item.tags[i] = (_tags)[i];                                     \
+      BT_STACK_MBSTATE_IN;                                                   \
+    }                                                                        \
+  while (/*CONSTCOND*/0)
+
+#define BT_STACK_POP()                                                       \
+  do                                                                         \
+    {                                                                        \
+      int i;                                                                 \
+      assert(stack->prev);                                                   \
+      pos = stack->item.pos;                                                 \
+      if (type == STR_USER)                                                   \
+        str_source->rewind(pos + pos_add_next, str_source->context);          \
+      str_byte = stack->item.str_byte;                                       \
+      BT_STACK_WIDE_OUT;                                                     \
+      state = stack->item.state;                                             \
+      next_c = stack->item.next_c;                                           \
+      for (i = 0; i < tnfa->num_tags; i++)                                   \
+       tags[i] = stack->item.tags[i];                                        \
+      BT_STACK_MBSTATE_OUT;                                                  \
+      stack = stack->prev;                                                   \
+    }                                                                        \
+  while (/*CONSTCOND*/0)
+
+#undef MIN
+#define MIN(a, b) ((a) <= (b) ? (a) : (b))
+
+reg_errcode_t
+tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
+                      int len, tre_str_type_t type, int *match_tags,
+                      int eflags, int *match_end_ofs)
+{
+  /* State variables required by GET_NEXT_WCHAR. */
+  tre_char_t prev_c = 0, next_c = 0;
+  const char *str_byte = string;
+  int pos = 0;
+  unsigned int pos_add_next = 1;
+#ifdef TRE_WCHAR
+  const wchar_t *str_wide = string;
+#ifdef TRE_MBSTATE
+  mbstate_t mbstate;
+#endif /* TRE_MBSTATE */
+#endif /* TRE_WCHAR */
+  int reg_notbol = eflags & REG_NOTBOL;
+  int reg_noteol = eflags & REG_NOTEOL;
+  int reg_newline = tnfa->cflags & REG_NEWLINE;
+  int str_user_end = 0;
+
+  /* These are used to remember the necessary values of the above
+     variables to return to the position where the current search
+     started from. */
+  int next_c_start;
+  const char *str_byte_start;
+  int pos_start = -1;
+#ifdef TRE_WCHAR
+  const wchar_t *str_wide_start;
+#endif /* TRE_WCHAR */
+#ifdef TRE_MBSTATE
+  mbstate_t mbstate_start;
+#endif /* TRE_MBSTATE */
+
+  /* End offset of best match so far, or -1 if no match found yet. */
+  int match_eo = -1;
+  /* Tag arrays. */
+  int *next_tags, *tags = NULL;
+  /* Current TNFA state. */
+  tre_tnfa_transition_t *state;
+  int *states_seen = NULL;
+
+  /* Memory allocator to for allocating the backtracking stack. */
+  tre_mem_t mem = tre_bt_mem_new();
+
+  /* The backtracking stack. */
+  tre_backtrack_t stack;
+
+  tre_tnfa_transition_t *trans_i;
+  regmatch_t *pmatch = NULL;
+  int ret;
+
+#ifdef TRE_MBSTATE
+  memset(&mbstate, '\0', sizeof(mbstate));
+#endif /* TRE_MBSTATE */
+
+  if (!mem)
+    return REG_ESPACE;
+  stack = tre_bt_mem_alloc(mem, sizeof(*stack));
+  if (!stack)
+    {
+      ret = REG_ESPACE;
+      goto error_exit;
+    }
+  stack->prev = NULL;
+  stack->next = NULL;
+
+  DPRINT(("tnfa_execute_backtrack, input type %d\n", type));
+  DPRINT(("len = %d\n", len));
+
+#ifdef TRE_USE_ALLOCA
+  tags = alloca(sizeof(*tags) * tnfa->num_tags);
+  pmatch = alloca(sizeof(*pmatch) * tnfa->num_submatches);
+  states_seen = alloca(sizeof(*states_seen) * tnfa->num_states);
+#else /* !TRE_USE_ALLOCA */
+  if (tnfa->num_tags)
+    {
+      tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
+      if (!tags)
+       {
+         ret = REG_ESPACE;
+         goto error_exit;
+       }
+    }
+  if (tnfa->num_submatches)
+    {
+      pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches);
+      if (!pmatch)
+       {
+         ret = REG_ESPACE;
+         goto error_exit;
+       }
+    }
+  if (tnfa->num_states)
+    {
+      states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states);
+      if (!states_seen)
+       {
+         ret = REG_ESPACE;
+         goto error_exit;
+       }
+    }
+#endif /* !TRE_USE_ALLOCA */
+
+ retry:
+  {
+    int i;
+    for (i = 0; i < tnfa->num_tags; i++)
+      {
+       tags[i] = -1;
+       if (match_tags)
+         match_tags[i] = -1;
+      }
+    for (i = 0; i < tnfa->num_states; i++)
+      states_seen[i] = 0;
+  }
+
+  state = NULL;
+  pos = pos_start;
+  if (type == STR_USER)
+    str_source->rewind(pos + pos_add_next, str_source->context);
+  GET_NEXT_WCHAR();
+  pos_start = pos;
+  next_c_start = next_c;
+  str_byte_start = str_byte;
+#ifdef TRE_WCHAR
+  str_wide_start = str_wide;
+#endif /* TRE_WCHAR */
+#ifdef TRE_MBSTATE
+  mbstate_start = mbstate;
+#endif /* TRE_MBSTATE */
+
+  /* Handle initial states. */
+  next_tags = NULL;
+  for (trans_i = tnfa->initial; trans_i->state; trans_i++)
+    {
+      DPRINT(("> init %p, prev_c %lc\n", trans_i->state, (tre_cint_t)prev_c));
+      if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions))
+       {
+         DPRINT(("assert failed\n"));
+         continue;
+       }
+      if (state == NULL)
+       {
+         /* Start from this state. */
+         state = trans_i->state;
+         next_tags = trans_i->tags;
+       }
+      else
+       {
+         /* Backtrack to this state. */
+         DPRINT(("saving state %d for backtracking\n", trans_i->state_id));
+         BT_STACK_PUSH(pos, str_byte, str_wide, trans_i->state,
+                       trans_i->state_id, next_c, tags, mbstate);
+         {
+           int *tmp = trans_i->tags;
+           if (tmp)
+             while (*tmp >= 0)
+               stack->item.tags[*tmp++] = pos;
+         }
+       }
+    }
+
+  if (next_tags)
+    for (; *next_tags >= 0; next_tags++)
+      tags[*next_tags] = pos;
+
+
+  DPRINT(("entering match loop, pos %d, str_byte %p\n", pos, str_byte));
+  DPRINT(("pos:chr/code | state and tags\n"));
+  DPRINT(("-------------+------------------------------------------------\n"));
+
+  if (state == NULL)
+    goto backtrack;
+
+  while (/*CONSTCOND*/1)
+    {
+      tre_tnfa_transition_t *next_state;
+      int empty_br_match;
+
+      DPRINT(("start loop\n"));
+      if (state == tnfa->final)
+       {
+         DPRINT(("  match found, %d %d\n", match_eo, pos));
+         if (match_eo < pos
+             || (match_eo == pos
+                 && match_tags
+                 && tre_tag_order(tnfa->num_tags, tnfa->tag_directions,
+                                  tags, match_tags)))
+           {
+             int i;
+             /* This match wins the previous match. */
+             DPRINT(("  win previous\n"));
+             match_eo = pos;
+             if (match_tags)
+               for (i = 0; i < tnfa->num_tags; i++)
+                 match_tags[i] = tags[i];
+           }
+         /* Our TNFAs never have transitions leaving from the final state,
+            so we jump right to backtracking. */
+         goto backtrack;
+       }
+
+#ifdef TRE_DEBUG
+      DPRINT(("%3d:%2lc/%05d | %p ", pos, (tre_cint_t)next_c, (int)next_c,
+             state));
+      {
+       int i;
+       for (i = 0; i < tnfa->num_tags; i++)
+         DPRINT(("%d%s", tags[i], i < tnfa->num_tags - 1 ? ", " : ""));
+       DPRINT(("\n"));
+      }
+#endif /* TRE_DEBUG */
+
+      /* Go to the next character in the input string. */
+      empty_br_match = 0;
+      trans_i = state;
+      if (trans_i->state && trans_i->assertions & ASSERT_BACKREF)
+       {
+         /* This is a back reference state.  All transitions leaving from
+            this state have the same back reference "assertion".  Instead
+            of reading the next character, we match the back reference. */
+         int so, eo, bt = trans_i->u.backref;
+         int bt_len;
+         int result;
+
+         DPRINT(("  should match back reference %d\n", bt));
+         /* Get the substring we need to match against.  Remember to
+            turn off REG_NOSUB temporarily. */
+         tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & /*LINTED*/!REG_NOSUB,
+                         tnfa, tags, pos);
+         so = pmatch[bt].rm_so;
+         eo = pmatch[bt].rm_eo;
+         bt_len = eo - so;
+
+#ifdef TRE_DEBUG
+         {
+           int slen;
+           if (len < 0)
+             slen = bt_len;
+           else
+             slen = MIN(bt_len, len - pos);
+
+           if (type == STR_BYTE)
+             {
+               DPRINT(("  substring (len %d) is [%d, %d[: '%.*s'\n",
+                       bt_len, so, eo, bt_len, (char*)string + so));
+               DPRINT(("  current string is '%.*s'\n", slen, str_byte - 1));
+             }
+#ifdef TRE_WCHAR
+           else if (type == STR_WIDE)
+             {
+               DPRINT(("  substring (len %d) is [%d, %d[: '%.*" STRF "'\n",
+                       bt_len, so, eo, bt_len, (wchar_t*)string + so));
+               DPRINT(("  current string is '%.*" STRF "'\n",
+                       slen, str_wide - 1));
+             }
+#endif /* TRE_WCHAR */
+         }
+#endif
+
+         if (len < 0)
+           {
+             if (type == STR_USER)
+               result = str_source->compare((unsigned)so, (unsigned)pos,
+                                            (unsigned)bt_len,
+                                            str_source->context);
+#ifdef TRE_WCHAR
+             else if (type == STR_WIDE)
+               result = wcsncmp((const wchar_t*)string + so, str_wide - 1,
+                                (size_t)bt_len);
+#endif /* TRE_WCHAR */
+             else
+               result = strncmp((const char*)string + so, str_byte - 1,
+                                (size_t)bt_len);
+           }
+         else if (len - pos < bt_len)
+           result = 1;
+#ifdef TRE_WCHAR
+         else if (type == STR_WIDE)
+           result = wmemcmp((const wchar_t*)string + so, str_wide - 1,
+                            (size_t)bt_len);
+#endif /* TRE_WCHAR */
+         else
+           result = memcmp((const char*)string + so, str_byte - 1,
+                           (size_t)bt_len);
+
+         if (result == 0)
+           {
+             /* Back reference matched.  Check for infinite loop. */
+             if (bt_len == 0)
+               empty_br_match = 1;
+             if (empty_br_match && states_seen[trans_i->state_id])
+               {
+                 DPRINT(("  avoid loop\n"));
+                 goto backtrack;
+               }
+
+             states_seen[trans_i->state_id] = empty_br_match;
+
+             /* Advance in input string and resync `prev_c', `next_c'
+                and pos. */
+             DPRINT(("  back reference matched\n"));
+             str_byte += bt_len - 1;
+#ifdef TRE_WCHAR
+             str_wide += bt_len - 1;
+#endif /* TRE_WCHAR */
+             pos += bt_len - 1;
+             GET_NEXT_WCHAR();
+             DPRINT(("  pos now %d\n", pos));
+           }
+         else
+           {
+             DPRINT(("  back reference did not match\n"));
+             goto backtrack;
+           }
+       }
+      else
+       {
+         /* Check for end of string. */
+         if (len < 0)
+           {
+             if (type == STR_USER)
+               {
+                 if (str_user_end)
+                   goto backtrack;
+               }
+             else if (next_c == L'\0')
+               goto backtrack;
+           }
+         else
+           {
+             if (pos >= len)
+               goto backtrack;
+           }
+
+         /* Read the next character. */
+         GET_NEXT_WCHAR();
+       }
+
+      next_state = NULL;
+      for (trans_i = state; trans_i->state; trans_i++)
+       {
+         DPRINT(("  transition %d-%d (%c-%c) %d to %d\n",
+                 trans_i->code_min, trans_i->code_max,
+                 trans_i->code_min, trans_i->code_max,
+                 trans_i->assertions, trans_i->state_id));
+         if (trans_i->code_min <= (tre_cint_t)prev_c
+             && trans_i->code_max >= (tre_cint_t)prev_c)
+           {
+             if (trans_i->assertions
+                 && (CHECK_ASSERTIONS(trans_i->assertions)
+                     || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
+               {
+                 DPRINT(("  assertion failed\n"));
+                 continue;
+               }
+
+             if (next_state == NULL)
+               {
+                 /* First matching transition. */
+                 DPRINT(("  Next state is %d\n", trans_i->state_id));
+                 next_state = trans_i->state;
+                 next_tags = trans_i->tags;
+               }
+             else
+               {
+                 /* Second matching transition.  We may need to backtrack here
+                    to take this transition instead of the first one, so we
+                    push this transition in the backtracking stack so we can
+                    jump back here if needed. */
+                 DPRINT(("  saving state %d for backtracking\n",
+                         trans_i->state_id));
+                 BT_STACK_PUSH(pos, str_byte, str_wide, trans_i->state,
+                               trans_i->state_id, next_c, tags, mbstate);
+                 {
+                   int *tmp;
+                   for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++)
+                     stack->item.tags[*tmp] = pos;
+                 }
+#if 0 /* XXX - it's important not to look at all transitions here to keep
+        the stack small! */
+                 break;
+#endif
+               }
+           }
+       }
+
+      if (next_state != NULL)
+       {
+         /* Matching transitions were found.  Take the first one. */
+         state = next_state;
+
+         /* Update the tag values. */
+         if (next_tags)
+           while (*next_tags >= 0)
+             tags[*next_tags++] = pos;
+       }
+      else
+       {
+       backtrack:
+         /* A matching transition was not found.  Try to backtrack. */
+         if (stack->prev)
+           {
+             DPRINT(("  backtracking\n"));
+             if (stack->item.state->assertions && ASSERT_BACKREF)
+               {
+                 DPRINT(("  states_seen[%d] = 0\n",
+                         stack->item.state_id));
+                 states_seen[stack->item.state_id] = 0;
+               }
+
+             BT_STACK_POP();
+           }
+         else if (match_eo < 0)
+           {
+             /* Try starting from a later position in the input string. */
+             /* Check for end of string. */
+             if (len < 0)
+               {
+                 if (next_c == L'\0')
+                   {
+                     DPRINT(("end of string.\n"));
+                     break;
+                   }
+               }
+             else
+               {
+                 if (pos >= len)
+                   {
+                     DPRINT(("end of string.\n"));
+                     break;
+                   }
+               }
+             DPRINT(("restarting from next start position\n"));
+             next_c = next_c_start;
+#ifdef TRE_MBSTATE
+             mbstate = mbstate_start;
+#endif /* TRE_MBSTATE */
+             str_byte = str_byte_start;
+#ifdef TRE_WCHAR
+             str_wide = str_wide_start;
+#endif /* TRE_WCHAR */
+             goto retry;
+           }
+         else
+           {
+             DPRINT(("finished\n"));
+             break;
+           }
+       }
+    }
+
+  ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
+  *match_end_ofs = match_eo;
+
+ error_exit:
+  tre_bt_mem_destroy(mem);
+#ifndef TRE_USE_ALLOCA
+  if (tags)
+    xfree(tags);
+  if (pmatch)
+    xfree(pmatch);
+  if (states_seen)
+    xfree(states_seen);
+#endif /* !TRE_USE_ALLOCA */
+
+  return ret;
+}
diff --git a/contrib/tre/lib/tre-match-parallel.c b/contrib/tre/lib/tre-match-parallel.c
new file mode 100644 (file)
index 0000000..0d6ec17
--- /dev/null
@@ -0,0 +1,501 @@
+/*
+  tre-match-parallel.c - TRE parallel regex matching engine
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+/*
+  This algorithm searches for matches basically by reading characters
+  in the searched string one by one, starting at the beginning.         All
+  matching paths in the TNFA are traversed in parallel.         When two or
+  more paths reach the same state, exactly one is chosen according to
+  tag ordering rules; if returning submatches is not required it does
+  not matter which path is chosen.
+
+  The worst case time required for finding the leftmost and longest
+  match, or determining that there is no match, is always linearly
+  dependent on the length of the text being searched.
+
+  This algorithm cannot handle TNFAs with back referencing nodes.
+  See `tre-match-backtrack.c'.
+*/
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#ifdef TRE_USE_ALLOCA
+/* AIX requires this to be the first thing in the file.         */
+#ifndef __GNUC__
+# if HAVE_ALLOCA_H
+#  include <alloca.h>
+# else
+#  ifdef _AIX
+ #pragma alloca
+#  else
+#   ifndef alloca /* predefined by HP cc +Olibcalls */
+char *alloca ();
+#   endif
+#  endif
+# endif
+#endif
+#endif /* TRE_USE_ALLOCA */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif /* HAVE_WCTYPE_H */
+#ifndef TRE_WCHAR
+#include <ctype.h>
+#endif /* !TRE_WCHAR */
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif /* HAVE_MALLOC_H */
+
+#include "tre-internal.h"
+#include "tre-match-utils.h"
+#include "tre.h"
+#include "xmalloc.h"
+
+
+
+typedef struct {
+  tre_tnfa_transition_t *state;
+  int *tags;
+} tre_tnfa_reach_t;
+
+typedef struct {
+  int pos;
+  int **tags;
+} tre_reach_pos_t;
+
+
+#ifdef TRE_DEBUG
+static void
+tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_reach_t *reach, int num_tags)
+{
+  int i;
+
+  while (reach->state != NULL)
+    {
+      DPRINT((" %p", (void *)reach->state));
+      if (num_tags > 0)
+       {
+         DPRINT(("/"));
+         for (i = 0; i < num_tags; i++)
+           {
+             DPRINT(("%d:%d", i, reach->tags[i]));
+             if (i < (num_tags-1))
+               DPRINT((","));
+           }
+       }
+      reach++;
+    }
+  DPRINT(("\n"));
+
+}
+#endif /* TRE_DEBUG */
+
+reg_errcode_t
+tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
+                     tre_str_type_t type, int *match_tags, int eflags,
+                     int *match_end_ofs)
+{
+  /* State variables required by GET_NEXT_WCHAR. */
+  tre_char_t prev_c = 0, next_c = 0;
+  const char *str_byte = string;
+  int pos = -1;
+  unsigned int pos_add_next = 1;
+#ifdef TRE_WCHAR
+  const wchar_t *str_wide = string;
+#ifdef TRE_MBSTATE
+  mbstate_t mbstate;
+#endif /* TRE_MBSTATE */
+#endif /* TRE_WCHAR */
+  int reg_notbol = eflags & REG_NOTBOL;
+  int reg_noteol = eflags & REG_NOTEOL;
+  int reg_newline = tnfa->cflags & REG_NEWLINE;
+  int str_user_end = 0;
+
+  char *buf;
+  tre_tnfa_transition_t *trans_i;
+  tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
+  tre_reach_pos_t *reach_pos;
+  int *tag_i;
+  int num_tags, i;
+
+  int match_eo = -1;      /* end offset of match (-1 if no match found yet) */
+  int new_match = 0;
+  int *tmp_tags = NULL;
+  int *tmp_iptr;
+
+#ifdef TRE_MBSTATE
+  memset(&mbstate, '\0', sizeof(mbstate));
+#endif /* TRE_MBSTATE */
+
+  DPRINT(("tre_tnfa_run_parallel, input type %d\n", type));
+
+  if (!match_tags)
+    num_tags = 0;
+  else
+    num_tags = tnfa->num_tags;
+
+  /* Allocate memory for temporary data required for matching. This needs to
+     be done for every matching operation to be thread safe.  This allocates
+     everything in a single large block from the stack frame using alloca()
+     or with malloc() if alloca is unavailable. */
+  {
+    int tbytes, rbytes, pbytes, xbytes, total_bytes;
+    char *tmp_buf;
+    /* Compute the length of the block we need. */
+    tbytes = sizeof(*tmp_tags) * num_tags;
+    rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
+    pbytes = sizeof(*reach_pos) * tnfa->num_states;
+    xbytes = sizeof(int) * num_tags;
+    total_bytes =
+      (sizeof(long) - 1) * 4 /* for alignment paddings */
+      + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
+
+    /* Allocate the memory. */
+#ifdef TRE_USE_ALLOCA
+    buf = alloca(total_bytes);
+#else /* !TRE_USE_ALLOCA */
+    buf = xmalloc((unsigned)total_bytes);
+#endif /* !TRE_USE_ALLOCA */
+    if (buf == NULL)
+      return REG_ESPACE;
+    memset(buf, 0, (size_t)total_bytes);
+
+    /* Get the various pointers within tmp_buf (properly aligned). */
+    tmp_tags = (void *)buf;
+    tmp_buf = buf + tbytes;
+    tmp_buf += ALIGN(tmp_buf, long);
+    reach_next = (void *)tmp_buf;
+    tmp_buf += rbytes;
+    tmp_buf += ALIGN(tmp_buf, long);
+    reach = (void *)tmp_buf;
+    tmp_buf += rbytes;
+    tmp_buf += ALIGN(tmp_buf, long);
+    reach_pos = (void *)tmp_buf;
+    tmp_buf += pbytes;
+    tmp_buf += ALIGN(tmp_buf, long);
+    for (i = 0; i < tnfa->num_states; i++)
+      {
+       reach[i].tags = (void *)tmp_buf;
+       tmp_buf += xbytes;
+       reach_next[i].tags = (void *)tmp_buf;
+       tmp_buf += xbytes;
+      }
+  }
+
+  for (i = 0; i < tnfa->num_states; i++)
+    reach_pos[i].pos = -1;
+
+  /* If only one character can start a match, find it first. */
+  if (tnfa->first_char >= 0 && type == STR_BYTE && str_byte)
+    {
+      const char *orig_str = str_byte;
+      int first = tnfa->first_char;
+
+      if (len >= 0)
+       str_byte = memchr(orig_str, first, (size_t)len);
+      else
+       str_byte = strchr(orig_str, first);
+      if (str_byte == NULL)
+       {
+#ifndef TRE_USE_ALLOCA
+         if (buf)
+           xfree(buf);
+#endif /* !TRE_USE_ALLOCA */
+         return REG_NOMATCH;
+       }
+      DPRINT(("skipped %lu chars\n", (unsigned long)(str_byte - orig_str)));
+      if (str_byte >= orig_str + 1)
+       prev_c = (unsigned char)*(str_byte - 1);
+      next_c = (unsigned char)*str_byte;
+      pos = str_byte - orig_str;
+      if (len < 0 || pos < len)
+       str_byte++;
+    }
+  else
+    {
+      GET_NEXT_WCHAR();
+      pos = 0;
+    }
+
+#if 0
+  /* Skip over characters that cannot possibly be the first character
+     of a match. */
+  if (tnfa->firstpos_chars != NULL)
+    {
+      char *chars = tnfa->firstpos_chars;
+
+      if (len < 0)
+       {
+         const char *orig_str = str_byte;
+         /* XXX - use strpbrk() and wcspbrk() because they might be
+            optimized for the target architecture.  Try also strcspn()
+            and wcscspn() and compare the speeds. */
+         while (next_c != L'\0' && !chars[next_c])
+           {
+             next_c = *str_byte++;
+           }
+         prev_c = *(str_byte - 2);
+         pos += str_byte - orig_str;
+         DPRINT(("skipped %d chars\n", str_byte - orig_str));
+       }
+      else
+       {
+         while (pos <= len && !chars[next_c])
+           {
+             prev_c = next_c;
+             next_c = (unsigned char)(*str_byte++);
+             pos++;
+           }
+       }
+    }
+#endif
+
+  DPRINT(("length: %d\n", len));
+  DPRINT(("pos:chr/code | states and tags\n"));
+  DPRINT(("-------------+------------------------------------------------\n"));
+
+  reach_next_i = reach_next;
+  while (/*CONSTCOND*/1)
+    {
+      /* If no match found yet, add the initial states to `reach_next'. */
+      if (match_eo < 0)
+       {
+         DPRINT((" init >"));
+         trans_i = tnfa->initial;
+         while (trans_i->state != NULL)
+           {
+             if (reach_pos[trans_i->state_id].pos < pos)
+               {
+                 if (trans_i->assertions
+                     && CHECK_ASSERTIONS(trans_i->assertions))
+                   {
+                     DPRINT(("assertion failed\n"));
+                     trans_i++;
+                     continue;
+                   }
+
+                 DPRINT((" %p", (void *)trans_i->state));
+                 reach_next_i->state = trans_i->state;
+                 for (i = 0; i < num_tags; i++)
+                   reach_next_i->tags[i] = -1;
+                 tag_i = trans_i->tags;
+                 if (tag_i)
+                   while (*tag_i >= 0)
+                     {
+                       if (*tag_i < num_tags)
+                         reach_next_i->tags[*tag_i] = pos;
+                       tag_i++;
+                     }
+                 if (reach_next_i->state == tnfa->final)
+                   {
+                     DPRINT(("  found empty match\n"));
+                     match_eo = pos;
+                     new_match = 1;
+                     for (i = 0; i < num_tags; i++)
+                       match_tags[i] = reach_next_i->tags[i];
+                   }
+                 reach_pos[trans_i->state_id].pos = pos;
+                 reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
+                 reach_next_i++;
+               }
+             trans_i++;
+           }
+         DPRINT(("\n"));
+         reach_next_i->state = NULL;
+       }
+      else
+       {
+         if (num_tags == 0 || reach_next_i == reach_next)
+           /*¬†We have found a match. */
+           break;
+       }
+
+      /* Check for end of string. */
+      if (len < 0)
+       {
+         if (type == STR_USER)
+           {
+             if (str_user_end)
+               break;
+           }
+         else if (next_c == L'\0')
+           break;
+       }
+      else
+       {
+         if (pos >= len)
+           break;
+       }
+
+      GET_NEXT_WCHAR();
+
+#ifdef TRE_DEBUG
+      DPRINT(("%3d:%2lc/%05d |", pos - 1, (tre_cint_t)prev_c, (int)prev_c));
+      tre_print_reach(tnfa, reach_next, num_tags);
+      DPRINT(("%3d:%2lc/%05d |", pos, (tre_cint_t)next_c, (int)next_c));
+      tre_print_reach(tnfa, reach_next, num_tags);
+#endif /* TRE_DEBUG */
+
+      /* Swap `reach' and `reach_next'. */
+      reach_i = reach;
+      reach = reach_next;
+      reach_next = reach_i;
+
+      /* For each state in `reach', weed out states that don't fulfill the
+        minimal matching conditions. */
+      if (tnfa->num_minimals && new_match)
+       {
+         new_match = 0;
+         reach_next_i = reach_next;
+         for (reach_i = reach; reach_i->state; reach_i++)
+           {
+             int skip = 0;
+             for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2)
+               {
+                 int end = tnfa->minimal_tags[i];
+                 int start = tnfa->minimal_tags[i + 1];
+                 DPRINT(("  Minimal start %d, end %d\n", start, end));
+                 if (end >= num_tags)
+                   {
+                     DPRINT(("  Throwing %p out.\n", reach_i->state));
+                     skip = 1;
+                     break;
+                   }
+                 else if (reach_i->tags[start] == match_tags[start]
+                          && reach_i->tags[end] < match_tags[end])
+                   {
+                     DPRINT(("  Throwing %p out because t%d < %d\n",
+                             reach_i->state, end, match_tags[end]));
+                     skip = 1;
+                     break;
+                   }
+               }
+             if (!skip)
+               {
+                 reach_next_i->state = reach_i->state;
+                 tmp_iptr = reach_next_i->tags;
+                 reach_next_i->tags = reach_i->tags;
+                 reach_i->tags = tmp_iptr;
+                 reach_next_i++;
+               }
+           }
+         reach_next_i->state = NULL;
+
+         /* Swap `reach' and `reach_next'. */
+         reach_i = reach;
+         reach = reach_next;
+         reach_next = reach_i;
+       }
+
+      /* For each state in `reach' see if there is a transition leaving with
+        the current input symbol to a state not yet in `reach_next', and
+        add the destination states to `reach_next'. */
+      reach_next_i = reach_next;
+      for (reach_i = reach; reach_i->state; reach_i++)
+       {
+         for (trans_i = reach_i->state; trans_i->state; trans_i++)
+           {
+             /* Does this transition match the input symbol? */
+             if (trans_i->code_min <= (tre_cint_t)prev_c &&
+                 trans_i->code_max >= (tre_cint_t)prev_c)
+               {
+                 if (trans_i->assertions
+                     && (CHECK_ASSERTIONS(trans_i->assertions)
+                         || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
+                   {
+                     DPRINT(("assertion failed\n"));
+                     continue;
+                   }
+
+                 /* Compute the tags after this transition. */
+                 for (i = 0; i < num_tags; i++)
+                   tmp_tags[i] = reach_i->tags[i];
+                 tag_i = trans_i->tags;
+                 if (tag_i != NULL)
+                   while (*tag_i >= 0)
+                     {
+                       if (*tag_i < num_tags)
+                         tmp_tags[*tag_i] = pos;
+                       tag_i++;
+                     }
+
+                 if (reach_pos[trans_i->state_id].pos < pos)
+                   {
+                     /* Found an unvisited node. */
+                     reach_next_i->state = trans_i->state;
+                     tmp_iptr = reach_next_i->tags;
+                     reach_next_i->tags = tmp_tags;
+                     tmp_tags = tmp_iptr;
+                     reach_pos[trans_i->state_id].pos = pos;
+                     reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
+
+                     if (reach_next_i->state == tnfa->final
+                         && (match_eo == -1
+                             || (num_tags > 0
+                                 && reach_next_i->tags[0] <= match_tags[0])))
+                       {
+                         DPRINT(("  found match %p\n", trans_i->state));
+                         match_eo = pos;
+                         new_match = 1;
+                         for (i = 0; i < num_tags; i++)
+                           match_tags[i] = reach_next_i->tags[i];
+                       }
+                     reach_next_i++;
+
+                   }
+                 else
+                   {
+                     assert(reach_pos[trans_i->state_id].pos == pos);
+                     /* Another path has also reached this state.  We choose
+                        the winner by examining the tag values for both
+                        paths. */
+                     if (tre_tag_order(num_tags, tnfa->tag_directions,
+                                       tmp_tags,
+                                       *reach_pos[trans_i->state_id].tags))
+                       {
+                         /* The new path wins. */
+                         tmp_iptr = *reach_pos[trans_i->state_id].tags;
+                         *reach_pos[trans_i->state_id].tags = tmp_tags;
+                         if (trans_i->state == tnfa->final)
+                           {
+                             DPRINT(("  found better match\n"));
+                             match_eo = pos;
+                             new_match = 1;
+                             for (i = 0; i < num_tags; i++)
+                               match_tags[i] = tmp_tags[i];
+                           }
+                         tmp_tags = tmp_iptr;
+                       }
+                   }
+               }
+           }
+       }
+      reach_next_i->state = NULL;
+    }
+
+  DPRINT(("match end offset = %d\n", match_eo));
+
+#ifndef TRE_USE_ALLOCA
+  if (buf)
+    xfree(buf);
+#endif /* !TRE_USE_ALLOCA */
+
+  *match_end_ofs = match_eo;
+  return match_eo >= 0 ? REG_OK : REG_NOMATCH;
+}
+
+/* EOF */
diff --git a/contrib/tre/lib/tre-match-utils.h b/contrib/tre/lib/tre-match-utils.h
new file mode 100644 (file)
index 0000000..70db745
--- /dev/null
@@ -0,0 +1,215 @@
+/*
+  tre-match-utils.h - TRE matcher helper definitions
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+#define str_source ((const tre_str_source*)string)
+
+#ifdef TRE_WCHAR
+
+#ifdef TRE_MULTIBYTE
+
+/* Wide character and multibyte support. */
+
+#define GET_NEXT_WCHAR()                                                     \
+  do {                                                                       \
+    prev_c = next_c;                                                         \
+    if (type == STR_BYTE)                                                    \
+      {                                                                              \
+       pos++;                                                                \
+       if (len >= 0 && pos >= len)                                           \
+         next_c = '\0';                                                      \
+       else                                                                  \
+         next_c = (unsigned char)(*str_byte++);                              \
+      }                                                                              \
+    else if (type == STR_WIDE)                                               \
+      {                                                                              \
+       pos++;                                                                \
+       if (len >= 0 && pos >= len)                                           \
+         next_c = L'\0';                                                     \
+       else                                                                  \
+         next_c = *str_wide++;                                               \
+      }                                                                              \
+    else if (type == STR_MBS)                                                \
+      {                                                                              \
+        pos += pos_add_next;                                                 \
+       if (str_byte == NULL)                                                 \
+         next_c = L'\0';                                                     \
+       else                                                                  \
+         {                                                                   \
+           size_t w;                                                         \
+           int max;                                                          \
+           if (len >= 0)                                                     \
+             max = len - pos;                                                \
+           else                                                              \
+             max = 32;                                                       \
+           if (max <= 0)                                                     \
+             {                                                               \
+               next_c = L'\0';                                               \
+               pos_add_next = 1;                                             \
+             }                                                               \
+           else                                                              \
+             {                                                               \
+               w = tre_mbrtowc(&next_c, str_byte, (size_t)max, &mbstate);    \
+               if (w == (size_t)-1 || w == (size_t)-2)                       \
+                 return REG_NOMATCH;                                         \
+               if (w == 0 && len >= 0)                                       \
+                 {                                                           \
+                   pos_add_next = 1;                                         \
+                   next_c = 0;                                               \
+                   str_byte++;                                               \
+                 }                                                           \
+               else                                                          \
+                 {                                                           \
+                   pos_add_next = w;                                         \
+                   str_byte += w;                                            \
+                 }                                                           \
+             }                                                               \
+         }                                                                   \
+      }                                                                              \
+    else if (type == STR_USER)                                               \
+      {                                                                              \
+        pos += pos_add_next;                                                 \
+       str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
+                                                 str_source->context);       \
+      }                                                                              \
+  } while(/*CONSTCOND*/0)
+
+#else /* !TRE_MULTIBYTE */
+
+/* Wide character support, no multibyte support. */
+
+#define GET_NEXT_WCHAR()                                                     \
+  do {                                                                       \
+    prev_c = next_c;                                                         \
+    if (type == STR_BYTE)                                                    \
+      {                                                                              \
+       pos++;                                                                \
+       if (len >= 0 && pos >= len)                                           \
+         next_c = '\0';                                                      \
+       else                                                                  \
+         next_c = (unsigned char)(*str_byte++);                              \
+      }                                                                              \
+    else if (type == STR_WIDE)                                               \
+      {                                                                              \
+       pos++;                                                                \
+       if (len >= 0 && pos >= len)                                           \
+         next_c = L'\0';                                                     \
+       else                                                                  \
+         next_c = *str_wide++;                                               \
+      }                                                                              \
+    else if (type == STR_USER)                                               \
+      {                                                                              \
+        pos += pos_add_next;                                                 \
+       str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
+                                                 str_source->context);       \
+      }                                                                              \
+  } while(/*CONSTCOND*/0)
+
+#endif /* !TRE_MULTIBYTE */
+
+#else /* !TRE_WCHAR */
+
+/* No wide character or multibyte support. */
+
+#define GET_NEXT_WCHAR()                                                     \
+  do {                                                                       \
+    prev_c = next_c;                                                         \
+    if (type == STR_BYTE)                                                    \
+      {                                                                              \
+       pos++;                                                                \
+       if (len >= 0 && pos >= len)                                           \
+         next_c = '\0';                                                      \
+       else                                                                  \
+         next_c = (unsigned char)(*str_byte++);                              \
+      }                                                                              \
+    else if (type == STR_USER)                                               \
+      {                                                                              \
+       pos += pos_add_next;                                                  \
+       str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
+                                                str_source->context);        \
+      }                                                                              \
+  } while(/*CONSTCOND*/0)
+
+#endif /* !TRE_WCHAR */
+
+
+
+#define IS_WORD_CHAR(c)         ((c) == L'_' || tre_isalnum(c))
+
+#define CHECK_ASSERTIONS(assertions)                                         \
+  (((assertions & ASSERT_AT_BOL)                                             \
+    && (pos > 0 || reg_notbol)                                               \
+    && (prev_c != L'\n' || !reg_newline))                                    \
+   || ((assertions & ASSERT_AT_EOL)                                          \
+       && (next_c != L'\0' || reg_noteol)                                    \
+       && (next_c != L'\n' || !reg_newline))                                 \
+   || ((assertions & ASSERT_AT_BOW)                                          \
+       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))                   \
+   || ((assertions & ASSERT_AT_EOW)                                          \
+       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))                   \
+   || ((assertions & ASSERT_AT_WB)                                           \
+       && (pos != 0 && next_c != L'\0'                                       \
+          && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c)))                  \
+   || ((assertions & ASSERT_AT_WB_NEG)                                       \
+       && (pos == 0 || next_c == L'\0'                                       \
+          || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
+
+#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)                             \
+  (((trans_i->assertions & ASSERT_CHAR_CLASS)                                 \
+       && !(tnfa->cflags & REG_ICASE)                                         \
+       && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class))                 \
+    || ((trans_i->assertions & ASSERT_CHAR_CLASS)                             \
+        && (tnfa->cflags & REG_ICASE)                                         \
+        && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class)     \
+       && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class))    \
+    || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG)                         \
+        && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\
+                                      tnfa->cflags & REG_ICASE)))
+
+
+
+
+/* Returns 1 if `t1' wins `t2', 0 otherwise. */
+inline static int
+tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
+             int *t1, int *t2)
+{
+  int i;
+  for (i = 0; i < num_tags; i++)
+    {
+      if (tag_directions[i] == TRE_TAG_MINIMIZE)
+       {
+         if (t1[i] < t2[i])
+           return 1;
+         if (t1[i] > t2[i])
+           return 0;
+       }
+      else
+       {
+         if (t1[i] > t2[i])
+           return 1;
+         if (t1[i] < t2[i])
+           return 0;
+       }
+    }
+  /*  assert(0);*/
+  return 0;
+}
+
+inline static int
+tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
+{
+  DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase));
+  while (*classes != (tre_ctype_t)0)
+    if ((!icase && tre_isctype(wc, *classes))
+       || (icase && (tre_isctype(tre_toupper(wc), *classes)
+                     || tre_isctype(tre_tolower(wc), *classes))))
+      return 1; /* Match. */
+    else
+      classes++;
+  return 0; /* No match. */
+}
diff --git a/contrib/tre/lib/tre-mem.c b/contrib/tre/lib/tre-mem.c
new file mode 100644 (file)
index 0000000..b5fb276
--- /dev/null
@@ -0,0 +1,155 @@
+/*
+  tre-mem.c - TRE memory allocator
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+/*
+  This memory allocator is for allocating small memory blocks efficiently
+  in terms of memory overhead and execution speed.  The allocated blocks
+  cannot be freed individually, only all at once.  There can be multiple
+  allocators, though.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+#include <stdlib.h>
+#include <string.h>
+
+#include "tre-internal.h"
+#include "tre-mem.h"
+#include "xmalloc.h"
+
+
+/* Returns a new memory allocator or NULL if out of memory. */
+tre_mem_t
+tre_mem_new_impl(int provided, void *provided_block)
+{
+  tre_mem_t mem;
+  if (provided)
+    {
+      mem = provided_block;
+      memset(mem, 0, sizeof(*mem));
+    }
+  else
+    mem = xcalloc(1, sizeof(*mem));
+  if (mem == NULL)
+    return NULL;
+  return mem;
+}
+
+
+/* Frees the memory allocator and all memory allocated with it. */
+void
+tre_mem_destroy(tre_mem_t mem)
+{
+  tre_list_t *tmp, *l = mem->blocks;
+
+  while (l != NULL)
+    {
+      xfree(l->data);
+      tmp = l->next;
+      xfree(l);
+      l = tmp;
+    }
+  xfree(mem);
+}
+
+
+/* Allocates a block of `size' bytes from `mem'.  Returns a pointer to the
+   allocated block or NULL if an underlying malloc() failed. */
+void *
+tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block,
+                  int zero, size_t size)
+{
+  void *ptr;
+
+  if (mem->failed)
+    {
+      DPRINT(("tre_mem_alloc: oops, called after failure?!\n"));
+      return NULL;
+    }
+
+#ifdef MALLOC_DEBUGGING
+  if (!provided)
+    {
+      ptr = xmalloc(1);
+      if (ptr == NULL)
+       {
+         DPRINT(("tre_mem_alloc: xmalloc forced failure\n"));
+         mem->failed = 1;
+         return NULL;
+       }
+      xfree(ptr);
+    }
+#endif /* MALLOC_DEBUGGING */
+
+  if (mem->n < size)
+    {
+      /* We need more memory than is available in the current block.
+        Allocate a new block. */
+      tre_list_t *l;
+      if (provided)
+       {
+         DPRINT(("tre_mem_alloc: using provided block\n"));
+         if (provided_block == NULL)
+           {
+             DPRINT(("tre_mem_alloc: provided block was NULL\n"));
+             mem->failed = 1;
+             return NULL;
+           }
+         mem->ptr = provided_block;
+         mem->n = TRE_MEM_BLOCK_SIZE;
+       }
+      else
+       {
+         int block_size;
+         if (size * 8 > TRE_MEM_BLOCK_SIZE)
+           block_size = size * 8;
+         else
+           block_size = TRE_MEM_BLOCK_SIZE;
+         DPRINT(("tre_mem_alloc: allocating new %d byte block\n",
+                 block_size));
+         l = xmalloc(sizeof(*l));
+         if (l == NULL)
+           {
+             mem->failed = 1;
+             return NULL;
+           }
+         l->data = xmalloc(block_size);
+         if (l->data == NULL)
+           {
+             xfree(l);
+             mem->failed = 1;
+             return NULL;
+           }
+         l->next = NULL;
+         if (mem->current != NULL)
+           mem->current->next = l;
+         if (mem->blocks == NULL)
+           mem->blocks = l;
+         mem->current = l;
+         mem->ptr = l->data;
+         mem->n = block_size;
+       }
+    }
+
+  /* Make sure the next pointer will be aligned. */
+  size += ALIGN(mem->ptr + size, long);
+
+  /* Allocate from current block. */
+  ptr = mem->ptr;
+  mem->ptr += size;
+  mem->n -= size;
+
+  /* Set to zero if needed. */
+  if (zero)
+    memset(ptr, 0, size);
+
+  return ptr;
+}
+
+/* EOF */
diff --git a/contrib/tre/lib/tre-mem.h b/contrib/tre/lib/tre-mem.h
new file mode 100644 (file)
index 0000000..2859404
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+  tre-mem.h - TRE memory allocator interface
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+#ifndef TRE_MEM_H
+#define TRE_MEM_H 1
+
+#include <stdlib.h>
+
+#define TRE_MEM_BLOCK_SIZE 1024
+
+typedef struct tre_list {
+  void *data;
+  struct tre_list *next;
+} tre_list_t;
+
+typedef struct tre_mem_struct {
+  tre_list_t *blocks;
+  tre_list_t *current;
+  char *ptr;
+  size_t n;
+  int failed;
+  void **provided;
+} *tre_mem_t;
+
+
+tre_mem_t tre_mem_new_impl(int provided, void *provided_block);
+void *tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block,
+                        int zero, size_t size);
+
+/* Returns a new memory allocator or NULL if out of memory. */
+#define tre_mem_new()  tre_mem_new_impl(0, NULL)
+
+/* Allocates a block of `size' bytes from `mem'.  Returns a pointer to the
+   allocated block or NULL if an underlying malloc() failed. */
+#define tre_mem_alloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 0, size)
+
+/* Allocates a block of `size' bytes from `mem'.  Returns a pointer to the
+   allocated block or NULL if an underlying malloc() failed.  The memory
+   is set to zero. */
+#define tre_mem_calloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 1, size)
+
+#ifdef TRE_USE_ALLOCA
+/* alloca() versions.  Like above, but memory is allocated with alloca()
+   instead of malloc(). */
+
+#define tre_mem_newa() \
+  tre_mem_new_impl(1, alloca(sizeof(struct tre_mem_struct)))
+
+#define tre_mem_alloca(mem, size)                                            \
+  ((mem)->n >= (size)                                                        \
+   ? tre_mem_alloc_impl((mem), 1, NULL, 0, (size))                           \
+   : tre_mem_alloc_impl((mem), 1, alloca(TRE_MEM_BLOCK_SIZE), 0, (size)))
+#endif /* TRE_USE_ALLOCA */
+
+
+/* Frees the memory allocator and all memory allocated with it. */
+void tre_mem_destroy(tre_mem_t mem);
+
+#endif /* TRE_MEM_H */
+
+/* EOF */
diff --git a/contrib/tre/lib/tre-parse.c b/contrib/tre/lib/tre-parse.c
new file mode 100644 (file)
index 0000000..9282725
--- /dev/null
@@ -0,0 +1,1739 @@
+/*
+  tre-parse.c - Regexp parser
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+/*
+  This parser is just a simple recursive descent parser for POSIX.2
+  regexps.  The parser supports both the obsolete default syntax and
+  the "extended" syntax, and some nonstandard extensions.
+*/
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+
+#include "xmalloc.h"
+#include "tre-mem.h"
+#include "tre-ast.h"
+#include "tre-stack.h"
+#include "tre-parse.h"
+
+
+/* Characters with special meanings in regexp syntax. */
+#define CHAR_PIPE         L'|'
+#define CHAR_LPAREN       L'('
+#define CHAR_RPAREN       L')'
+#define CHAR_LBRACE       L'{'
+#define CHAR_RBRACE       L'}'
+#define CHAR_LBRACKET     L'['
+#define CHAR_RBRACKET     L']'
+#define CHAR_MINUS        L'-'
+#define CHAR_STAR         L'*'
+#define CHAR_QUESTIONMARK  L'?'
+#define CHAR_PLUS         L'+'
+#define CHAR_PERIOD       L'.'
+#define CHAR_COLON        L':'
+#define CHAR_EQUAL        L'='
+#define CHAR_COMMA        L','
+#define CHAR_CARET        L'^'
+#define CHAR_DOLLAR       L'$'
+#define CHAR_BACKSLASH    L'\\'
+#define CHAR_HASH         L'#'
+#define CHAR_TILDE        L'~'
+
+
+/* Some macros for expanding \w, \s, etc. */
+static const struct tre_macro_struct {
+  const char c;
+  const char *expansion;
+} tre_macros[] =
+  { {'t', "\t"},          {'n', "\n"},            {'r', "\r"},
+    {'f', "\f"},          {'a', "\a"},            {'e', "\033"},
+    {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
+    {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"},   {'D', "[^[:digit:]]"},
+    { 0, NULL }
+  };
+
+
+/* Expands a macro delimited by `regex' and `regex_end' to `buf', which
+   must have at least `len' items.  Sets buf[0] to zero if the there
+   is no match in `tre_macros'. */
+static void
+tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end,
+                tre_char_t *buf, size_t buf_len)
+{
+  int i;
+
+  buf[0] = 0;
+  if (regex >= regex_end)
+    return;
+
+  for (i = 0; tre_macros[i].expansion; i++)
+    {
+      if (tre_macros[i].c == *regex)
+       {
+         unsigned int j;
+         DPRINT(("Expanding macro '%c' => '%s'\n",
+                 tre_macros[i].c, tre_macros[i].expansion));
+         for (j = 0; tre_macros[i].expansion[j] && j < buf_len; j++)
+           buf[j] = tre_macros[i].expansion[j];
+         buf[j] = 0;
+         break;
+       }
+    }
+}
+
+static reg_errcode_t
+tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i,
+        tre_ast_node_t ***items)
+{
+  reg_errcode_t status;
+  tre_ast_node_t **array = *items;
+  /* Allocate more space if necessary. */
+  if (*i >= *max_i)
+    {
+      tre_ast_node_t **new_items;
+      DPRINT(("out of array space, i = %d\n", *i));
+      /* If the array is already 1024 items large, give up -- there's
+        probably an error in the regexp (e.g. not a '\0' terminated
+        string and missing ']') */
+      if (*max_i > 1024)
+       return REG_ESPACE;
+      *max_i *= 2;
+      new_items = xrealloc(array, sizeof(*items) * *max_i);
+      if (new_items == NULL)
+       return REG_ESPACE;
+      *items = array = new_items;
+    }
+  array[*i] = tre_ast_new_literal(mem, min, max, -1);
+  status = array[*i] == NULL ? REG_ESPACE : REG_OK;
+  (*i)++;
+  return status;
+}
+
+
+/* Expands a character class to character ranges. */
+static reg_errcode_t
+tre_expand_ctype(tre_mem_t mem, tre_ctype_t class, tre_ast_node_t ***items,
+                int *i, int *max_i, int cflags)
+{
+  reg_errcode_t status = REG_OK;
+  tre_cint_t c;
+  int j, min = -1, max = 0;
+  assert(TRE_MB_CUR_MAX == 1);
+
+  DPRINT(("  expanding class to character ranges\n"));
+  for (j = 0; (j < 256) && (status == REG_OK); j++)
+    {
+      c = j;
+      if (tre_isctype(c, class)
+         || ((cflags & REG_ICASE)
+             && (tre_isctype(tre_tolower(c), class)
+                 || tre_isctype(tre_toupper(c), class))))
+{
+         if (min < 0)
+           min = c;
+         max = c;
+       }
+      else if (min >= 0)
+       {
+         DPRINT(("  range %c (%d) to %c (%d)\n", min, min, max, max));
+         status = tre_new_item(mem, min, max, i, max_i, items);
+         min = -1;
+       }
+    }
+  if (min >= 0 && status == REG_OK)
+    status = tre_new_item(mem, min, max, i, max_i, items);
+  return status;
+}
+
+
+static int
+tre_compare_items(const void *a, const void *b)
+{
+  const tre_ast_node_t *node_a = *(tre_ast_node_t * const *)a;
+  const tre_ast_node_t *node_b = *(tre_ast_node_t * const *)b;
+  tre_literal_t *l_a = node_a->obj, *l_b = node_b->obj;
+  int a_min = l_a->code_min, b_min = l_b->code_min;
+
+  if (a_min < b_min)
+    return -1;
+  else if (a_min > b_min)
+    return 1;
+  else
+    return 0;
+}
+
+#ifndef TRE_USE_SYSTEM_WCTYPE
+
+/* isalnum() and the rest may be macros, so wrap them to functions. */
+int tre_isalnum_func(tre_cint_t c) { return tre_isalnum(c); }
+int tre_isalpha_func(tre_cint_t c) { return tre_isalpha(c); }
+
+#ifdef tre_isascii
+int tre_isascii_func(tre_cint_t c) { return tre_isascii(c); }
+#else /* !tre_isascii */
+int tre_isascii_func(tre_cint_t c) { return !(c >> 7); }
+#endif /* !tre_isascii */
+
+#ifdef tre_isblank
+int tre_isblank_func(tre_cint_t c) { return tre_isblank(c); }
+#else /* !tre_isblank */
+int tre_isblank_func(tre_cint_t c) { return ((c == ' ') || (c == '\t')); }
+#endif /* !tre_isblank */
+
+int tre_iscntrl_func(tre_cint_t c) { return tre_iscntrl(c); }
+int tre_isdigit_func(tre_cint_t c) { return tre_isdigit(c); }
+int tre_isgraph_func(tre_cint_t c) { return tre_isgraph(c); }
+int tre_islower_func(tre_cint_t c) { return tre_islower(c); }
+int tre_isprint_func(tre_cint_t c) { return tre_isprint(c); }
+int tre_ispunct_func(tre_cint_t c) { return tre_ispunct(c); }
+int tre_isspace_func(tre_cint_t c) { return tre_isspace(c); }
+int tre_isupper_func(tre_cint_t c) { return tre_isupper(c); }
+int tre_isxdigit_func(tre_cint_t c) { return tre_isxdigit(c); }
+
+struct {
+  char *name;
+  int (*func)(tre_cint_t);
+} tre_ctype_map[] = {
+  { "alnum", &tre_isalnum_func },
+  { "alpha", &tre_isalpha_func },
+#ifdef tre_isascii
+  { "ascii", &tre_isascii_func },
+#endif /* tre_isascii */
+#ifdef tre_isblank
+  { "blank", &tre_isblank_func },
+#endif /* tre_isblank */
+  { "cntrl", &tre_iscntrl_func },
+  { "digit", &tre_isdigit_func },
+  { "graph", &tre_isgraph_func },
+  { "lower", &tre_islower_func },
+  { "print", &tre_isprint_func },
+  { "punct", &tre_ispunct_func },
+  { "space", &tre_isspace_func },
+  { "upper", &tre_isupper_func },
+  { "xdigit", &tre_isxdigit_func },
+  { NULL, NULL}
+};
+
+tre_ctype_t tre_ctype(const char *name)
+{
+  int i;
+  for (i = 0; tre_ctype_map[i].name != NULL; i++)
+    {
+      if (strcmp(name, tre_ctype_map[i].name) == 0)
+       return tre_ctype_map[i].func;
+    }
+  return (tre_ctype_t)0;
+}
+#endif /* !TRE_USE_SYSTEM_WCTYPE */
+
+/* Maximum number of character classes that can occur in a negated bracket
+   expression. */
+#define MAX_NEG_CLASSES 64
+
+/* Maximum length of character class names. */
+#define MAX_CLASS_NAME
+
+#define REST(re) (int)(ctx->re_end - (re)), (re)
+
+static reg_errcode_t
+tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
+                       tre_ctype_t neg_classes[], int *num_neg_classes,
+                       tre_ast_node_t ***items, int *num_items,
+                       int *items_size)
+{
+  const tre_char_t *re = ctx->re;
+  reg_errcode_t status = REG_OK;
+  tre_ctype_t class = (tre_ctype_t)0;
+  int i = *num_items;
+  int max_i = *items_size;
+  int skip;
+
+  /* Build an array of the items in the bracket expression. */
+  while (status == REG_OK)
+    {
+      skip = 0;
+      if (re == ctx->re_end)
+       {
+         status = REG_EBRACK;
+       }
+      else if (*re == CHAR_RBRACKET && re > ctx->re)
+       {
+         DPRINT(("tre_parse_bracket:   done: '%.*" STRF "'\n", REST(re)));
+         re++;
+         break;
+       }
+      else
+       {
+         tre_cint_t min = 0, max = 0;
+
+         class = (tre_ctype_t)0;
+         if (re + 2 < ctx->re_end
+             && *(re + 1) == CHAR_MINUS && *(re + 2) != CHAR_RBRACKET)
+           {
+             DPRINT(("tre_parse_bracket:  range: '%.*" STRF "'\n", REST(re)));
+             min = *re;
+             max = *(re + 2);
+             re += 3;
+             /* XXX - Should use collation order instead of encoding values
+                in character ranges. */
+             if (min > max)
+               status = REG_ERANGE;
+           }
+         else if (re + 1 < ctx->re_end
+                  && *re == CHAR_LBRACKET && *(re + 1) == CHAR_PERIOD)
+           status = REG_ECOLLATE;
+         else if (re + 1 < ctx->re_end
+                  && *re == CHAR_LBRACKET && *(re + 1) == CHAR_EQUAL)
+           status = REG_ECOLLATE;
+         else if (re + 1 < ctx->re_end
+                  && *re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON)
+           {
+             char tmp_str[64];
+             const tre_char_t *endptr = re + 2;
+             int len;
+             DPRINT(("tre_parse_bracket:  class: '%.*" STRF "'\n", REST(re)));
+             while (endptr < ctx->re_end && *endptr != CHAR_COLON)
+               endptr++;
+             if (endptr != ctx->re_end)
+               {
+                 len = MIN(endptr - re - 2, 63);
+#ifdef TRE_WCHAR
+                 {
+                   tre_char_t tmp_wcs[64];
+                   wcsncpy(tmp_wcs, re + 2, (size_t)len);
+                   tmp_wcs[len] = L'\0';
+#if defined HAVE_WCSRTOMBS
+                   {
+                     mbstate_t state;
+                     const tre_char_t *src = tmp_wcs;
+                     memset(&state, '\0', sizeof(state));
+                     len = wcsrtombs(tmp_str, &src, sizeof(tmp_str), &state);
+                   }
+#elif defined HAVE_WCSTOMBS
+                   len = wcstombs(tmp_str, tmp_wcs, 63);
+#endif /* defined HAVE_WCSTOMBS */
+                 }
+#else /* !TRE_WCHAR */
+                 strncpy(tmp_str, (const char*)re + 2, len);
+#endif /* !TRE_WCHAR */
+                 tmp_str[len] = '\0';
+                 DPRINT(("  class name: %s\n", tmp_str));
+                 class = tre_ctype(tmp_str);
+                 if (!class)
+                   status = REG_ECTYPE;
+                 /* Optimize character classes for 8 bit character sets. */
+                 if (status == REG_OK && TRE_MB_CUR_MAX == 1)
+                   {
+                     status = tre_expand_ctype(ctx->mem, class, items,
+                                               &i, &max_i, ctx->cflags);
+                     class = (tre_ctype_t)0;
+                     skip = 1;
+                   }
+                 re = endptr + 2;
+               }
+             else
+               status = REG_ECTYPE;
+             min = 0;
+             max = TRE_CHAR_MAX;
+           }
+         else
+           {
+             DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
+             if (*re == CHAR_MINUS && *(re + 1) != CHAR_RBRACKET
+                 && ctx->re != re)
+               /* Two ranges are not allowed to share and endpoint. */
+               status = REG_ERANGE;
+             min = max = *re++;
+           }
+
+         if (status != REG_OK)
+           break;
+
+         if (class && negate)
+           if (*num_neg_classes >= MAX_NEG_CLASSES)
+             status = REG_ESPACE;
+           else
+             neg_classes[(*num_neg_classes)++] = class;
+         else if (!skip)
+           {
+             status = tre_new_item(ctx->mem, min, max, &i, &max_i, items);
+             if (status != REG_OK)
+               break;
+             ((tre_literal_t*)((*items)[i-1])->obj)->u.class = class;
+           }
+
+         /* Add opposite-case counterpoints if REG_ICASE is present.
+            This is broken if there are more than two "same" characters. */
+         if (ctx->cflags & REG_ICASE && !class && status == REG_OK && !skip)
+           {
+             tre_cint_t cmin, ccurr;
+
+             DPRINT(("adding opposite-case counterpoints\n"));
+             while (min <= max)
+               {
+                 if (tre_islower(min))
+                   {
+                     cmin = ccurr = tre_toupper(min++);
+                     while (tre_islower(min) && tre_toupper(min) == ccurr + 1
+                            && min <= max)
+                       ccurr = tre_toupper(min++);
+                     status = tre_new_item(ctx->mem, cmin, ccurr,
+                                           &i, &max_i, items);
+                   }
+                 else if (tre_isupper(min))
+                   {
+                     cmin = ccurr = tre_tolower(min++);
+                     while (tre_isupper(min) && tre_tolower(min) == ccurr + 1
+                            && min <= max)
+                       ccurr = tre_tolower(min++);
+                     status = tre_new_item(ctx->mem, cmin, ccurr,
+                                           &i, &max_i, items);
+                   }
+                 else min++;
+                 if (status != REG_OK)
+                   break;
+               }
+             if (status != REG_OK)
+               break;
+           }
+       }
+    }
+  *num_items = i;
+  *items_size = max_i;
+  ctx->re = re;
+  return status;
+}
+
+static reg_errcode_t
+tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
+{
+  tre_ast_node_t *node = NULL;
+  int negate = 0;
+  reg_errcode_t status = REG_OK;
+  tre_ast_node_t **items, *u, *n;
+  int i = 0, j, max_i = 32, curr_max, curr_min;
+  tre_ctype_t neg_classes[MAX_NEG_CLASSES];
+  int num_neg_classes = 0;
+
+  /* Start off with an array of `max_i' elements. */
+  items = xmalloc(sizeof(*items) * max_i);
+  if (items == NULL)
+    return REG_ESPACE;
+
+  if (*ctx->re == CHAR_CARET)
+    {
+      DPRINT(("tre_parse_bracket: negate: '%.*" STRF "'\n", REST(ctx->re)));
+      negate = 1;
+      ctx->re++;
+    }
+
+  status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes,
+                                  &items, &i, &max_i);
+
+  if (status != REG_OK)
+    goto parse_bracket_done;
+
+  /* Sort the array if we need to negate it. */
+  if (negate)
+    qsort(items, (unsigned)i, sizeof(*items), tre_compare_items);
+
+  curr_max = curr_min = 0;
+  /* Build a union of the items in the array, negated if necessary. */
+  for (j = 0; j < i && status == REG_OK; j++)
+    {
+      int min, max;
+      tre_literal_t *l = items[j]->obj;
+      min = l->code_min;
+      max = l->code_max;
+
+      DPRINT(("item: %d - %d, class %ld, curr_max = %d\n",
+             (int)l->code_min, (int)l->code_max, (long)l->u.class, curr_max));
+
+      if (negate)
+       {
+         if (min < curr_max)
+           {
+             /* Overlap. */
+             curr_max = MAX(max + 1, curr_max);
+             DPRINT(("overlap, curr_max = %d\n", curr_max));
+             l = NULL;
+           }
+         else
+           {
+             /* No overlap. */
+             curr_max = min - 1;
+             if (curr_max >= curr_min)
+               {
+                 DPRINT(("no overlap\n"));
+                 l->code_min = curr_min;
+                 l->code_max = curr_max;
+               }
+             else
+               {
+                 DPRINT(("no overlap, zero room\n"));
+                 l = NULL;
+               }
+             curr_min = curr_max = max + 1;
+           }
+       }
+
+      if (l != NULL)
+       {
+         int k;
+         DPRINT(("creating %d - %d\n", (int)l->code_min, (int)l->code_max));
+         l->position = ctx->position;
+         if (num_neg_classes > 0)
+           {
+             l->neg_classes = tre_mem_alloc(ctx->mem,
+                                            (sizeof(l->neg_classes)
+                                             * (num_neg_classes + 1)));
+             if (l->neg_classes == NULL)
+               {
+                 status = REG_ESPACE;
+                 break;
+               }
+             for (k = 0; k < num_neg_classes; k++)
+               l->neg_classes[k] = neg_classes[k];
+             l->neg_classes[k] = (tre_ctype_t)0;
+           }
+         else
+           l->neg_classes = NULL;
+         if (node == NULL)
+           node = items[j];
+         else
+           {
+             u = tre_ast_new_union(ctx->mem, node, items[j]);
+             if (u == NULL)
+               status = REG_ESPACE;
+             node = u;
+           }
+       }
+    }
+
+  if (status != REG_OK)
+    goto parse_bracket_done;
+
+  if (negate)
+    {
+      int k;
+      DPRINT(("final: creating %d - %d\n", curr_min, (int)TRE_CHAR_MAX));
+      n = tre_ast_new_literal(ctx->mem, curr_min, TRE_CHAR_MAX, ctx->position);
+      if (n == NULL)
+       status = REG_ESPACE;
+      else
+       {
+         tre_literal_t *l = n->obj;
+         if (num_neg_classes > 0)
+           {
+             l->neg_classes = tre_mem_alloc(ctx->mem,
+                                            (sizeof(l->neg_classes)
+                                             * (num_neg_classes + 1)));
+             if (l->neg_classes == NULL)
+               {
+                 status = REG_ESPACE;
+                 goto parse_bracket_done;
+               }
+             for (k = 0; k < num_neg_classes; k++)
+               l->neg_classes[k] = neg_classes[k];
+             l->neg_classes[k] = (tre_ctype_t)0;
+           }
+         else
+           l->neg_classes = NULL;
+         if (node == NULL)
+           node = n;
+         else
+           {
+             u = tre_ast_new_union(ctx->mem, node, n);
+             if (u == NULL)
+               status = REG_ESPACE;
+             node = u;
+           }
+       }
+    }
+
+  if (status != REG_OK)
+    goto parse_bracket_done;
+
+#ifdef TRE_DEBUG
+  tre_ast_print(node);
+#endif /* TRE_DEBUG */
+
+ parse_bracket_done:
+  xfree(items);
+  ctx->position++;
+  *result = node;
+  return status;
+}
+
+
+/* Parses a positive decimal integer.  Returns -1 if the string does not
+   contain a valid number. */
+static int
+tre_parse_int(const tre_char_t **regex, const tre_char_t *regex_end)
+{
+  int num = -1;
+  const tre_char_t *r = *regex;
+  while (r < regex_end && *r >= L'0' && *r <= L'9')
+    {
+      if (num < 0)
+       num = 0;
+      num = num * 10 + *r - L'0';
+      r++;
+    }
+  *regex = r;
+  return num;
+}
+
+
+static reg_errcode_t
+tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
+{
+  int min, max, i;
+  int cost_ins, cost_del, cost_subst, cost_max;
+  int limit_ins, limit_del, limit_subst, limit_err;
+  const tre_char_t *r = ctx->re;
+  const tre_char_t *start;
+  int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
+  int approx = 0;
+  int costs_set = 0;
+  int counts_set = 0;
+
+  cost_ins = cost_del = cost_subst = cost_max = TRE_PARAM_UNSET;
+  limit_ins = limit_del = limit_subst = limit_err = TRE_PARAM_UNSET;
+
+  /* Parse number (minimum repetition count). */
+  min = -1;
+  if (r < ctx->re_end && *r >= L'0' && *r <= L'9') {
+    DPRINT(("tre_parse:          min count: '%.*" STRF "'\n", REST(r)));
+    min = tre_parse_int(&r, ctx->re_end);
+  }
+
+  /* Parse comma and second number (maximum repetition count). */
+  max = min;
+  if (r < ctx->re_end && *r == CHAR_COMMA)
+    {
+      r++;
+      DPRINT(("tre_parse:   max count: '%.*" STRF "'\n", REST(r)));
+      max = tre_parse_int(&r, ctx->re_end);
+    }
+
+  /* Check that the repeat counts are sane. */
+  if ((max >= 0 && min > max) || max > RE_DUP_MAX)
+    return REG_BADBR;
+
+
+  /*
+   '{'
+     optionally followed immediately by a number == minimum repcount
+     optionally followed by , then a number == maximum repcount
+      + then a number == maximum insertion count
+      - then a number == maximum deletion count
+      # then a number == maximum substitution count
+      ~ then a number == maximum number of errors
+      Any of +, -, # or ~ without followed by a number means that
+      the maximum count/number of errors is infinite.
+
+      An equation of the form
+       Xi + Yd + Zs < C
+      can be specified to set costs and the cost limit to a value
+      different from the default value:
+       - X is the cost of an insertion
+       - Y is the cost of a deletion
+       - Z is the cost of a substitution
+       - C is the maximum cost
+
+      If no count limit or cost is set for an operation, the operation
+      is not allowed at all.
+  */
+
+
+  do {
+    int done;
+    start = r;
+
+    /* Parse count limit settings */
+    done = 0;
+    if (!counts_set)
+      while (r + 1 < ctx->re_end && !done)
+       {
+         switch (*r)
+           {
+           case CHAR_PLUS:  /* Insert limit */
+             DPRINT(("tre_parse:   ins limit: '%.*" STRF "'\n", REST(r)));
+             r++;
+             limit_ins = tre_parse_int(&r, ctx->re_end);
+             if (limit_ins < 0)
+               limit_ins = INT_MAX;
+             counts_set = 1;
+             break;
+           case CHAR_MINUS: /* Delete limit */
+             DPRINT(("tre_parse:   del limit: '%.*" STRF "'\n", REST(r)));
+             r++;
+             limit_del = tre_parse_int(&r, ctx->re_end);
+             if (limit_del < 0)
+               limit_del = INT_MAX;
+             counts_set = 1;
+             break;
+           case CHAR_HASH:  /* Substitute limit */
+             DPRINT(("tre_parse: subst limit: '%.*" STRF "'\n", REST(r)));
+             r++;
+             limit_subst = tre_parse_int(&r, ctx->re_end);
+             if (limit_subst < 0)
+               limit_subst = INT_MAX;
+             counts_set = 1;
+             break;
+           case CHAR_TILDE: /* Maximum number of changes */
+             DPRINT(("tre_parse: count limit: '%.*" STRF "'\n", REST(r)));
+             r++;
+             limit_err = tre_parse_int(&r, ctx->re_end);
+             if (limit_err < 0)
+               limit_err = INT_MAX;
+             approx = 1;
+             break;
+           case CHAR_COMMA:
+             r++;
+             break;
+           case L' ':
+             r++;
+             break;
+           case L'}':
+             done = 1;
+             break;
+           default:
+             done = 1;
+             break;
+           }
+       }
+
+    /* Parse cost restriction equation. */
+    done = 0;
+    if (!costs_set)
+      while (r + 1 < ctx->re_end && !done)
+       {
+         switch (*r)
+           {
+           case CHAR_PLUS:
+           case L' ':
+             r++;
+             break;
+           case L'<':
+             DPRINT(("tre_parse:    max cost: '%.*" STRF "'\n", REST(r)));
+             r++;
+             while (*r == L' ')
+               r++;
+             cost_max = tre_parse_int(&r, ctx->re_end);
+             if (cost_max < 0)
+               cost_max = INT_MAX;
+             else
+               cost_max--;
+             approx = 1;
+             break;
+           case CHAR_COMMA:
+             r++;
+             done = 1;
+             break;
+           default:
+             if (*r >= L'0' && *r <= L'9')
+               {
+#ifdef TRE_DEBUG
+                 const tre_char_t *sr = r;
+#endif /* TRE_DEBUG */
+                 int cost = tre_parse_int(&r, ctx->re_end);
+                 /* XXX - make sure r is not past end. */
+                 switch (*r)
+                   {
+                   case L'i':  /* Insert cost */
+                     DPRINT(("tre_parse:    ins cost: '%.*" STRF "'\n",
+                             REST(sr)));
+                     r++;
+                     cost_ins = cost;
+                     costs_set = 1;
+                     break;
+                   case L'd':  /* Delete cost */
+                     DPRINT(("tre_parse:    del cost: '%.*" STRF "'\n",
+                             REST(sr)));
+                     r++;
+                     cost_del = cost;
+                     costs_set = 1;
+                     break;
+                   case L's':  /* Substitute cost */
+                     DPRINT(("tre_parse:  subst cost: '%.*" STRF "'\n",
+                             REST(sr)));
+                     r++;
+                     cost_subst = cost;
+                     costs_set = 1;
+                     break;
+                   default:
+                     return REG_BADBR;
+                   }
+               }
+             else
+               {
+                 done = 1;
+                 break;
+               }
+           }
+       }
+  } while (start != r);
+
+  /* Missing }. */
+  if (r >= ctx->re_end)
+    return REG_EBRACE;
+
+  /* Empty contents of {}. */
+  if (r == ctx->re)
+    return REG_BADBR;
+
+  /* Parse the ending '}' or '\}'.*/
+  if (ctx->cflags & REG_EXTENDED)
+    {
+      if (r >= ctx->re_end || *r != CHAR_RBRACE)
+       return REG_BADBR;
+      r++;
+    }
+  else
+    {
+      if (r + 1 >= ctx->re_end
+         || *r != CHAR_BACKSLASH
+         || *(r + 1) != CHAR_RBRACE)
+       return REG_BADBR;
+      r += 2;
+    }
+
+
+  /* Parse trailing '?' marking minimal repetition. */
+  if (r < ctx->re_end)
+    {
+      if (*r == CHAR_QUESTIONMARK)
+       {
+         minimal = !(ctx->cflags & REG_UNGREEDY);
+         r++;
+       }
+      else if (*r == CHAR_STAR || *r == CHAR_PLUS)
+       {
+         /* These are reserved for future extensions. */
+         return REG_BADRPT;
+       }
+    }
+
+  /* Create the AST node(s). */
+  if (min == 0 && max == 0)
+    {
+      *result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+      if (*result == NULL)
+       return REG_ESPACE;
+    }
+  else
+    {
+      if (min < 0 && max < 0)
+       /* Only approximate parameters set, no repetitions. */
+       min = max = 1;
+
+      *result = tre_ast_new_iter(ctx->mem, *result, min, max, minimal);
+      if (!*result)
+       return REG_ESPACE;
+
+      /* If approximate matching parameters are set, add them to the
+        iteration node. */
+      if (approx || costs_set || counts_set)
+       {
+         int *params;
+         tre_iteration_t *iter = (*result)->obj;
+
+         if (costs_set || counts_set)
+           {
+             if (limit_ins == TRE_PARAM_UNSET)
+               {
+                 if (cost_ins == TRE_PARAM_UNSET)
+                   limit_ins = 0;
+                 else
+                   limit_ins = INT_MAX;
+               }
+
+             if (limit_del == TRE_PARAM_UNSET)
+               {
+                 if (cost_del == TRE_PARAM_UNSET)
+                   limit_del = 0;
+                 else
+                   limit_del = INT_MAX;
+               }
+
+             if (limit_subst == TRE_PARAM_UNSET)
+               {
+                 if (cost_subst == TRE_PARAM_UNSET)
+                   limit_subst = 0;
+                 else
+                   limit_subst = INT_MAX;
+               }
+           }
+
+         if (cost_max == TRE_PARAM_UNSET)
+           cost_max = INT_MAX;
+         if (limit_err == TRE_PARAM_UNSET)
+           limit_err = INT_MAX;
+
+         ctx->have_approx = 1;
+         params = tre_mem_alloc(ctx->mem, sizeof(*params) * TRE_PARAM_LAST);
+         if (!params)
+           return REG_ESPACE;
+         for (i = 0; i < TRE_PARAM_LAST; i++)
+           params[i] = TRE_PARAM_UNSET;
+         params[TRE_PARAM_COST_INS] = cost_ins;
+         params[TRE_PARAM_COST_DEL] = cost_del;
+         params[TRE_PARAM_COST_SUBST] = cost_subst;
+         params[TRE_PARAM_COST_MAX] = cost_max;
+         params[TRE_PARAM_MAX_INS] = limit_ins;
+         params[TRE_PARAM_MAX_DEL] = limit_del;
+         params[TRE_PARAM_MAX_SUBST] = limit_subst;
+         params[TRE_PARAM_MAX_ERR] = limit_err;
+         iter->params = params;
+       }
+    }
+
+  DPRINT(("tre_parse_bound: min %d, max %d, costs [%d,%d,%d, total %d], "
+         "limits [%d,%d,%d, total %d]\n",
+         min, max, cost_ins, cost_del, cost_subst, cost_max,
+         limit_ins, limit_del, limit_subst, limit_err));
+
+
+  ctx->re = r;
+  return REG_OK;
+}
+
+typedef enum {
+  PARSE_RE = 0,
+  PARSE_ATOM,
+  PARSE_MARK_FOR_SUBMATCH,
+  PARSE_BRANCH,
+  PARSE_PIECE,
+  PARSE_CATENATION,
+  PARSE_POST_CATENATION,
+  PARSE_UNION,
+  PARSE_POST_UNION,
+  PARSE_POSTFIX,
+  PARSE_RESTORE_CFLAGS
+} tre_parse_re_stack_symbol_t;
+
+
+reg_errcode_t
+tre_parse(tre_parse_ctx_t *ctx)
+{
+  tre_ast_node_t *result = NULL;
+  tre_parse_re_stack_symbol_t symbol;
+  reg_errcode_t status = REG_OK;
+  tre_stack_t *stack = ctx->stack;
+  int bottom = tre_stack_num_objects(stack);
+  int depth = 0;
+  int temporary_cflags = 0;
+
+  DPRINT(("tre_parse: parsing '%.*" STRF "', len = %d\n",
+         ctx->len, ctx->re, ctx->len));
+
+  if (!ctx->nofirstsub)
+    {
+      STACK_PUSH(stack, int, ctx->submatch_id);
+      STACK_PUSH(stack, int, PARSE_MARK_FOR_SUBMATCH);
+      ctx->submatch_id++;
+    }
+  STACK_PUSH(stack, int, PARSE_RE);
+  ctx->re_start = ctx->re;
+  ctx->re_end = ctx->re + ctx->len;
+
+
+  /* The following is basically just a recursive descent parser.  I use
+     an explicit stack instead of recursive functions mostly because of
+     two reasons: compatibility with systems which have an overflowable
+     call stack, and efficiency (both in lines of code and speed).  */
+  while (tre_stack_num_objects(stack) > bottom && status == REG_OK)
+    {
+      if (status != REG_OK)
+       break;
+      symbol = tre_stack_pop_int(stack);
+      switch (symbol)
+       {
+       case PARSE_RE:
+         /* Parse a full regexp.  A regexp is one or more branches,
+            separated by the union operator `|'. */
+#ifdef REG_LITERAL
+         if (!(ctx->cflags & REG_LITERAL)
+             && ctx->cflags & REG_EXTENDED)
+#endif /* REG_LITERAL */
+           STACK_PUSHX(stack, int, PARSE_UNION);
+         STACK_PUSHX(stack, int, PARSE_BRANCH);
+         break;
+
+       case PARSE_BRANCH:
+         /* Parse a branch.  A branch is one or more pieces, concatenated.
+            A piece is an atom possibly followed by a postfix operator. */
+         STACK_PUSHX(stack, int, PARSE_CATENATION);
+         STACK_PUSHX(stack, int, PARSE_PIECE);
+         break;
+
+       case PARSE_PIECE:
+         /* Parse a piece.  A piece is an atom possibly followed by one
+            or more postfix operators. */
+#ifdef REG_LITERAL
+         if (!(ctx->cflags & REG_LITERAL))
+#endif /* REG_LITERAL */
+           STACK_PUSHX(stack, int, PARSE_POSTFIX);
+         STACK_PUSHX(stack, int, PARSE_ATOM);
+         break;
+
+       case PARSE_CATENATION:
+         /* If the expression has not ended, parse another piece. */
+         {
+           tre_char_t c;
+           if (ctx->re >= ctx->re_end)
+             break;
+           c = *ctx->re;
+#ifdef REG_LITERAL
+           if (!(ctx->cflags & REG_LITERAL))
+             {
+#endif /* REG_LITERAL */
+               if (ctx->cflags & REG_EXTENDED && c == CHAR_PIPE)
+                 break;
+               if ((ctx->cflags & REG_EXTENDED
+                    && c == CHAR_RPAREN && depth > 0)
+                   || (!(ctx->cflags & REG_EXTENDED)
+                       && (c == CHAR_BACKSLASH
+                           && *(ctx->re + 1) == CHAR_RPAREN)))
+                 {
+                   if (!(ctx->cflags & REG_EXTENDED) && depth == 0)
+                     status = REG_EPAREN;
+                   DPRINT(("tre_parse:   group end: '%.*" STRF "'\n",
+                           REST(ctx->re)));
+                   depth--;
+                   if (!(ctx->cflags & REG_EXTENDED))
+                     ctx->re += 2;
+                   break;
+                 }
+#ifdef REG_LITERAL
+             }
+#endif /* REG_LITERAL */
+
+#ifdef REG_RIGHT_ASSOC
+           if (ctx->cflags & REG_RIGHT_ASSOC)
+             {
+               /* Right associative concatenation. */
+               STACK_PUSHX(stack, voidptr, result);
+               STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
+               STACK_PUSHX(stack, int, PARSE_CATENATION);
+               STACK_PUSHX(stack, int, PARSE_PIECE);
+             }
+           else
+#endif /* REG_RIGHT_ASSOC */
+             {
+               /* Default case, left associative concatenation. */
+               STACK_PUSHX(stack, int, PARSE_CATENATION);
+               STACK_PUSHX(stack, voidptr, result);
+               STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
+               STACK_PUSHX(stack, int, PARSE_PIECE);
+             }
+           break;
+         }
+
+       case PARSE_POST_CATENATION:
+         {
+           tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
+           tre_ast_node_t *tmp_node;
+           tmp_node = tre_ast_new_catenation(ctx->mem, tree, result);
+           if (!tmp_node)
+             return REG_ESPACE;
+           result = tmp_node;
+           break;
+         }
+
+       case PARSE_UNION:
+         if (ctx->re >= ctx->re_end)
+           break;
+#ifdef REG_LITERAL
+         if (ctx->cflags & REG_LITERAL)
+           break;
+#endif /* REG_LITERAL */
+         switch (*ctx->re)
+           {
+           case CHAR_PIPE:
+             DPRINT(("tre_parse:       union: '%.*" STRF "'\n",
+                     REST(ctx->re)));
+             STACK_PUSHX(stack, int, PARSE_UNION);
+             STACK_PUSHX(stack, voidptr, result);
+             STACK_PUSHX(stack, int, PARSE_POST_UNION);
+             STACK_PUSHX(stack, int, PARSE_BRANCH);
+             ctx->re++;
+             break;
+
+           case CHAR_RPAREN:
+             ctx->re++;
+             break;
+
+           default:
+             break;
+           }
+         break;
+
+       case PARSE_POST_UNION:
+         {
+           tre_ast_node_t *tmp_node;
+           tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
+           tmp_node = tre_ast_new_union(ctx->mem, tree, result);
+           if (!tmp_node)
+             return REG_ESPACE;
+           result = tmp_node;
+           break;
+         }
+
+       case PARSE_POSTFIX:
+         /* Parse postfix operators. */
+         if (ctx->re >= ctx->re_end)
+           break;
+#ifdef REG_LITERAL
+         if (ctx->cflags & REG_LITERAL)
+           break;
+#endif /* REG_LITERAL */
+         switch (*ctx->re)
+           {
+           case CHAR_PLUS:
+           case CHAR_QUESTIONMARK:
+             if (!(ctx->cflags & REG_EXTENDED))
+               break;
+               /*FALLTHROUGH*/
+           case CHAR_STAR:
+             {
+               tre_ast_node_t *tmp_node;
+               int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
+               int rep_min = 0;
+               int rep_max = -1;
+#ifdef TRE_DEBUG
+               const tre_char_t *tmp_re;
+#endif
+
+               if (*ctx->re == CHAR_PLUS)
+                 rep_min = 1;
+               if (*ctx->re == CHAR_QUESTIONMARK)
+                 rep_max = 1;
+#ifdef TRE_DEBUG
+               tmp_re = ctx->re;
+#endif
+
+               if (ctx->re + 1 < ctx->re_end)
+                 {
+                   if (*(ctx->re + 1) == CHAR_QUESTIONMARK)
+                     {
+                       minimal = !(ctx->cflags & REG_UNGREEDY);
+                       ctx->re++;
+                     }
+                   else if (*(ctx->re + 1) == CHAR_STAR
+                            || *(ctx->re + 1) == CHAR_PLUS)
+                     {
+                       /* These are reserved for future extensions. */
+                       return REG_BADRPT;
+                     }
+                 }
+
+               DPRINT(("tre_parse: %s star: '%.*" STRF "'\n",
+                       minimal ? "  minimal" : "greedy", REST(tmp_re)));
+               ctx->re++;
+               tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max,
+                                           minimal);
+               if (tmp_node == NULL)
+                 return REG_ESPACE;
+               result = tmp_node;
+               STACK_PUSHX(stack, int, PARSE_POSTFIX);
+             }
+             break;
+
+           case CHAR_BACKSLASH:
+             /* "\{" is special without REG_EXTENDED */
+             if (!(ctx->cflags & REG_EXTENDED)
+                 && ctx->re + 1 < ctx->re_end
+                 && *(ctx->re + 1) == CHAR_LBRACE)
+               {
+                 ctx->re++;
+                 goto parse_brace;
+               }
+             else
+               break;
+
+           case CHAR_LBRACE:
+             /* "{" is literal without REG_EXTENDED */
+             if (!(ctx->cflags & REG_EXTENDED))
+               break;
+
+           parse_brace:
+             DPRINT(("tre_parse:       bound: '%.*" STRF "'\n",
+                     REST(ctx->re)));
+             ctx->re++;
+
+             status = tre_parse_bound(ctx, &result);
+             if (status != REG_OK)
+               return status;
+             STACK_PUSHX(stack, int, PARSE_POSTFIX);
+             break;
+           }
+         break;
+
+       case PARSE_ATOM:
+         /* Parse an atom.  An atom is a regular expression enclosed in `()',
+            an empty set of `()', a bracket expression, `.', `^', `$',
+            a `\' followed by a character, or a single character. */
+
+         /* End of regexp? (empty string). */
+         if (ctx->re >= ctx->re_end)
+           goto parse_literal;
+
+#ifdef REG_LITERAL
+         if (ctx->cflags & REG_LITERAL)
+           goto parse_literal;
+#endif /* REG_LITERAL */
+
+         switch (*ctx->re)
+           {
+           case CHAR_LPAREN:  /* parenthesized subexpression */
+
+             /* Handle "(?...)" extensions.  They work in a way similar
+                to Perls corresponding extensions. */
+             if (ctx->cflags & REG_EXTENDED
+                 && *(ctx->re + 1) == CHAR_QUESTIONMARK)
+               {
+                 int new_cflags = ctx->cflags;
+                 int bit = 1;
+                 DPRINT(("tre_parse:   extension: '%.*" STRF "\n",
+                         REST(ctx->re)));
+                 ctx->re += 2;
+                 while (/*CONSTCOND*/1)
+                   {
+                     if (*ctx->re == L'i')
+                       {
+                         DPRINT(("tre_parse:       icase: '%.*" STRF "\n",
+                                 REST(ctx->re)));
+                         if (bit)
+                           new_cflags |= REG_ICASE;
+                         else
+                           new_cflags &= ~REG_ICASE;
+                         ctx->re++;
+                       }
+                     else if (*ctx->re == L'n')
+                       {
+                         DPRINT(("tre_parse:     newline: '%.*" STRF "\n",
+                                 REST(ctx->re)));
+                         if (bit)
+                           new_cflags |= REG_NEWLINE;
+                         else
+                           new_cflags &= ~REG_NEWLINE;
+                         ctx->re++;
+                       }
+#ifdef REG_RIGHT_ASSOC
+                     else if (*ctx->re == L'r')
+                       {
+                         DPRINT(("tre_parse: right assoc: '%.*" STRF "\n",
+                                 REST(ctx->re)));
+                         if (bit)
+                           new_cflags |= REG_RIGHT_ASSOC;
+                         else
+                           new_cflags &= ~REG_RIGHT_ASSOC;
+                         ctx->re++;
+                       }
+#endif /* REG_RIGHT_ASSOC */
+#ifdef REG_UNGREEDY
+                     else if (*ctx->re == L'U')
+                       {
+                         DPRINT(("tre_parse:    ungreedy: '%.*" STRF "\n",
+                                 REST(ctx->re)));
+                         if (bit)
+                           new_cflags |= REG_UNGREEDY;
+                         else
+                           new_cflags &= ~REG_UNGREEDY;
+                         ctx->re++;
+                       }
+#endif /* REG_UNGREEDY */
+                     else if (*ctx->re == CHAR_MINUS)
+                       {
+                         DPRINT(("tre_parse:    turn off: '%.*" STRF "\n",
+                                 REST(ctx->re)));
+                         ctx->re++;
+                         bit = 0;
+                       }
+                     else if (*ctx->re == CHAR_COLON)
+                       {
+                         DPRINT(("tre_parse:    no group: '%.*" STRF "\n",
+                                 REST(ctx->re)));
+                         ctx->re++;
+                         depth++;
+                         break;
+                       }
+                     else if (*ctx->re == CHAR_HASH)
+                       {
+                         DPRINT(("tre_parse:    comment: '%.*" STRF "\n",
+                                 REST(ctx->re)));
+                         /* A comment can contain any character except a
+                            right parenthesis */
+                         while (*ctx->re != CHAR_RPAREN
+                                && ctx->re < ctx->re_end)
+                           ctx->re++;
+                         if (*ctx->re == CHAR_RPAREN && ctx->re < ctx->re_end)
+                           {
+                             ctx->re++;
+                             break;
+                           }
+                         else
+                           return REG_BADPAT;
+                       }
+                     else if (*ctx->re == CHAR_RPAREN)
+                       {
+                         ctx->re++;
+                         break;
+                       }
+                     else
+                       return REG_BADPAT;
+                   }
+
+                 /* Turn on the cflags changes for the rest of the
+                    enclosing group. */
+                 STACK_PUSHX(stack, int, ctx->cflags);
+                 STACK_PUSHX(stack, int, PARSE_RESTORE_CFLAGS);
+                 STACK_PUSHX(stack, int, PARSE_RE);
+                 ctx->cflags = new_cflags;
+                 break;
+               }
+
+             if (ctx->cflags & REG_EXTENDED
+                 || (ctx->re > ctx->re_start
+                     && *(ctx->re - 1) == CHAR_BACKSLASH))
+               {
+                 depth++;
+                 if (ctx->re + 2 < ctx->re_end
+                     && *(ctx->re + 1) == CHAR_QUESTIONMARK
+                     && *(ctx->re + 2) == CHAR_COLON)
+                   {
+                     DPRINT(("tre_parse: group begin: '%.*" STRF
+                             "', no submatch\n", REST(ctx->re)));
+                     /* Don't mark for submatching. */
+                     ctx->re += 3;
+                     STACK_PUSHX(stack, int, PARSE_RE);
+                   }
+                 else
+                   {
+                     DPRINT(("tre_parse: group begin: '%.*" STRF
+                             "', submatch %d\n", REST(ctx->re),
+                             ctx->submatch_id));
+                     ctx->re++;
+                     /* First parse a whole RE, then mark the resulting tree
+                        for submatching. */
+                     STACK_PUSHX(stack, int, ctx->submatch_id);
+                     STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH);
+                     STACK_PUSHX(stack, int, PARSE_RE);
+                     ctx->submatch_id++;
+                   }
+               }
+             else
+               goto parse_literal;
+             break;
+
+           case CHAR_RPAREN:  /* end of current subexpression */
+             if ((ctx->cflags & REG_EXTENDED && depth > 0)
+                 || (ctx->re > ctx->re_start
+                     && *(ctx->re - 1) == CHAR_BACKSLASH))
+               {
+                 DPRINT(("tre_parse:       empty: '%.*" STRF "'\n",
+                         REST(ctx->re)));
+                 /* We were expecting an atom, but instead the current
+                    subexpression was closed.  POSIX leaves the meaning of
+                    this to be implementation-defined.  We interpret this as
+                    an empty expression (which matches an empty string).  */
+                 result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+                 if (result == NULL)
+                   return REG_ESPACE;
+                 if (!(ctx->cflags & REG_EXTENDED))
+                   ctx->re--;
+               }
+             else
+               goto parse_literal;
+             break;
+
+           case CHAR_LBRACKET: /* bracket expression */
+             DPRINT(("tre_parse:     bracket: '%.*" STRF "'\n",
+                     REST(ctx->re)));
+             ctx->re++;
+             status = tre_parse_bracket(ctx, &result);
+             if (status != REG_OK)
+               return status;
+             break;
+
+           case CHAR_BACKSLASH:
+             /* If this is "\(" or "\)" chew off the backslash and
+                try again. */
+             if (!(ctx->cflags & REG_EXTENDED)
+                 && ctx->re + 1 < ctx->re_end
+                 && (*(ctx->re + 1) == CHAR_LPAREN
+                     || *(ctx->re + 1) == CHAR_RPAREN))
+               {
+                 ctx->re++;
+                 STACK_PUSHX(stack, int, PARSE_ATOM);
+                 break;
+               }
+
+             /* If a macro is used, parse the expanded macro recursively. */
+             {
+               tre_char_t buf[64];
+               tre_expand_macro(ctx->re + 1, ctx->re_end,
+                                buf, elementsof(buf));
+               if (buf[0] != 0)
+                 {
+                   tre_parse_ctx_t subctx;
+                   memcpy(&subctx, ctx, sizeof(subctx));
+                   subctx.re = buf;
+                   subctx.len = tre_strlen(buf);
+                   subctx.nofirstsub = 1;
+                   status = tre_parse(&subctx);
+                   if (status != REG_OK)
+                     return status;
+                   ctx->re += 2;
+                   ctx->position = subctx.position;
+                   result = subctx.result;
+                   break;
+                 }
+             }
+
+             if (ctx->re + 1 >= ctx->re_end)
+               /* Trailing backslash. */
+               return REG_EESCAPE;
+
+#ifdef REG_LITERAL
+             if (*(ctx->re + 1) == L'Q')
+               {
+                 DPRINT(("tre_parse: tmp literal: '%.*" STRF "'\n",
+                         REST(ctx->re)));
+                 ctx->cflags |= REG_LITERAL;
+                 temporary_cflags |= REG_LITERAL;
+                 ctx->re += 2;
+                 STACK_PUSHX(stack, int, PARSE_ATOM);
+                 break;
+               }
+#endif /* REG_LITERAL */
+
+             DPRINT(("tre_parse:  bleep: '%.*" STRF "'\n", REST(ctx->re)));
+             ctx->re++;
+             switch (*ctx->re)
+               {
+               case L'b':
+                 result = tre_ast_new_literal(ctx->mem, ASSERTION,
+                                              ASSERT_AT_WB, -1);
+                 ctx->re++;
+                 break;
+               case L'B':
+                 result = tre_ast_new_literal(ctx->mem, ASSERTION,
+                                              ASSERT_AT_WB_NEG, -1);
+                 ctx->re++;
+                 break;
+               case L'<':
+                 result = tre_ast_new_literal(ctx->mem, ASSERTION,
+                                              ASSERT_AT_BOW, -1);
+                 ctx->re++;
+                 break;
+               case L'>':
+                 result = tre_ast_new_literal(ctx->mem, ASSERTION,
+                                              ASSERT_AT_EOW, -1);
+                 ctx->re++;
+                 break;
+               case L'x':
+                 ctx->re++;
+                 if (ctx->re[0] != CHAR_LBRACE && ctx->re < ctx->re_end)
+                   {
+                     /* 8 bit hex char. */
+                     char tmp[3] = {0, 0, 0};
+                     long val;
+                     DPRINT(("tre_parse:  8 bit hex: '%.*" STRF "'\n",
+                             REST(ctx->re - 2)));
+
+                     if (tre_isxdigit(ctx->re[0]) && ctx->re < ctx->re_end)
+                       {
+                         tmp[0] = (char)ctx->re[0];
+                         ctx->re++;
+                       }
+                     if (tre_isxdigit(ctx->re[0]) && ctx->re < ctx->re_end)
+                       {
+                         tmp[1] = (char)ctx->re[0];
+                         ctx->re++;
+                       }
+                     val = strtol(tmp, NULL, 16);
+                     result = tre_ast_new_literal(ctx->mem, (int)val,
+                                                  (int)val, ctx->position);
+                     ctx->position++;
+                     break;
+                   }
+                 else if (ctx->re < ctx->re_end)
+                   {
+                     /* Wide char. */
+                     char tmp[32];
+                     long val;
+                     int i = 0;
+                     ctx->re++;
+                     while (ctx->re_end - ctx->re >= 0)
+                       {
+                         if (ctx->re[0] == CHAR_RBRACE)
+                           break;
+                         if (tre_isxdigit(ctx->re[0]))
+                           {
+                             tmp[i] = (char)ctx->re[0];
+                             i++;
+                             ctx->re++;
+                             continue;
+                           }
+                         return REG_EBRACE;
+                       }
+                     ctx->re++;
+                     tmp[i] = 0;
+                     val = strtol(tmp, NULL, 16);
+                     result = tre_ast_new_literal(ctx->mem, (int)val, (int)val,
+                                                  ctx->position);
+                     ctx->position++;
+                     break;
+                   }
+                 /*FALLTHROUGH*/
+
+               default:
+                 if (tre_isdigit(*ctx->re))
+                   {
+                     /* Back reference. */
+                     int val = *ctx->re - L'0';
+                     DPRINT(("tre_parse:     backref: '%.*" STRF "'\n",
+                             REST(ctx->re - 1)));
+                     result = tre_ast_new_literal(ctx->mem, BACKREF, val,
+                                                  ctx->position);
+                     if (result == NULL)
+                       return REG_ESPACE;
+                     ctx->position++;
+                     ctx->max_backref = MAX(val, ctx->max_backref);
+                     ctx->re++;
+                   }
+                 else
+                   {
+                     /* Escaped character. */
+                     DPRINT(("tre_parse:     escaped: '%.*" STRF "'\n",
+                             REST(ctx->re - 1)));
+                     result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
+                                                  ctx->position);
+                     ctx->position++;
+                     ctx->re++;
+                   }
+                 break;
+               }
+             if (result == NULL)
+               return REG_ESPACE;
+             break;
+
+           case CHAR_PERIOD:    /* the any-symbol */
+             DPRINT(("tre_parse:         any: '%.*" STRF "'\n",
+                     REST(ctx->re)));
+             if (ctx->cflags & REG_NEWLINE)
+               {
+                 tre_ast_node_t *tmp1;
+                 tre_ast_node_t *tmp2;
+                 tmp1 = tre_ast_new_literal(ctx->mem, 0, L'\n' - 1,
+                                            ctx->position);
+                 if (!tmp1)
+                   return REG_ESPACE;
+                 tmp2 = tre_ast_new_literal(ctx->mem, L'\n' + 1, TRE_CHAR_MAX,
+                                            ctx->position + 1);
+                 if (!tmp2)
+                   return REG_ESPACE;
+                 result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
+                 if (!result)
+                   return REG_ESPACE;
+                 ctx->position += 2;
+               }
+             else
+               {
+                 result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX,
+                                              ctx->position);
+                 if (!result)
+                   return REG_ESPACE;
+                 ctx->position++;
+               }
+             ctx->re++;
+             break;
+
+           case CHAR_CARET:     /* beginning of line assertion */
+             /* '^' has a special meaning everywhere in EREs, and in the
+                beginning of the RE and after \( is BREs. */
+             if (ctx->cflags & REG_EXTENDED
+                 || (ctx->re - 2 >= ctx->re_start
+                     && *(ctx->re - 2) == CHAR_BACKSLASH
+                     && *(ctx->re - 1) == CHAR_LPAREN)
+                 || ctx->re == ctx->re_start)
+               {
+                 DPRINT(("tre_parse:         BOL: '%.*" STRF "'\n",
+                         REST(ctx->re)));
+                 result = tre_ast_new_literal(ctx->mem, ASSERTION,
+                                              ASSERT_AT_BOL, -1);
+                 if (result == NULL)
+                   return REG_ESPACE;
+                 ctx->re++;
+               }
+             else
+               goto parse_literal;
+             break;
+
+           case CHAR_DOLLAR:    /* end of line assertion. */
+             /* '$' is special everywhere in EREs, and in the end of the
+                string and before \) is BREs. */
+             if (ctx->cflags & REG_EXTENDED
+                 || (ctx->re + 2 < ctx->re_end
+                     && *(ctx->re + 1) == CHAR_BACKSLASH
+                     && *(ctx->re + 2) == CHAR_RPAREN)
+                 || ctx->re + 1 == ctx->re_end)
+               {
+                 DPRINT(("tre_parse:         EOL: '%.*" STRF "'\n",
+                         REST(ctx->re)));
+                 result = tre_ast_new_literal(ctx->mem, ASSERTION,
+                                              ASSERT_AT_EOL, -1);
+                 if (result == NULL)
+                   return REG_ESPACE;
+                 ctx->re++;
+               }
+             else
+               goto parse_literal;
+             break;
+
+           default:
+           parse_literal:
+
+             if (temporary_cflags && ctx->re + 1 < ctx->re_end
+                 && *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == L'E')
+               {
+                 DPRINT(("tre_parse:    end tmps: '%.*" STRF "'\n",
+                         REST(ctx->re)));
+                 ctx->cflags &= ~temporary_cflags;
+                 temporary_cflags = 0;
+                 ctx->re += 2;
+                 STACK_PUSHX(stack, int, PARSE_PIECE);
+                 break;
+               }
+
+
+             /* We are expecting an atom.  If the subexpression (or the whole
+                regexp ends here, we interpret it as an empty expression
+                (which matches an empty string).  */
+             if (
+#ifdef REG_LITERAL
+                 !(ctx->cflags & REG_LITERAL) &&
+#endif /* REG_LITERAL */
+                 (ctx->re >= ctx->re_end
+                  || *ctx->re == CHAR_STAR
+                  || (ctx->cflags & REG_EXTENDED
+                      && (*ctx->re == CHAR_PIPE
+                          || *ctx->re == CHAR_LBRACE
+                          || *ctx->re == CHAR_PLUS
+                          || *ctx->re == CHAR_QUESTIONMARK))
+                  /* Test for "\)" in BRE mode. */
+                  || (!(ctx->cflags & REG_EXTENDED)
+                      && ctx->re + 1 < ctx->re_end
+                      && *ctx->re == CHAR_BACKSLASH
+                      && *(ctx->re + 1) == CHAR_LBRACE)))
+               {
+                 DPRINT(("tre_parse:       empty: '%.*" STRF "'\n",
+                         REST(ctx->re)));
+                 result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+                 if (!result)
+                   return REG_ESPACE;
+                 break;
+               }
+
+             DPRINT(("tre_parse:     literal: '%.*" STRF "'\n",
+                     REST(ctx->re)));
+             /* Note that we can't use an tre_isalpha() test here, since there
+                may be characters which are alphabetic but neither upper or
+                lower case. */
+             if (ctx->cflags & REG_ICASE
+                 && (tre_isupper(*ctx->re) || tre_islower(*ctx->re)))
+               {
+                 tre_ast_node_t *tmp1;
+                 tre_ast_node_t *tmp2;
+
+                 /* XXX - Can there be more than one opposite-case
+                    counterpoints for some character in some locale?  Or
+                    more than two characters which all should be regarded
+                    the same character if case is ignored?  If yes, there
+                    does not seem to be a portable way to detect it.  I guess
+                    that at least for multi-character collating elements there
+                    could be several opposite-case counterpoints, but they
+                    cannot be supported portably anyway. */
+                 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(*ctx->re),
+                                            tre_toupper(*ctx->re),
+                                            ctx->position);
+                 if (!tmp1)
+                   return REG_ESPACE;
+                 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(*ctx->re),
+                                            tre_tolower(*ctx->re),
+                                            ctx->position);
+                 if (!tmp2)
+                   return REG_ESPACE;
+                 result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
+                 if (!result)
+                   return REG_ESPACE;
+               }
+             else
+               {
+                 result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
+                                              ctx->position);
+                 if (!result)
+                   return REG_ESPACE;
+               }
+             ctx->position++;
+             ctx->re++;
+             break;
+           }
+         break;
+
+       case PARSE_MARK_FOR_SUBMATCH:
+         {
+           int submatch_id = tre_stack_pop_int(stack);
+
+           if (result->submatch_id >= 0)
+             {
+               tre_ast_node_t *n, *tmp_node;
+               n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+               if (n == NULL)
+                 return REG_ESPACE;
+               tmp_node = tre_ast_new_catenation(ctx->mem, n, result);
+               if (tmp_node == NULL)
+                 return REG_ESPACE;
+               tmp_node->num_submatches = result->num_submatches;
+               result = tmp_node;
+             }
+           result->submatch_id = submatch_id;
+           result->num_submatches++;
+           break;
+         }
+
+       case PARSE_RESTORE_CFLAGS:
+         ctx->cflags = tre_stack_pop_int(stack);
+         break;
+
+       default:
+         assert(0);
+         break;
+       }
+    }
+
+  /* Check for missing closing parentheses. */
+  if (depth > 0)
+    return REG_EPAREN;
+
+  if (status == REG_OK)
+    ctx->result = result;
+
+  return status;
+}
+
+/* EOF */
diff --git a/contrib/tre/lib/tre-parse.h b/contrib/tre/lib/tre-parse.h
new file mode 100644 (file)
index 0000000..8cfedf1
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+  tre-parse.c - Regexp parser definitions
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+#ifndef TRE_PARSE_H
+#define TRE_PARSE_H 1
+
+/* Parse context. */
+typedef struct {
+  /* Memory allocator. The AST is allocated using this. */
+  tre_mem_t mem;
+  /* Stack used for keeping track of regexp syntax. */
+  tre_stack_t *stack;
+  /* The parse result. */
+  tre_ast_node_t *result;
+  /* The regexp to parse and its length. */
+  const tre_char_t *re;
+  /* The first character of the entire regexp. */
+  const tre_char_t *re_start;
+  /* The first character after the end of the regexp. */
+  const tre_char_t *re_end;
+  int len;
+  /* Current submatch ID. */
+  int submatch_id;
+  /* Current position (number of literal). */
+  int position;
+  /* The highest back reference or -1 if none seen so far. */
+  int max_backref;
+  /* This flag is set if the regexp uses approximate matching. */
+  int have_approx;
+  /* Compilation flags. */
+  int cflags;
+  /* If this flag is set the top-level submatch is not captured. */
+  int nofirstsub;
+  /* The currently set approximate matching parameters. */
+  int params[TRE_PARAM_LAST];
+} tre_parse_ctx_t;
+
+/* Parses a wide character regexp pattern into a syntax tree.  This parser
+   handles both syntaxes (BRE and ERE), including the TRE extensions. */
+reg_errcode_t
+tre_parse(tre_parse_ctx_t *ctx);
+
+#endif /* TRE_PARSE_H */
+
+/* EOF */
diff --git a/contrib/tre/lib/tre-stack.c b/contrib/tre/lib/tre-stack.c
new file mode 100644 (file)
index 0000000..8ad89c8
--- /dev/null
@@ -0,0 +1,125 @@
+/*
+  tre-stack.c - Simple stack implementation
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+#include <stdlib.h>
+#include <assert.h>
+
+#include "tre-internal.h"
+#include "tre-stack.h"
+#include "xmalloc.h"
+
+union tre_stack_item {
+  void *voidptr_value;
+  int int_value;
+};
+
+struct tre_stack_rec {
+  int size;
+  int max_size;
+  int increment;
+  int ptr;
+  union tre_stack_item *stack;
+};
+
+
+tre_stack_t *
+tre_stack_new(int size, int max_size, int increment)
+{
+  tre_stack_t *s;
+
+  s = xmalloc(sizeof(*s));
+  if (s != NULL)
+    {
+      s->stack = xmalloc(sizeof(*s->stack) * size);
+      if (s->stack == NULL)
+       {
+         xfree(s);
+         return NULL;
+       }
+      s->size = size;
+      s->max_size = max_size;
+      s->increment = increment;
+      s->ptr = 0;
+    }
+  return s;
+}
+
+void
+tre_stack_destroy(tre_stack_t *s)
+{
+  xfree(s->stack);
+  xfree(s);
+}
+
+int
+tre_stack_num_objects(tre_stack_t *s)
+{
+  return s->ptr;
+}
+
+static reg_errcode_t
+tre_stack_push(tre_stack_t *s, union tre_stack_item value)
+{
+  if (s->ptr < s->size)
+    {
+      s->stack[s->ptr] = value;
+      s->ptr++;
+    }
+  else
+    {
+      if (s->size >= s->max_size)
+       {
+         DPRINT(("tre_stack_push: stack full\n"));
+         return REG_ESPACE;
+       }
+      else
+       {
+         union tre_stack_item *new_buffer;
+         int new_size;
+         DPRINT(("tre_stack_push: trying to realloc more space\n"));
+         new_size = s->size + s->increment;
+         if (new_size > s->max_size)
+           new_size = s->max_size;
+         new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
+         if (new_buffer == NULL)
+           {
+             DPRINT(("tre_stack_push: realloc failed.\n"));
+             return REG_ESPACE;
+           }
+         DPRINT(("tre_stack_push: realloc succeeded.\n"));
+         assert(new_size > s->size);
+         s->size = new_size;
+         s->stack = new_buffer;
+         tre_stack_push(s, value);
+       }
+    }
+  return REG_OK;
+}
+
+#define define_pushf(typetag, type)  \
+  declare_pushf(typetag, type) {     \
+    union tre_stack_item item;      \
+    item.typetag ## _value = value;  \
+    return tre_stack_push(s, item);  \
+}
+
+define_pushf(int, int)
+define_pushf(voidptr, void *)
+
+#define define_popf(typetag, type)                 \
+  declare_popf(typetag, type) {                            \
+    return s->stack[--s->ptr].typetag ## _value;    \
+  }
+
+define_popf(int, int)
+define_popf(voidptr, void *)
+
+/* EOF */
diff --git a/contrib/tre/lib/tre-stack.h b/contrib/tre/lib/tre-stack.h
new file mode 100644 (file)
index 0000000..7d11c5e
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+  tre-stack.h: Stack definitions
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+
+#ifndef TRE_STACK_H
+#define TRE_STACK_H 1
+
+#include "tre.h"
+
+typedef struct tre_stack_rec tre_stack_t;
+
+/* Creates a new stack object. `size' is initial size in bytes, `max_size'
+   is maximum size, and `increment' specifies how much more space will be
+   allocated with realloc() if all space gets used up. Returns the stack
+   object or NULL if out of memory. */
+tre_stack_t *
+tre_stack_new(int size, int max_size, int increment);
+
+/* Frees the stack object. */
+void
+tre_stack_destroy(tre_stack_t *s);
+
+/* Returns the current number of objects in the stack. */
+int
+tre_stack_num_objects(tre_stack_t *s);
+
+/* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
+   `value' on top of stack `s'.  Returns REG_ESPACE if out of memory.
+   This tries to realloc() more space before failing if maximum size
+   has not yet been reached.  Returns REG_OK if successful. */
+#define declare_pushf(typetag, type)                                         \
+  reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
+
+declare_pushf(voidptr, void *);
+declare_pushf(int, int);
+
+/* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
+   element off of stack `s' and returns it.  The stack must not be
+   empty. */
+#define declare_popf(typetag, type)              \
+  type tre_stack_pop_ ## typetag(tre_stack_t *s)
+
+declare_popf(voidptr, void *);
+declare_popf(int, int);
+
+/* Just to save some typing. */
+#define STACK_PUSH(s, typetag, value)                                        \
+  do                                                                         \
+    {                                                                        \
+      status = tre_stack_push_ ## typetag(s, value);                         \
+    }                                                                        \
+  while (/*CONSTCOND*/0)
+
+#define STACK_PUSHX(s, typetag, value)                                       \
+  {                                                                          \
+    status = tre_stack_push_ ## typetag(s, value);                           \
+    if (status != REG_OK)                                                    \
+      break;                                                                 \
+  }
+
+#define STACK_PUSHR(s, typetag, value)                                       \
+  {                                                                          \
+    reg_errcode_t _status;                                                   \
+    _status = tre_stack_push_ ## typetag(s, value);                          \
+    if (_status != REG_OK)                                                   \
+      return _status;                                                        \
+  }
+
+#endif /* TRE_STACK_H */
+
+/* EOF */
diff --git a/contrib/tre/lib/xmalloc.h b/contrib/tre/lib/xmalloc.h
new file mode 100644 (file)
index 0000000..ce310af
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+  xmalloc.h - Simple malloc debugging library API
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+#ifndef _XMALLOC_H
+#define _XMALLOC_H 1
+
+void *xmalloc_impl(size_t size, const char *file, int line, const char *func);
+void *xcalloc_impl(size_t nmemb, size_t size, const char *file, int line,
+                  const char *func);
+void xfree_impl(void *ptr, const char *file, int line, const char *func);
+void *xrealloc_impl(void *ptr, size_t new_size, const char *file, int line,
+                   const char *func);
+int xmalloc_dump_leaks(void);
+void xmalloc_configure(int fail_after);
+
+
+#ifndef XMALLOC_INTERNAL
+#ifdef MALLOC_DEBUGGING
+
+/* Version 2.4 and later of GCC define a magical variable `__PRETTY_FUNCTION__'
+   which contains the name of the function currently being defined.
+#  define __XMALLOC_FUNCTION    __PRETTY_FUNCTION__
+   This is broken in G++ before version 2.6.
+   C9x has a similar variable called __func__, but prefer the GCC one since
+   it demangles C++ function names.  */
+# ifdef __GNUC__
+#  if __GNUC__ > 2 || (__GNUC__ == 2 \
+                      && __GNUC_MINOR__ >= (defined __cplusplus ? 6 : 4))
+#   define __XMALLOC_FUNCTION   __PRETTY_FUNCTION__
+#  else
+#   define __XMALLOC_FUNCTION   ((const char *) 0)
+#  endif
+# else
+#  if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L
+#   define __XMALLOC_FUNCTION   __func__
+#  else
+#   define __XMALLOC_FUNCTION   ((const char *) 0)
+#  endif
+# endif
+
+#define xmalloc(size) xmalloc_impl(size, __FILE__, __LINE__, \
+                                  __XMALLOC_FUNCTION)
+#define xcalloc(nmemb, size) xcalloc_impl(nmemb, size, __FILE__, __LINE__, \
+                                         __XMALLOC_FUNCTION)
+#define xfree(ptr) xfree_impl(ptr, __FILE__, __LINE__, __XMALLOC_FUNCTION)
+#define xrealloc(ptr, new_size) xrealloc_impl(ptr, new_size, __FILE__, \
+                                             __LINE__, __XMALLOC_FUNCTION)
+#undef malloc
+#undef calloc
+#undef free
+#undef realloc
+
+#define malloc USE_XMALLOC_INSTEAD_OF_MALLOC
+#define calloc USE_XCALLOC_INSTEAD_OF_CALLOC
+#define free   USE_XFREE_INSTEAD_OF_FREE
+#define realloc USE_XREALLOC_INSTEAD_OF_REALLOC
+
+#else /* !MALLOC_DEBUGGING */
+
+#include <stdlib.h>
+
+#define xmalloc(size) malloc(size)
+#define xcalloc(nmemb, size) calloc(nmemb, size)
+#define xfree(ptr) free(ptr)
+#define xrealloc(ptr, new_size) realloc(ptr, new_size)
+
+#endif /* !MALLOC_DEBUGGING */
+#endif /* !XMALLOC_INTERNAL */
+
+#endif /* _XMALLOC_H */
+
+/* EOF */