Merge from vendor branch SENDMAIL:
[dragonfly.git] / games / quiz / rxp.c
1 /*-
2  * Copyright (c) 1991, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
7  * Commodore Business Machines.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed by the University of
20  *      California, Berkeley and its contributors.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  * @(#)rxp.c    8.1 (Berkeley) 5/31/93
38  * $FreeBSD: src/games/quiz/rxp.c,v 1.5 1999/12/12 02:29:54 billf Exp $
39  * $DragonFly: src/games/quiz/rxp.c,v 1.4 2005/04/25 16:10:24 liamfoy Exp $
40  */
41
42 /*
43  * regular expression parser
44  *
45  * external functions and return values are:
46  * rxp_compile(s)
47  *      TRUE    success
48  *      FALSE   parse failure; error message will be in char rxperr[]
49  * metas are:
50  *      {...}   optional pattern, equialent to [...|]
51  *      |       alternate pattern
52  *      [...]   pattern delimiters
53  *
54  * rxp_match(s)
55  *      TRUE    string s matches compiled pattern
56  *      FALSE   match failure or regexp error
57  *
58  * rxp_expand()
59  *      char *  reverse-engineered regular expression string
60  *      NULL    regexp error
61  */
62
63 #include <stdio.h>
64 #include <ctype.h>
65 #include "quiz.h"
66                                         /* regexp tokens,       arg */
67 #define LIT     (-1)                    /* literal character,   char */
68 #define SOT     (-2)                    /* start text anchor,   - */
69 #define EOT     (-3)                    /* end text anchor,     - */
70 #define GRP_S   (-4)                    /* start alternate grp, ptr_to_end */
71 #define GRP_E   (-5)                    /* end group,           - */
72 #define ALT_S   (-6)                    /* alternate starts,    ptr_to_next */
73 #define ALT_E   (-7)                    /* alternate ends,      - */
74 #define END     (-8)                    /* end of regexp,       - */
75
76 typedef short Rxp_t;                    /* type for regexp tokens */
77
78 static Rxp_t rxpbuf[RXP_LINE_SZ];       /* compiled regular expression buffer */
79 char rxperr[128];                       /* parser error message */
80
81 static int       rxp__compile (char *, int);
82 static char     *rxp__expand (int);
83 static int       rxp__match (char *, int, Rxp_t *, Rxp_t *, char *);
84
85 int
86 rxp_compile(char *s)
87 {
88         return (rxp__compile(s, TRUE));
89 }
90
91 static int
92 rxp__compile(char *s, int first)
93 {
94         static Rxp_t *rp;
95         static char *sp;
96         Rxp_t *grp_ptr;
97         Rxp_t *alt_ptr;
98         int esc, err;
99
100         esc = 0;
101         if (first) {
102                 rp = rxpbuf;
103                 sp = s;
104                 *rp++ = SOT;    /* auto-anchor: pat is really ^pat$ */
105                 *rp++ = GRP_S;  /* auto-group: ^pat$ is really ^[pat]$ */
106                 *rp++ = 0;
107         }
108         *rp++ = ALT_S;
109         alt_ptr = rp;
110         *rp++ = 0;
111         for (; *sp; ++sp) {
112                 if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
113                         (void)snprintf(rxperr, sizeof(rxperr),
114                             "regular expression too long %s", s);
115                         return (FALSE);
116                 }
117                 if (*sp == ':' && !esc)
118                         break;
119                 if (esc) {
120                         *rp++ = LIT;
121                         *rp++ = *sp;
122                         esc = 0;
123                 }
124                 else switch (*sp) {
125                 case '\\':
126                         esc = 1;
127                         break;
128                 case '{':
129                 case '[':
130                         *rp++ = GRP_S;
131                         grp_ptr = rp;
132                         *rp++ = 0;
133                         sp++;
134                         if ((err = rxp__compile(s, FALSE)) != TRUE)
135                                 return (err);
136                         *rp++ = GRP_E;
137                         *grp_ptr = rp - rxpbuf;
138                         break;
139                 case '}':
140                 case ']':
141                 case '|':
142                         *rp++ = ALT_E;
143                         *alt_ptr = rp - rxpbuf;
144                         if (*sp != ']') {
145                                 *rp++ = ALT_S;
146                                 alt_ptr = rp;
147                                 *rp++ = 0;
148                         }
149                         if (*sp != '|') {
150                                 if (*sp != ']') {
151                                         *rp++ = ALT_E;
152                                         *alt_ptr = rp - rxpbuf;
153                                 }
154                                 if (first) {
155                                         (void)snprintf(rxperr, sizeof(rxperr),
156                                             "unmatched alternator in regexp %s",
157                                              s);
158                                         return (FALSE);
159                                 }
160                                 return (TRUE);
161                         }
162                         break;
163                 default:
164                         *rp++ = LIT;
165                         *rp++ = *sp;
166                         esc = 0;
167                         break;
168                 }
169         }
170         if (!first) {
171                 (void)snprintf(rxperr, sizeof(rxperr),
172                     "unmatched alternator in regexp %s", s);
173                 return (FALSE);
174         }
175         *rp++ = ALT_E;
176         *alt_ptr = rp - rxpbuf;
177         *rp++ = GRP_E;
178         *(rxpbuf + 2) = rp - rxpbuf;
179         *rp++ = EOT;
180         *rp = END;
181         return (TRUE);
182 }
183
184 /*
185  * match string against compiled regular expression
186  */
187 int
188 rxp_match(char *s)
189 {
190         return (rxp__match(s, TRUE, NULL, NULL, NULL));
191 }
192
193 /*
194  * jump to j_succ on successful alt match
195  * jump to j_fail on failed match
196  * reset sp to sp_fail on failed match
197  */
198 static int
199 rxp__match(char *s, int first, Rxp_t *j_succ, Rxp_t *j_fail, char *sp_fail)
200 {
201         static Rxp_t *rp;
202         static char *sp;
203         int ch;
204         Rxp_t *grp_end;
205         int err;
206
207         grp_end = NULL;
208         if (first) {
209                 rp = rxpbuf;
210                 sp = s;
211         }
212         while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
213                 switch(*rp) {
214                 case LIT:
215                         rp++;
216                         ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
217                         if (ch != *sp++) {
218                                 rp = j_fail;
219                                 sp = sp_fail;
220                                 return (TRUE);
221                         }
222                         rp++;
223                         break;
224                 case SOT:
225                         if (sp != s)
226                                 return (FALSE);
227                         rp++;
228                         break;
229                 case EOT:
230                         if (*sp != 0)
231                                 return (FALSE);
232                         rp++;
233                         break;
234                 case GRP_S:
235                         rp++;
236                         grp_end = rxpbuf + *rp++;
237                         break;
238                 case ALT_S:
239                         rp++;
240                         if ((err = rxp__match(sp,
241                             FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
242                                 return (err);
243                         break;
244                 case ALT_E:
245                         rp = j_succ;
246                         return (TRUE);
247                 case GRP_E:
248                 default:
249                         return (FALSE);
250                 }
251         return (*rp != END ? FALSE : TRUE);
252 }
253
254 /*
255  * Reverse engineer the regular expression, by picking first of all alternates.
256  */
257 char *
258 rxp_expand(void)
259 {
260         return (rxp__expand(TRUE));
261 }
262
263 static char *
264 rxp__expand(int first)
265 {
266         static char buf[RXP_LINE_SZ/2];
267         static Rxp_t *rp;
268         static char *bp;
269         Rxp_t *grp_ptr;
270         char *err;
271
272         if (first) {
273                 rp = rxpbuf;
274                 bp = buf;
275         }
276         while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
277                 switch(*rp) {
278                 case LIT:
279                         rp++;
280                         *bp++ = *rp++;
281                         break;
282                 case GRP_S:
283                         rp++;
284                         grp_ptr = rxpbuf + *rp;
285                         rp++;
286                         if ((err = rxp__expand(FALSE)) == NULL)
287                                 return (err);
288                         rp = grp_ptr;
289                         break;
290                 case ALT_E:
291                         return (buf);
292                 case ALT_S:
293                         rp++;
294                         /* FALLTHROUGH */
295                 case SOT:
296                 case EOT:
297                 case GRP_E:
298                         rp++;
299                         break;
300                 default:
301                         return (NULL);
302                 }
303         if (first) {
304                 if (*rp != END)
305                         return (NULL);
306                 *bp = '\0';
307         }
308         return (buf);
309 }