Initial import from FreeBSD RELENG_4:
[dragonfly.git] / games / quiz / rxp.c
1 /*-
2  * Copyright (c) 1991, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
7  * Commodore Business Machines.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed by the University of
20  *      California, Berkeley and its contributors.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)rxp.c       8.1 (Berkeley) 5/31/93";
41 #endif
42 static const char rcsid[] =
43  "$FreeBSD: src/games/quiz/rxp.c,v 1.5 1999/12/12 02:29:54 billf Exp $";
44 #endif /* not lint */
45
46 /*
47  * regular expression parser
48  *
49  * external functions and return values are:
50  * rxp_compile(s)
51  *      TRUE    success
52  *      FALSE   parse failure; error message will be in char rxperr[]
53  * metas are:
54  *      {...}   optional pattern, equialent to [...|]
55  *      |       alternate pattern
56  *      [...]   pattern delimiters
57  *
58  * rxp_match(s)
59  *      TRUE    string s matches compiled pattern
60  *      FALSE   match failure or regexp error
61  *
62  * rxp_expand()
63  *      char *  reverse-engineered regular expression string
64  *      NULL    regexp error
65  */
66
67 #include <stdio.h>
68 #include <ctype.h>
69 #include "quiz.h"
70                                         /* regexp tokens,       arg */
71 #define LIT     (-1)                    /* literal character,   char */
72 #define SOT     (-2)                    /* start text anchor,   - */
73 #define EOT     (-3)                    /* end text anchor,     - */
74 #define GRP_S   (-4)                    /* start alternate grp, ptr_to_end */
75 #define GRP_E   (-5)                    /* end group,           - */
76 #define ALT_S   (-6)                    /* alternate starts,    ptr_to_next */
77 #define ALT_E   (-7)                    /* alternate ends,      - */
78 #define END     (-8)                    /* end of regexp,       - */
79
80 typedef short Rxp_t;                    /* type for regexp tokens */
81
82 static Rxp_t rxpbuf[RXP_LINE_SZ];       /* compiled regular expression buffer */
83 char rxperr[128];                       /* parser error message */
84
85 static int       rxp__compile __P((char *, int));
86 static char     *rxp__expand __P((int));
87 static int       rxp__match __P((char *, int, Rxp_t *, Rxp_t *, char *));
88
89 int
90 rxp_compile(s)
91         char *  s;
92 {
93         return (rxp__compile(s, TRUE));
94 }
95
96 static int
97 rxp__compile(s, first)
98         char *s;
99         int first;
100 {
101         static Rxp_t *rp;
102         static char *sp;
103         Rxp_t *grp_ptr;
104         Rxp_t *alt_ptr;
105         int esc, err;
106
107         esc = 0;
108         if (first) {
109                 rp = rxpbuf;
110                 sp = s;
111                 *rp++ = SOT;    /* auto-anchor: pat is really ^pat$ */
112                 *rp++ = GRP_S;  /* auto-group: ^pat$ is really ^[pat]$ */
113                 *rp++ = 0;
114         }
115         *rp++ = ALT_S;
116         alt_ptr = rp;
117         *rp++ = 0;
118         for (; *sp; ++sp) {
119                 if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
120                         (void)snprintf(rxperr, sizeof(rxperr),
121                             "regular expression too long %s", s);
122                         return (FALSE);
123                 }
124                 if (*sp == ':' && !esc)
125                         break;
126                 if (esc) {
127                         *rp++ = LIT;
128                         *rp++ = *sp;
129                         esc = 0;
130                 }
131                 else switch (*sp) {
132                 case '\\':
133                         esc = 1;
134                         break;
135                 case '{':
136                 case '[':
137                         *rp++ = GRP_S;
138                         grp_ptr = rp;
139                         *rp++ = 0;
140                         sp++;
141                         if ((err = rxp__compile(s, FALSE)) != TRUE)
142                                 return (err);
143                         *rp++ = GRP_E;
144                         *grp_ptr = rp - rxpbuf;
145                         break;
146                 case '}':
147                 case ']':
148                 case '|':
149                         *rp++ = ALT_E;
150                         *alt_ptr = rp - rxpbuf;
151                         if (*sp != ']') {
152                                 *rp++ = ALT_S;
153                                 alt_ptr = rp;
154                                 *rp++ = 0;
155                         }
156                         if (*sp != '|') {
157                                 if (*sp != ']') {
158                                         *rp++ = ALT_E;
159                                         *alt_ptr = rp - rxpbuf;
160                                 }
161                                 if (first) {
162                                         (void)snprintf(rxperr, sizeof(rxperr),
163                                             "unmatched alternator in regexp %s",
164                                              s);
165                                         return (FALSE);
166                                 }
167                                 return (TRUE);
168                         }
169                         break;
170                 default:
171                         *rp++ = LIT;
172                         *rp++ = *sp;
173                         esc = 0;
174                         break;
175                 }
176         }
177         if (!first) {
178                 (void)snprintf(rxperr, sizeof(rxperr),
179                     "unmatched alternator in regexp %s", s);
180                 return (FALSE);
181         }
182         *rp++ = ALT_E;
183         *alt_ptr = rp - rxpbuf;
184         *rp++ = GRP_E;
185         *(rxpbuf + 2) = rp - rxpbuf;
186         *rp++ = EOT;
187         *rp = END;
188         return (TRUE);
189 }
190
191 /*
192  * match string against compiled regular expression
193  */
194 int
195 rxp_match(s)
196         char *  s;
197 {
198         return (rxp__match(s, TRUE, NULL, NULL, NULL));
199 }
200
201 static int
202 rxp__match(s, first, j_succ, j_fail, sp_fail)
203         char *s;
204         int first;
205         Rxp_t *j_succ;          /* jump here on successful alt match */
206         Rxp_t *j_fail;          /* jump here on failed match */
207         char *sp_fail;          /* reset sp to here on failed match */
208 {
209         static Rxp_t *rp;
210         static char *sp;
211         int ch;
212         Rxp_t *grp_end;
213         int err;
214
215         grp_end = NULL;
216         if (first) {
217                 rp = rxpbuf;
218                 sp = s;
219         }
220         while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
221                 switch(*rp) {
222                 case LIT:
223                         rp++;
224                         ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
225                         if (ch != *sp++) {
226                                 rp = j_fail;
227                                 sp = sp_fail;
228                                 return (TRUE);
229                         }
230                         rp++;
231                         break;
232                 case SOT:
233                         if (sp != s)
234                                 return (FALSE);
235                         rp++;
236                         break;
237                 case EOT:
238                         if (*sp != 0)
239                                 return (FALSE);
240                         rp++;
241                         break;
242                 case GRP_S:
243                         rp++;
244                         grp_end = rxpbuf + *rp++;
245                         break;
246                 case ALT_S:
247                         rp++;
248                         if ((err = rxp__match(sp,
249                             FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
250                                 return (err);
251                         break;
252                 case ALT_E:
253                         rp = j_succ;
254                         return (TRUE);
255                 case GRP_E:
256                 default:
257                         return (FALSE);
258                 }
259         return (*rp != END ? FALSE : TRUE);
260 }
261
262 /*
263  * Reverse engineer the regular expression, by picking first of all alternates.
264  */
265 char *
266 rxp_expand()
267 {
268         return (rxp__expand(TRUE));
269 }
270
271 static char *
272 rxp__expand(first)
273         int first;
274 {
275         static char buf[RXP_LINE_SZ/2];
276         static Rxp_t *rp;
277         static char *bp;
278         Rxp_t *grp_ptr;
279         char *err;
280
281         if (first) {
282                 rp = rxpbuf;
283                 bp = buf;
284         }
285         while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
286                 switch(*rp) {
287                 case LIT:
288                         rp++;
289                         *bp++ = *rp++;
290                         break;
291                 case GRP_S:
292                         rp++;
293                         grp_ptr = rxpbuf + *rp;
294                         rp++;
295                         if ((err = rxp__expand(FALSE)) == NULL)
296                                 return (err);
297                         rp = grp_ptr;
298                         break;
299                 case ALT_E:
300                         return (buf);
301                 case ALT_S:
302                         rp++;
303                         /* FALLTHROUGH */
304                 case SOT:
305                 case EOT:
306                 case GRP_E:
307                         rp++;
308                         break;
309                 default:
310                         return (NULL);
311                 }
312         if (first) {
313                 if (*rp != END)
314                         return (NULL);
315                 *bp = '\0';
316         }
317         return (buf);
318 }