Merge branch 'vendor/GCC47'
[dragonfly.git] / contrib / libedit / src / tokenizer.c
1 /*      $NetBSD: tokenizer.c,v 1.21 2011/08/16 16:25:15 christos Exp $  */
2
3 /*-
4  * Copyright (c) 1992, 1993
5  *      The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Christos Zoulas of Cornell University.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #ifndef NARROWCHAR
36 #include "config.h"
37 #endif
38
39 #if !defined(lint) && !defined(SCCSID)
40 #if 0
41 static char sccsid[] = "@(#)tokenizer.c 8.1 (Berkeley) 6/4/93";
42 #else
43 __RCSID("$NetBSD: tokenizer.c,v 1.21 2011/08/16 16:25:15 christos Exp $");
44 #endif
45 #endif /* not lint && not SCCSID */
46
47 /* We build this file twice, once as NARROW, once as WIDE. */
48 /*
49  * tokenizer.c: Bourne shell like tokenizer
50  */
51 #include <string.h>
52 #include <stdlib.h>
53 #include "histedit.h"
54 #include "chartype.h"
55
/*
 * Quoting state of the tokenizer while it scans a line.
 */
typedef enum {
        Q_none,         /* Not inside any quoting                        */
        Q_single,       /* Inside '...'                                  */
        Q_double,       /* Inside "..."                                  */
        Q_one,          /* Backslash seen outside quotes; the next       */
                        /* character is taken literally                  */
        Q_doubleone     /* Backslash seen inside "..."; the next         */
                        /* character is handled, then back to Q_double   */
} quote_t;
59
/* Bits stored in the tokenizer's flags member */
#define TOK_KEEP                1  /* emit the current word even if empty   */
#define TOK_EAT                 2  /* an escaped newline was just consumed  */

/* Initial size and growth step, in elements */
#define WINCR                   20 /* word buffer (Char)                    */
#define AINCR                   10 /* argument vector (Char *)              */

/* Field separators used when the caller passes none to FUN(tok,init)() */
#define IFS                     STR("\t \n")

/* Allocation wrappers; all tokenizer memory goes through these */
#define tok_malloc(a)           malloc(a)
#define tok_free(a)             free(a)
#define tok_realloc(a, b)       realloc(a, b)
#define tok_strdup(a)           Strdup(a)
72
73
74 struct TYPE(tokenizer) {
75         Char    *ifs;           /* In field separator                    */
76         size_t   argc, amax;    /* Current and maximum number of args    */
77         Char   **argv;          /* Argument list                         */
78         Char    *wptr, *wmax;   /* Space and limit on the word buffer    */
79         Char    *wstart;        /* Beginning of next word                */
80         Char    *wspace;        /* Space of word buffer                  */
81         quote_t  quote;         /* Quoting state                         */
82         int      flags;         /* flags;                                */
83 };
84
85
86 private void FUN(tok,finish)(TYPE(Tokenizer) *);
87
88
89 /* FUN(tok,finish)():
90  *      Finish a word in the tokenizer.
91  */
92 private void
93 FUN(tok,finish)(TYPE(Tokenizer) *tok)
94 {
95
96         *tok->wptr = '\0';
97         if ((tok->flags & TOK_KEEP) || tok->wptr != tok->wstart) {
98                 tok->argv[tok->argc++] = tok->wstart;
99                 tok->argv[tok->argc] = NULL;
100                 tok->wstart = ++tok->wptr;
101         }
102         tok->flags &= ~TOK_KEEP;
103 }
104
105
106 /* FUN(tok,init)():
107  *      Initialize the tokenizer
108  */
109 public TYPE(Tokenizer) *
110 FUN(tok,init)(const Char *ifs)
111 {
112         TYPE(Tokenizer) *tok = tok_malloc(sizeof(*tok));
113
114         if (tok == NULL)
115                 return NULL;
116         tok->ifs = tok_strdup(ifs ? ifs : IFS);
117         if (tok->ifs == NULL) {
118                 tok_free(tok);
119                 return NULL;
120         }
121         tok->argc = 0;
122         tok->amax = AINCR;
123         tok->argv = tok_malloc(sizeof(*tok->argv) * tok->amax);
124         if (tok->argv == NULL) {
125                 tok_free(tok->ifs);
126                 tok_free(tok);
127                 return NULL;
128         }
129         tok->argv[0] = NULL;
130         tok->wspace = tok_malloc(WINCR * sizeof(*tok->wspace));
131         if (tok->wspace == NULL) {
132                 tok_free(tok->argv);
133                 tok_free(tok->ifs);
134                 tok_free(tok);
135                 return NULL;
136         }
137         tok->wmax = tok->wspace + WINCR;
138         tok->wstart = tok->wspace;
139         tok->wptr = tok->wspace;
140         tok->flags = 0;
141         tok->quote = Q_none;
142
143         return tok;
144 }
145
146
147 /* FUN(tok,reset)():
148  *      Reset the tokenizer
149  */
150 public void
151 FUN(tok,reset)(TYPE(Tokenizer) *tok)
152 {
153
154         tok->argc = 0;
155         tok->wstart = tok->wspace;
156         tok->wptr = tok->wspace;
157         tok->flags = 0;
158         tok->quote = Q_none;
159 }
160
161
162 /* FUN(tok,end)():
163  *      Clean up
164  */
165 public void
166 FUN(tok,end)(TYPE(Tokenizer) *tok)
167 {
168
169         tok_free(tok->ifs);
170         tok_free(tok->wspace);
171         tok_free(tok->argv);
172         tok_free(tok);
173 }
174
175
176
177 /* FUN(tok,line)():
178  *      Bourne shell (sh(1)) like tokenizing
179  *      Arguments:
180  *              tok     current tokenizer state (setup with FUN(tok,init)())
181  *              line    line to parse
182  *      Returns:
183  *              -1      Internal error
184  *               3      Quoted return
185  *               2      Unmatched double quote
186  *               1      Unmatched single quote
187  *               0      Ok
188  *      Modifies (if return value is 0):
189  *              argc    number of arguments
190  *              argv    argument array
191  *              cursorc if !NULL, argv element containing cursor
192  *              cursorv if !NULL, offset in argv[cursorc] of cursor
193  */
194 public int
195 FUN(tok,line)(TYPE(Tokenizer) *tok, const TYPE(LineInfo) *line,
196     int *argc, const Char ***argv, int *cursorc, int *cursoro)
197 {
198         const Char *ptr;
199         int cc, co;
200
201         cc = co = -1;
202         ptr = line->buffer;
203         for (ptr = line->buffer; ;ptr++) {
204                 if (ptr >= line->lastchar)
205                         ptr = STR("");
206                 if (ptr == line->cursor) {
207                         cc = (int)tok->argc;
208                         co = (int)(tok->wptr - tok->wstart);
209                 }
210                 switch (*ptr) {
211                 case '\'':
212                         tok->flags |= TOK_KEEP;
213                         tok->flags &= ~TOK_EAT;
214                         switch (tok->quote) {
215                         case Q_none:
216                                 tok->quote = Q_single;  /* Enter single quote
217                                                          * mode */
218                                 break;
219
220                         case Q_single:  /* Exit single quote mode */
221                                 tok->quote = Q_none;
222                                 break;
223
224                         case Q_one:     /* Quote this ' */
225                                 tok->quote = Q_none;
226                                 *tok->wptr++ = *ptr;
227                                 break;
228
229                         case Q_double:  /* Stay in double quote mode */
230                                 *tok->wptr++ = *ptr;
231                                 break;
232
233                         case Q_doubleone:       /* Quote this ' */
234                                 tok->quote = Q_double;
235                                 *tok->wptr++ = *ptr;
236                                 break;
237
238                         default:
239                                 return -1;
240                         }
241                         break;
242
243                 case '"':
244                         tok->flags &= ~TOK_EAT;
245                         tok->flags |= TOK_KEEP;
246                         switch (tok->quote) {
247                         case Q_none:    /* Enter double quote mode */
248                                 tok->quote = Q_double;
249                                 break;
250
251                         case Q_double:  /* Exit double quote mode */
252                                 tok->quote = Q_none;
253                                 break;
254
255                         case Q_one:     /* Quote this " */
256                                 tok->quote = Q_none;
257                                 *tok->wptr++ = *ptr;
258                                 break;
259
260                         case Q_single:  /* Stay in single quote mode */
261                                 *tok->wptr++ = *ptr;
262                                 break;
263
264                         case Q_doubleone:       /* Quote this " */
265                                 tok->quote = Q_double;
266                                 *tok->wptr++ = *ptr;
267                                 break;
268
269                         default:
270                                 return -1;
271                         }
272                         break;
273
274                 case '\\':
275                         tok->flags |= TOK_KEEP;
276                         tok->flags &= ~TOK_EAT;
277                         switch (tok->quote) {
278                         case Q_none:    /* Quote next character */
279                                 tok->quote = Q_one;
280                                 break;
281
282                         case Q_double:  /* Quote next character */
283                                 tok->quote = Q_doubleone;
284                                 break;
285
286                         case Q_one:     /* Quote this, restore state */
287                                 *tok->wptr++ = *ptr;
288                                 tok->quote = Q_none;
289                                 break;
290
291                         case Q_single:  /* Stay in single quote mode */
292                                 *tok->wptr++ = *ptr;
293                                 break;
294
295                         case Q_doubleone:       /* Quote this \ */
296                                 tok->quote = Q_double;
297                                 *tok->wptr++ = *ptr;
298                                 break;
299
300                         default:
301                                 return -1;
302                         }
303                         break;
304
305                 case '\n':
306                         tok->flags &= ~TOK_EAT;
307                         switch (tok->quote) {
308                         case Q_none:
309                                 goto tok_line_outok;
310
311                         case Q_single:
312                         case Q_double:
313                                 *tok->wptr++ = *ptr;    /* Add the return */
314                                 break;
315
316                         case Q_doubleone:   /* Back to double, eat the '\n' */
317                                 tok->flags |= TOK_EAT;
318                                 tok->quote = Q_double;
319                                 break;
320
321                         case Q_one:     /* No quote, more eat the '\n' */
322                                 tok->flags |= TOK_EAT;
323                                 tok->quote = Q_none;
324                                 break;
325
326                         default:
327                                 return 0;
328                         }
329                         break;
330
331                 case '\0':
332                         switch (tok->quote) {
333                         case Q_none:
334                                 /* Finish word and return */
335                                 if (tok->flags & TOK_EAT) {
336                                         tok->flags &= ~TOK_EAT;
337                                         return 3;
338                                 }
339                                 goto tok_line_outok;
340
341                         case Q_single:
342                                 return 1;
343
344                         case Q_double:
345                                 return 2;
346
347                         case Q_doubleone:
348                                 tok->quote = Q_double;
349                                 *tok->wptr++ = *ptr;
350                                 break;
351
352                         case Q_one:
353                                 tok->quote = Q_none;
354                                 *tok->wptr++ = *ptr;
355                                 break;
356
357                         default:
358                                 return -1;
359                         }
360                         break;
361
362                 default:
363                         tok->flags &= ~TOK_EAT;
364                         switch (tok->quote) {
365                         case Q_none:
366                                 if (Strchr(tok->ifs, *ptr) != NULL)
367                                         FUN(tok,finish)(tok);
368                                 else
369                                         *tok->wptr++ = *ptr;
370                                 break;
371
372                         case Q_single:
373                         case Q_double:
374                                 *tok->wptr++ = *ptr;
375                                 break;
376
377
378                         case Q_doubleone:
379                                 *tok->wptr++ = '\\';
380                                 tok->quote = Q_double;
381                                 *tok->wptr++ = *ptr;
382                                 break;
383
384                         case Q_one:
385                                 tok->quote = Q_none;
386                                 *tok->wptr++ = *ptr;
387                                 break;
388
389                         default:
390                                 return -1;
391
392                         }
393                         break;
394                 }
395
396                 if (tok->wptr >= tok->wmax - 4) {
397                         size_t size = (size_t)(tok->wmax - tok->wspace + WINCR);
398                         Char *s = tok_realloc(tok->wspace,
399                             size * sizeof(*s));
400                         if (s == NULL)
401                                 return -1;
402
403                         if (s != tok->wspace) {
404                                 size_t i;
405                                 for (i = 0; i < tok->argc; i++) {
406                                     tok->argv[i] =
407                                         (tok->argv[i] - tok->wspace) + s;
408                                 }
409                                 tok->wptr = (tok->wptr - tok->wspace) + s;
410                                 tok->wstart = (tok->wstart - tok->wspace) + s;
411                                 tok->wspace = s;
412                         }
413                         tok->wmax = s + size;
414                 }
415                 if (tok->argc >= tok->amax - 4) {
416                         Char **p;
417                         tok->amax += AINCR;
418                         p = tok_realloc(tok->argv, tok->amax * sizeof(*p));
419                         if (p == NULL)
420                                 return -1;
421                         tok->argv = p;
422                 }
423         }
424  tok_line_outok:
425         if (cc == -1 && co == -1) {
426                 cc = (int)tok->argc;
427                 co = (int)(tok->wptr - tok->wstart);
428         }
429         if (cursorc != NULL)
430                 *cursorc = cc;
431         if (cursoro != NULL)
432                 *cursoro = co;
433         FUN(tok,finish)(tok);
434         *argv = (const Char **)tok->argv;
435         *argc = (int)tok->argc;
436         return 0;
437 }
438
439 /* FUN(tok,str)():
440  *      Simpler version of tok_line, taking a NUL terminated line
441  *      and splitting into words, ignoring cursor state.
442  */
443 public int
444 FUN(tok,str)(TYPE(Tokenizer) *tok, const Char *line, int *argc,
445     const Char ***argv)
446 {
447         TYPE(LineInfo) li;
448
449         memset(&li, 0, sizeof(li));
450         li.buffer = line;
451         li.cursor = li.lastchar = Strchr(line, '\0');
452         return FUN(tok,line(tok, &li, argc, argv, NULL, NULL));
453 }