2 * Copyright (c) 1992, 1993
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Christos Zoulas of Cornell University.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * @(#)tokenizer.c 8.1 (Berkeley) 6/4/93
33 * $NetBSD: tokenizer.c,v 1.14 2003/12/05 13:37:48 lukem Exp $
34 * $DragonFly: src/lib/libedit/tokenizer.c,v 1.5 2005/11/13 11:58:30 corecode Exp $
40 * tokenizer.c: Bourne shell like tokenizer
/*
 * Quoting states kept in tok->quote:
 *   Q_none       not inside any quoting
 *   Q_single     inside single quotes
 *   Q_double     inside double quotes
 *   Q_doubleone  a backslash was seen while inside double quotes
 *   Q_one        presumably a backslash seen outside quotes -- TODO confirm
 */
47 Q_none, Q_single, Q_double, Q_one, Q_doubleone
/*
 * Allocation wrappers: plain aliases for the C library routines of the
 * same name, used by the tokenizer code below.
 */
58 #define tok_strdup(a) strdup(a)
59 #define tok_malloc(a) malloc(a)
60 #define tok_free(a) free(a)
61 #define tok_realloc(a, b) realloc(a, b)
65 char *ifs; /* Field separator characters (private copy made by tok_init) */
66 int argc, amax; /* Number of args in use and allocated capacity of argv */
67 char **argv; /* Argument list; entries point into the word buffer */
68 char *wptr, *wmax; /* Current write position and end limit of the word buffer */
69 char *wstart; /* Beginning of the word currently being built */
70 char *wspace; /* Base address of the allocated word buffer */
71 quote_t quote; /* Current quoting state */
72 int flags; /* TOK_KEEP / TOK_EAT state bits */
/* Forward declaration of the word-finishing helper defined below. */
76 private void tok_finish(Tokenizer *);
80 * Finish a word in the tokenizer.
/*
 * tok_finish: close off the word currently being built.
 * The word is recorded in argv when it is non-empty (wptr != wstart) or
 * when TOK_KEEP is set; TOK_KEEP is always cleared on the way out.
 */
83 tok_finish(Tokenizer *tok)
87 if ((tok->flags & TOK_KEEP) || tok->wptr != tok->wstart) {
88 tok->argv[tok->argc++] = tok->wstart; /* record the finished word */
89 tok->argv[tok->argc] = NULL; /* keep argv NULL-terminated */
90 tok->wstart = ++tok->wptr; /* next word starts one past the current position */
92 tok->flags &= ~TOK_KEEP;
97 * Initialize the tokenizer
/*
 * tok_init: allocate a tokenizer together with its separator string,
 * argument array and word buffer.
 * ifs: field separator characters; when NULL the IFS default is used.
 * Each failed allocation releases everything allocated before it.
 */
100 tok_init(const char *ifs)
102 Tokenizer *tok = (Tokenizer *) tok_malloc(sizeof(Tokenizer));
/* Keep a private copy of the separators so the caller's string is not retained. */
106 tok->ifs = tok_strdup(ifs ? ifs : IFS);
107 if (tok->ifs == NULL) {
108 tok_free((ptr_t)tok);
/* Argument array with room for tok->amax pointers. */
113 tok->argv = (char **) tok_malloc(sizeof(char *) * tok->amax);
114 if (tok->argv == NULL) {
115 tok_free((ptr_t)tok->ifs);
116 tok_free((ptr_t)tok);
/* Word buffer of WINCR bytes. */
120 tok->wspace = (char *) tok_malloc(WINCR);
121 if (tok->wspace == NULL) {
122 tok_free((ptr_t)tok->argv);
123 tok_free((ptr_t)tok->ifs);
124 tok_free((ptr_t)tok);
127 tok->wmax = tok->wspace + WINCR; /* one past the end of the word buffer */
128 tok->wstart = tok->wspace; /* first word starts at the buffer base */
129 tok->wptr = tok->wspace; /* nothing written yet */
138 * Reset the tokenizer
/*
 * tok_reset: rewind the word pointers to the base of the word buffer so
 * the existing allocation is reused for the next input.
 */
141 tok_reset(Tokenizer *tok)
145 tok->wstart = tok->wspace;
146 tok->wptr = tok->wspace;
/*
 * tok_end: release the separator string, the word buffer and the argument
 * array, then the tokenizer itself; tok must not be used afterwards.
 */
156 tok_end(Tokenizer *tok)
159 tok_free((ptr_t) tok->ifs);
160 tok_free((ptr_t) tok->wspace);
161 tok_free((ptr_t) tok->argv);
162 tok_free((ptr_t) tok); /* freed last: the members above are reached through it */
168 * Bourne shell (sh(1)) like tokenizing
170 * tok current tokenizer state (setup with tok_init())
175 * 2 Unmatched double quote
176 * 1 Unmatched single quote
178 * Modifies (if return value is 0):
179 * argc number of arguments
180 * argv argument array
181 * cursorc if !NULL, argv element containing cursor
182 * cursoro if !NULL, offset in argv[cursorc] of cursor
/*
 * tok_line: tokenize the characters of line->buffer up to line->lastchar,
 * tracking the quoting state in tok->quote and the TOK_KEEP / TOK_EAT bits
 * in tok->flags.  The word buffer and the argument array are enlarged on
 * demand, and the argument array is stored through *argv.
 */
185 tok_line(Tokenizer *tok, const LineInfo *line,
186 int *argc, const char ***argv, int *cursorc, int *cursoro)
/* The loop has no condition of its own; it can only be left from inside the body. */
193 for (ptr = line->buffer; ;ptr++) {
194 if (ptr >= line->lastchar) /* ran past the last valid character */
/* At the cursor: remember how far into the current word it sits. */
196 if (ptr == line->cursor) {
198 co = tok->wptr - tok->wstart;
/* TOK_KEEP makes tok_finish() record the word even when it is empty. */
202 tok->flags |= TOK_KEEP;
203 tok->flags &= ~TOK_EAT;
204 switch (tok->quote) {
206 tok->quote = Q_single; /* Enter single quote
210 case Q_single: /* Exit single quote mode */
214 case Q_one: /* Quote this ' */
219 case Q_double: /* Stay in double quote mode */
223 case Q_doubleone: /* Quote this ' */
224 tok->quote = Q_double;
234 tok->flags &= ~TOK_EAT;
235 tok->flags |= TOK_KEEP;
236 switch (tok->quote) {
237 case Q_none: /* Enter double quote mode */
238 tok->quote = Q_double;
241 case Q_double: /* Exit double quote mode */
245 case Q_one: /* Quote this " */
250 case Q_single: /* Stay in single quote mode */
254 case Q_doubleone: /* Quote this " */
255 tok->quote = Q_double;
265 tok->flags |= TOK_KEEP;
266 tok->flags &= ~TOK_EAT;
267 switch (tok->quote) {
268 case Q_none: /* Quote next character */
272 case Q_double: /* Quote next character */
273 tok->quote = Q_doubleone;
276 case Q_one: /* Quote this, restore state */
281 case Q_single: /* Stay in single quote mode */
285 case Q_doubleone: /* Quote this \ */
286 tok->quote = Q_double;
296 tok->flags &= ~TOK_EAT;
297 switch (tok->quote) {
303 *tok->wptr++ = *ptr; /* Add the return */
306 case Q_doubleone: /* Back to double, eat the '\n' */
307 tok->flags |= TOK_EAT;
308 tok->quote = Q_double;
311 case Q_one: /* No quote any more; eat the '\n' */
312 tok->flags |= TOK_EAT;
322 switch (tok->quote) {
324 /* Finish word and return */
325 if (tok->flags & TOK_EAT) {
326 tok->flags &= ~TOK_EAT;
338 tok->quote = Q_double;
353 tok->flags &= ~TOK_EAT;
354 switch (tok->quote) {
356 if (strchr(tok->ifs, *ptr) != NULL) /* is this a field separator? */
370 tok->quote = Q_double;
/* Word buffer is within 4 bytes of its limit: enlarge it by WINCR. */
386 if (tok->wptr >= tok->wmax - 4) {
387 size_t size = tok->wmax - tok->wspace + WINCR;
388 char *s = (char *) tok_realloc(tok->wspace, size);
/*
 * The buffer moved: rebase every pointer that refers into it.
 * NOTE(review): tok->wspace still holds the pre-realloc address here, and
 * arithmetic on it after a moving realloc is formally undefined -- consider
 * saving the offsets before the realloc call.
 */
392 if (s != tok->wspace) {
394 for (i = 0; i < tok->argc; i++) {
396 (tok->argv[i] - tok->wspace) + s;
398 tok->wptr = (tok->wptr - tok->wspace) + s;
399 tok->wstart = (tok->wstart - tok->wspace) + s;
402 tok->wmax = s + size; /* new end-of-buffer limit */
/* Argument array is within 4 slots of its capacity: resize it to tok->amax pointers. */
404 if (tok->argc >= tok->amax - 4) {
407 p = (char **) tok_realloc(tok->argv,
408 tok->amax * sizeof(char *));
/* Cursor was never recorded (both still -1): use the offset of the write position in the current word. */
415 if (cc == -1 && co == -1) {
417 co = tok->wptr - tok->wstart;
424 *argv = (const char **)tok->argv; /* hand the argument array to the caller as read-only strings */
430 * Simpler version of tok_line, taking a NUL terminated line
431 * and splitting into words, ignoring cursor state.
/*
 * tok_str: wrap a NUL-terminated string in a zeroed LineInfo and pass it
 * to tok_line() with no cursor outputs requested.
 */
434 tok_str(Tokenizer *tok, const char *line, int *argc, const char ***argv)
438 memset(&li, 0, sizeof(li));
440 li.cursor = li.lastchar = strchr(line, '\0'); /* both point at the terminating NUL */
441 return (tok_line(tok, &li, argc, argv, NULL, NULL)); /* NULL, NULL: no cursor outputs */