1 /* $Header: /p/tcsh/cvsroot/tcsh/sh.glob.c,v 3.76 2008/06/19 15:20:56 christos Exp $ */
3 * sh.glob.c: Regular expression expansion
6 * Copyright (c) 1980, 1991 The Regents of the University of California.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 RCSID("$tcsh: sh.glob.c,v 3.76 2008/06/19 15:20:56 christos Exp $")
45 #define G_NONE 0 /* No globbing needed */
46 #define G_GLOB 1 /* string contains *?[] characters */
47 #define G_CSH 2 /* string contains ~`{ characters */
49 #define GLOBSPACE 100 /* Alloc increment */
59 * Globbing is now done in two stages. In the first pass we expand
60 * the csh globbing idioms ~ ` { and then, if needed, we proceed to
61 * the normal globbing of ? * [
63 * Csh type globbing is handled in globexpand() and the rest is
64 * handled in glob() which is part of the 4.4BSD libc.
67 static Char *globtilde (Char *);
68 static Char *handleone (Char *, Char **, int);
69 static Char **libglob (Char **);
70 static Char **globexpand (Char **, int);
71 static int globbrace (const Char *, Char ***);
72 static void expbrace (Char ***, Char ***, int);
73 static void pword (struct blk_buf *, struct Strbuf *);
74 static void backeval (struct blk_buf *, struct Strbuf *, Char *,
79 Char *name, *u, *home, *res;
82 for (s++; *s && *s != '/' && *s != ':'; s++)
84 name = Strnsave(u + 1, s - (u + 1));
85 cleanup_push(name, xfree);
88 if (adrof(STRnonomatch)) {
93 stderror(ERR_UNKUSER, short2str(name));
98 if (home[0] == '/' && home[1] == '\0' && s[0] == '/')
101 res = Strspl(home, s);
107 /* Returns a newly allocated string, old or NULL */
116 * kfk - 17 Jan 1984 - stack hack allows user to get at arbitrary dir names
117 * in stack. PWP: let =foobar pass through (for X windows)
119 if (old[1] == '-' && (old[2] == '\0' || old[2] == '/')) {
121 const Char *olddir = varval (STRowd);
123 if (olddir && *olddir &&
124 !dcwd->di_next->di_name && !dcwd->di_prev->di_name)
125 return Strspl(olddir, &old[2]);
129 else if (Isdigit(old[1])) {
132 for (b = &old[2]; Isdigit(*b); b++)
133 dig = dig * 10 + (*b - '0');
134 if (*b != '\0' && *b != '/')
135 /* =<number>foobar */
145 return Strspl(dir, b);
149 globbrace(const Char *s, Char ***bl)
151 struct Strbuf gbuf = Strbuf_INIT;
152 struct blk_buf bb = BLK_BUF_INIT;
154 const Char *p, *pm, *pe, *pl;
157 /* copy part up to the brace */
158 for (p = s; *p != LBRC; p++)
162 /* check for balanced braces */
163 for (i = 0, pe = ++p; *pe; pe++)
165 /* Ignore everything between [] */
166 for (++pe; *pe != RBRK && *pe != EOS; pe++)
171 else if (*pe == LBRC)
173 else if (*pe == RBRC) {
179 if (i != 0 || *pe == '\0')
182 Strbuf_appendn(&gbuf, s, prefix_len);
184 for (i = 0, pl = pm = p; pm <= pe; pm++)
187 for (++pm; *pm != RBRK && *pm != EOS; pm++)
208 gbuf.len = prefix_len;
209 Strbuf_appendn(&gbuf, pl, pm - pl);
210 Strbuf_append(&gbuf, pe + 1);
211 Strbuf_terminate(&gbuf);
212 bb_append(&bb, Strsave(gbuf.s));
219 *bl = bb_finish(&bb);
226 expbrace(Char ***nvp, Char ***elp, int size)
228 Char **vl, **el, **nv, *s;
234 el = vl + blklen(vl);
236 for (s = *vl; s; s = *++vl) {
239 /* leave {} untouched for find */
240 if (s[0] == '{' && (s[1] == '\0' || (s[1] == '}' && s[2] == '\0')))
242 if (Strchr(s, '{') != NULL) {
246 if ((len = globbrace(s, &bl)) < 0)
247 stderror(ERR_MISSING, -len);
254 if (&el[len] >= &nv[size]) {
256 l = &el[len] - &nv[size];
257 size += GLOBSPACE > l ? GLOBSPACE : l;
260 nv = xrealloc(nv, size * sizeof(Char *));
261 *nvp = nv; /* To keep cleanups working */
274 for (bp = el; bp != vp; bp--)
284 for (bp = bl + 1; *bp; *vp++ = *bp++)
295 globexpand(Char **v, int noglob)
298 Char ***fnv, **vl, **el;
299 int size = GLOBSPACE;
302 fnv = xmalloc(sizeof(Char ***));
303 *fnv = vl = xmalloc(sizeof(Char *) * size);
305 cleanup_push(fnv, blk_indirect_cleanup);
308 * Step 1: expand backquotes.
310 while ((s = *v++) != '\0') {
311 if (Strchr(s, '`')) {
315 expanded = dobackp(s, 0);
316 for (i = 0; expanded[i] != NULL; i++) {
318 if (vl == &(*fnv)[size]) {
320 *fnv = xrealloc(*fnv, size * sizeof(Char *));
321 vl = &(*fnv)[size - GLOBSPACE];
328 if (vl == &(*fnv)[size]) {
330 *fnv = xrealloc(*fnv, size * sizeof(Char *));
331 vl = &(*fnv)[size - GLOBSPACE];
341 * Step 2: expand braces
344 expbrace(fnv, &el, size);
351 for (s = *vl; s; s = *++vl)
358 if ((ns = globequal(s)) == NULL) {
359 if (!adrof(STRnonomatch))
360 stderror(ERR_DEEP); /* Error */
363 /* Expansion succeeded */
374 * Step 4: expand .. if the variable symlinks==expand is set
376 if (symlinks == SYM_EXPAND) {
377 for (s = *vl; s; s = *++vl) {
378 *vl = dnormalize(s, 1);
392 handleone(Char *str, Char **vl, int action)
399 setname(short2str(str));
401 stderror(ERR_NAME | ERR_AMBIG);
405 for (t = vl; (p = *t++) != NULL; chars++)
407 str = xmalloc(chars * sizeof(Char));
408 for (t = vl, strp = str; (p = *t++) != '\0'; chars++) {
410 *strp++ = *p++ & TRIM;
417 str = Strsave(strip(*vl));
429 int gflgs = GLOB_QUOTE | GLOB_NOMAGIC | GLOB_ALTNOT;
432 int nonomatch = adrof(STRnonomatch) != 0, magic = 0, match = 0;
442 gflgs |= GLOB_NOCHECK;
445 ptr = short2qstr(*vl);
446 switch (glob(ptr, gflgs, 0, &globv)) {
450 stderror(ERR_NAME | ERR_GLOB);
459 if (globv.gl_flags & GLOB_MAGCHAR) {
460 match |= (globv.gl_matchc != 0);
463 gflgs |= GLOB_APPEND;
466 vl = (globv.gl_pathc == 0 || (magic && !match && !nonomatch)) ?
467 NULL : blk2short(globv.gl_pathv);
473 globone(Char *str, int action)
475 Char *v[2], **vl, **vo;
478 noglob = adrof(STRnoglob) != 0;
483 return (strip(Strsave(str)));
487 * Expand back-quote, tilde and brace
489 vo = globexpand(v, noglob);
490 if (noglob || (gflg & G_GLOB) == 0) {
494 cleanup_push(vo, blk_cleanup);
496 else if (noglob || (gflg & G_GLOB) == 0)
497 return (strip(Strsave(str)));
509 setname(short2str(str));
510 stderror(ERR_NAME | ERR_NOMATCH);
515 return (Strsave(STRNULL));
518 return (handleone(str, vl, action));
527 globall(Char **v, int gflg)
535 noglob = adrof(STRnoglob) != 0;
539 * Expand back-quote, tilde and brace
541 vl = vo = globexpand(v, noglob);
543 vl = vo = saveblk(v);
545 if (!noglob && (gflg & G_GLOB)) {
546 cleanup_push(vo, blk_cleanup);
559 glob_all_or_error(Char **v)
565 v = globall(v, gflag);
567 stderror(ERR_NAME | ERR_NOMATCH);
576 rscan(Char **t, void (*f) (Char))
580 while ((p = *t++) != '\0')
590 while ((p = *t++) != '\0')
602 while ((p = *t++) != '\0') {
603 if (*p == '~' || *p == '=')
605 else if (*p == '{' &&
606 (p[1] == '\0' || (p[1] == '}' && p[2] == '\0')))
613 * We do want to expand echo `echo '*'`, so we don't
614 * use this piece of code anymore.
617 while (*p && *p != '`')
619 if (*p) /* Quoted chars */
624 if (!*p) /* The matching ` */
632 else if (symlinks == SYM_EXPAND &&
633 p[1] && ISDOTDOT(p) && (p == *(t-1) || *(p-1) == '/') )
642 * Command substitute cp. If literal, then this is a substitution from a
643 * << redirection, and so we should not crunch blanks and tabs, separating
644 * words only at newlines.
647 dobackp(Char *cp, int literal)
649 struct Strbuf word = Strbuf_INIT;
650 struct blk_buf bb = BLK_BUF_INIT;
653 cleanup_push(&bb, bb_cleanup);
654 cleanup_push(&word, Strbuf_cleanup);
656 for (lp = cp; *lp != '\0' && *lp != '`'; lp++)
658 Strbuf_appendn(&word, cp, lp - cp);
662 for (rp = lp; *rp && *rp != '`'; rp++)
671 stderror(ERR_UNMATCHED, '`');
673 ep = Strnsave(lp, rp - lp);
674 cleanup_push(ep, xfree);
675 backeval(&bb, &word, ep, literal);
683 return bb_finish(&bb);
688 backeval(struct blk_buf *bb, struct Strbuf *word, Char *cp, int literal)
692 struct command faket;
695 Char *fakecom[2], ibuf[BUFSIZE];
700 quoted = (literal || (cp[0] & QUOTE)) ? QUOTE : 0;
701 faket.t_dtyp = NODE_COMMAND;
702 faket.t_dflg = F_BACKQ;
706 faket.t_dcom = fakecom;
707 fakecom[0] = STRfakecom1;
711 * We do the psave job to temporarily change the current job so that the
712 * following fork is considered a separate job. This is so that when
713 * backquotes are used in a builtin function that calls glob the "current
714 * job" is not corrupted. We only need one level of pushed jobs as long as
715 * we are sure to fork here.
718 cleanup_push(&faket, psavejob_cleanup); /* faket is only a marker */
721 * It would be nicer if we could integrate this redirection more with the
722 * routines in sh.sem.c by doing a fake execute on a builtin function that
726 cleanup_push(&pvec[0], open_cleanup);
727 cleanup_push(&pvec[1], open_cleanup);
728 if (pfork(&faket, -1) == 0) {
734 (void) dmove(pvec[1], 1);
735 (void) dmove(SHDIAG, 2);
739 for (arginp = cp; *cp; cp++) {
741 if (is_set(STRcsubstnonl) && (*cp == '\n' || *cp == '\r'))
746 * In the child ``forget'' everything about current aliases or
754 omark = cleanup_push_mark();
763 cleanup_pop_mark(omark);
773 cleanup_push(¶ml, lex_cleanup);
777 t = syntax(paraml.next, ¶ml, 0);
778 cleanup_push(t, syntax_cleanup);
782 signal(SIGTSTP, SIG_IGN);
785 signal(SIGTTIN, SIG_IGN);
788 signal(SIGTTOU, SIG_IGN);
790 execute(t, -1, NULL, NULL, TRUE);
792 cleanup_until(¶ml);
795 cleanup_until(&pvec[1]);
808 icnt = xread(pvec[0], tmp, tibuf + BUFSIZE - tmp);
819 while (tmp < tibuf + icnt) {
822 len = normal_mbtowc(&ip[i], tmp, tibuf + icnt - tmp);
825 if (!eof && (size_t)(tibuf + icnt - tmp) < MB_CUR_MAX) {
826 break; /* Maybe a partial character */
828 ip[i] = (unsigned char) *tmp | INVALID_BYTE; /* Error */
836 memmove (tibuf, tmp, tibuf + icnt - tmp);
837 tmp = tibuf + (tibuf + icnt - tmp);
846 #if defined(WINNT_NATIVE) || defined(__CYGWIN__)
849 #endif /* WINNT_NATIVE || __CYGWIN__ */
852 * Continue around the loop one more time, so that we can eat
853 * the last newline without terminating this word.
858 if (!quoted && (c == ' ' || c == '\t'))
861 Strbuf_append1(word, c | quoted);
864 * Unless at end-of-file, we will form a new word here if there were
865 * characters in the word, or in any case when we take text literally.
866 * If we didn't make empty words here when literal was set then we
867 * would lose blank lines.
869 if (c != 0 && (cnt || literal))
874 cleanup_until(&pvec[0]);
876 cleanup_until(&faket); /* psavejob_cleanup(); */
880 pword(struct blk_buf *bb, struct Strbuf *word)
884 s = Strbuf_finish(word);
890 Gmatch(const Char *string, const Char *pattern)
892 return Gnmatch(string, pattern, NULL);
896 Gnmatch(const Char *string, const Char *pattern, const Char **endstr)
899 const Char *tstring = string;
900 int gpol = 1, gres = 0;
902 if (*pattern == '^') {
907 fblk = xmalloc(sizeof(Char ***));
908 *fblk = xmalloc(GLOBSPACE * sizeof(Char *));
909 (*fblk)[0] = Strsave(pattern);
912 cleanup_push(fblk, blk_indirect_cleanup);
913 expbrace(fblk, NULL, GLOBSPACE);
916 /* Exact matches only */
917 for (p = *fblk; *p; p++)
918 gres |= t_pmatch(string, *p, &tstring, 1) == 2 ? 1 : 0;
922 /* partial matches */
923 end = Strend(string);
924 for (p = *fblk; *p; p++)
925 if (t_pmatch(string, *p, &tstring, 1) != 0) {
934 return(gres == gpol);
938 * Return 2 on exact match,
939 * Return 1 on substring match.
940 * Return 0 on no match.
941 * *estr will point to the end of the longest exact or substring match.
944 t_pmatch(const Char *string, const Char *pattern, const Char **estr, int cs)
946 Char stringc, patternc, rangec;
947 int match, negate_range;
948 const Char *pestr, *nstring;
950 for (nstring = string;; string = nstring) {
951 stringc = *nstring++ & TRIM;
952 patternc = *pattern++ & TRIM;
956 return (stringc == '\0' ? 2 : 1);
963 *estr = Strend(string);
969 switch(t_pmatch(string, pattern, estr, cs)) {
973 pestr = *estr;/*FIXME: does not guarantee longest match */
978 abort(); /* Cannot happen */
980 stringc = *string++ & TRIM;
994 if ((negate_range = (*pattern == '^')) != 0)
996 while ((rangec = *pattern++ & TRIM) != '\0') {
1001 if (*pattern == '-' && pattern[1] != ']') {
1004 rangec2 = *pattern++ & TRIM;
1005 match = (globcharcoll(stringc, rangec2, 0) <= 0 &&
1006 globcharcoll(rangec, stringc, 0) <= 0);
1009 match = (stringc == rangec);
1012 stderror(ERR_NAME | ERR_MISSING, ']');
1013 if ((!match) && (stringc == '\0'))
1015 if (match == negate_range)
1019 if (cs ? patternc != stringc
1020 : Tolower(patternc) != Tolower(stringc))