2 * sh.glob.c: Regular expression expansion
5 * Copyright (c) 1980, 1991 The Regents of the University of California.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 #define G_NONE 0 /* No globbing needed */
42 #define G_GLOB 1 /* string contains *?[] characters */
43 #define G_CSH 2 /* string contains ~`{ characters */
45 #define GLOBSPACE 100 /* Alloc increment */
55 * Globbing is now done in two stages. In the first pass we expand
56 * the csh globbing idioms (~ ` {); then, if needed, we proceed with
57 * the normal globbing (? * [).
59 * Csh type globbing is handled in globexpand() and the rest is
60 * handled in glob() which is part of the 4.4BSD libc.
63 static Char *globtilde (Char *);
64 static Char *handleone (Char *, Char **, int);
65 static Char **libglob (Char **);
66 static Char **globexpand (Char **, int);
67 static int globbrace (const Char *, Char ***);
68 static void expbrace (Char ***, Char ***, int);
69 static void pword (struct blk_buf *, struct Strbuf *);
70 static void backeval (struct blk_buf *, struct Strbuf *, Char *,
75 Char *name, *u, *home, *res;
78 for (s++; *s && *s != '/' && *s != ':'; s++)
80 name = Strnsave(u + 1, s - (u + 1));
81 cleanup_push(name, xfree);
84 if (adrof(STRnonomatch)) {
89 stderror(ERR_UNKUSER, short2str(name));
94 if (home[0] == '/' && home[1] == '\0' && s[0] == '/')
97 res = Strspl(home, s);
103 /* Returns a newly allocated string, old or NULL */
112 * kfk - 17 Jan 1984 - stack hack allows user to get at arbitrary dir names
113 * in stack. PWP: let =foobar pass through (for X windows)
115 if (old[1] == '-' && (old[2] == '\0' || old[2] == '/')) {
117 const Char *olddir = varval (STRowd);
119 if (olddir && *olddir &&
120 !dcwd->di_next->di_name && !dcwd->di_prev->di_name)
121 return Strspl(olddir, &old[2]);
125 else if (Isdigit(old[1])) {
128 for (b = &old[2]; Isdigit(*b); b++)
129 dig = dig * 10 + (*b - '0');
130 if (*b != '\0' && *b != '/')
131 /* =<number>foobar */
141 return Strspl(dir, b);
145 globbrace(const Char *s, Char ***bl)
147 struct Strbuf gbuf = Strbuf_INIT;
148 struct blk_buf bb = BLK_BUF_INIT;
150 const Char *p, *pm, *pe, *pl;
153 /* copy part up to the brace */
154 for (p = s; *p != LBRC; p++)
158 /* check for balanced braces */
159 for (i = 0, pe = ++p; *pe; pe++)
161 /* Ignore everything between [] */
162 for (++pe; *pe != RBRK && *pe != EOS; pe++)
167 else if (*pe == LBRC)
169 else if (*pe == RBRC) {
175 if (i != 0 || *pe == '\0')
178 Strbuf_appendn(&gbuf, s, prefix_len);
180 for (i = 0, pl = pm = p; pm <= pe; pm++)
183 for (++pm; *pm != RBRK && *pm != EOS; pm++)
204 gbuf.len = prefix_len;
205 Strbuf_appendn(&gbuf, pl, pm - pl);
206 Strbuf_append(&gbuf, pe + 1);
207 Strbuf_terminate(&gbuf);
208 bb_append(&bb, Strsave(gbuf.s));
215 *bl = bb_finish(&bb);
222 expbrace(Char ***nvp, Char ***elp, int size)
224 Char **vl, **el, **nv, *s;
230 el = vl + blklen(vl);
232 for (s = *vl; s; s = *++vl) {
235 /* leave {} untouched for find */
236 if (s[0] == '{' && (s[1] == '\0' || (s[1] == '}' && s[2] == '\0')))
238 if (Strchr(s, '{') != NULL) {
242 if ((len = globbrace(s, &bl)) < 0)
243 stderror(ERR_MISSING, -len);
250 if (&el[len] >= &nv[size]) {
252 l = &el[len] - &nv[size];
253 size += GLOBSPACE > l ? GLOBSPACE : l;
256 nv = xrealloc(nv, size * sizeof(Char *));
257 *nvp = nv; /* To keep cleanups working */
270 for (bp = el; bp != vp; bp--)
280 for (bp = bl + 1; *bp; *vp++ = *bp++)
291 globexpand(Char **v, int noglob)
294 Char ***fnv, **vl, **el;
295 int size = GLOBSPACE;
298 fnv = xmalloc(sizeof(Char ***));
299 *fnv = vl = xmalloc(sizeof(Char *) * size);
301 cleanup_push(fnv, blk_indirect_cleanup);
304 * Step 1: expand backquotes.
306 while ((s = *v++) != NULL) {
307 if (Strchr(s, '`')) {
311 expanded = dobackp(s, 0);
312 for (i = 0; expanded[i] != NULL; i++) {
314 if (vl == &(*fnv)[size]) {
316 *fnv = xrealloc(*fnv, size * sizeof(Char *));
317 vl = &(*fnv)[size - GLOBSPACE];
324 if (vl == &(*fnv)[size]) {
326 *fnv = xrealloc(*fnv, size * sizeof(Char *));
327 vl = &(*fnv)[size - GLOBSPACE];
337 * Step 2: expand braces
340 expbrace(fnv, &el, size);
347 for (s = *vl; s; s = *++vl)
354 if ((ns = globequal(s)) == NULL) {
355 if (!adrof(STRnonomatch))
356 stderror(ERR_DEEP); /* Error */
359 /* Expansion succeeded */
370 * Step 4: expand .. if the variable symlinks==expand is set
372 if (symlinks == SYM_EXPAND) {
373 for (s = *vl; s; s = *++vl) {
374 *vl = dnormalize(s, 1);
388 handleone(Char *str, Char **vl, int action)
395 setname(short2str(str));
397 stderror(ERR_NAME | ERR_AMBIG);
401 for (t = vl; (p = *t++) != NULL; chars++)
403 str = xmalloc(chars * sizeof(Char));
404 for (t = vl, strp = str; (p = *t++) != NULL; chars++) {
406 *strp++ = *p++ & TRIM;
413 str = Strsave(strip(*vl));
425 int gflgs = GLOB_QUOTE | GLOB_NOMAGIC | GLOB_ALTNOT;
428 int nonomatch = adrof(STRnonomatch) != 0, magic = 0, match = 0;
430 if (adrof(STRglobdot))
433 if (adrof(STRglobstar))
444 gflgs |= GLOB_NOCHECK;
447 ptr = short2qstr(*vl);
448 switch (glob(ptr, gflgs, 0, &globv)) {
452 stderror(ERR_NAME | ERR_GLOB);
461 if (globv.gl_flags & GLOB_MAGCHAR) {
462 match |= (globv.gl_matchc != 0);
465 gflgs |= GLOB_APPEND;
468 vl = (globv.gl_pathc == 0 || (magic && !match && !nonomatch)) ?
469 NULL : blk2short(globv.gl_pathv);
475 globone(Char *str, int action)
477 Char *v[2], **vl, **vo;
480 noglob = adrof(STRnoglob) != 0;
485 return (strip(Strsave(str)));
489 * Expand back-quote, tilde and brace
491 vo = globexpand(v, noglob);
492 if (noglob || (gflg & G_GLOB) == 0) {
496 cleanup_push(vo, blk_cleanup);
498 else if (noglob || (gflg & G_GLOB) == 0)
499 return (strip(Strsave(str)));
511 setname(short2str(str));
512 stderror(ERR_NAME | ERR_NOMATCH);
515 if (vl && vl[0] == NULL) {
517 return (Strsave(STRNULL));
520 return (handleone(str, vl, action));
529 globall(Char **v, int gflg)
537 noglob = adrof(STRnoglob) != 0;
541 * Expand back-quote, tilde and brace
543 vl = vo = globexpand(v, noglob);
545 vl = vo = saveblk(v);
547 if (!noglob && (gflg & G_GLOB)) {
548 cleanup_push(vo, blk_cleanup);
561 glob_all_or_error(Char **v)
567 v = globall(v, gflag);
569 stderror(ERR_NAME | ERR_NOMATCH);
578 rscan(Char **t, void (*f) (Char))
582 while ((p = *t++) != NULL)
592 while ((p = *t++) != NULL)
594 #if INVALID_BYTE != 0
595 if ((*p & INVALID_BYTE) != INVALID_BYTE) /* *p < INVALID_BYTE */
609 while ((p = *t++) != NULL) {
610 if (*p == '~' || *p == '=')
612 else if (*p == '{' &&
613 (p[1] == '\0' || (p[1] == '}' && p[2] == '\0')))
620 * We do want to expand echo `echo '*'`, so we don't
621 * use this piece of code anymore.
624 while (*p && *p != '`')
626 if (*p) /* Quoted chars */
631 if (!*p) /* The matching ` */
639 else if (symlinks == SYM_EXPAND &&
640 p[1] && ISDOTDOT(p) && (p == *(t-1) || *(p-1) == '/') )
649 * Command substitute cp. If literal, then this is a substitution from a
650 * << redirection, and so we should not crunch blanks and tabs, separating
651 * words only at newlines.
654 dobackp(Char *cp, int literal)
656 struct Strbuf word = Strbuf_INIT;
657 struct blk_buf bb = BLK_BUF_INIT;
660 cleanup_push(&bb, bb_cleanup);
661 cleanup_push(&word, Strbuf_cleanup);
663 for (lp = cp; *lp != '\0' && *lp != '`'; lp++)
665 Strbuf_appendn(&word, cp, lp - cp);
669 for (rp = lp; *rp && *rp != '`'; rp++)
678 stderror(ERR_UNMATCHED, '`');
680 ep = Strnsave(lp, rp - lp);
681 cleanup_push(ep, xfree);
682 backeval(&bb, &word, ep, literal);
690 return bb_finish(&bb);
695 backeval(struct blk_buf *bb, struct Strbuf *word, Char *cp, int literal)
699 struct command faket;
702 Char *fakecom[2], ibuf[BUFSIZE];
707 for (ip = cp; (*ip & QUOTE) != 0; ip++)
709 quoted = *ip == '\0';
712 faket.t_dtyp = NODE_COMMAND;
713 faket.t_dflg = F_BACKQ;
717 faket.t_dcom = fakecom;
718 fakecom[0] = STRfakecom1;
722 * We do the psave job to temporarily change the current job so that the
723 * following fork is considered a separate job. This is so that when
724 * backquotes are used in a builtin function that calls glob the "current
725 * job" is not corrupted. We only need one level of pushed jobs as long as
726 * we are sure to fork here.
729 cleanup_push(&faket, psavejob_cleanup); /* faket is only a marker */
732 * It would be nicer if we could integrate this redirection more with the
733 * routines in sh.sem.c by doing a fake execute on a builtin function that
737 cleanup_push(&pvec[0], open_cleanup);
738 cleanup_push(&pvec[1], open_cleanup);
739 if (pfork(&faket, -1) == 0) {
745 (void) dmove(pvec[1], 1);
746 (void) dmove(SHDIAG, 2);
750 for (arginp = cp; *cp; cp++) {
752 if (is_set(STRcsubstnonl) && (*cp == '\n' || *cp == '\r'))
757 * In the child ``forget'' everything about current aliases or
765 omark = cleanup_push_mark();
768 struct wordent paraml1;
777 cleanup_pop_mark(omark);
787 (void) lex(&paraml1);
788 cleanup_push(&paraml1, lex_cleanup);
792 t = syntax(paraml1.next, &paraml1, 0);
793 cleanup_push(t, syntax_cleanup);
794 /* The F_BACKQ flag must be set so the job output is correct if
795 * printexitvalue is set. If it's not set, the job output
796 * will have "Exit N" appended where N is the exit status. */
798 t->t_dflg = F_BACKQ|F_NOFORK;
802 signal(SIGTSTP, SIG_IGN);
805 signal(SIGTTIN, SIG_IGN);
808 signal(SIGTTOU, SIG_IGN);
810 execute(t, -1, NULL, NULL, TRUE);
812 cleanup_until(&paraml1);
815 cleanup_until(&pvec[1]);
824 icnt = wide_read(pvec[0], ibuf, BUFSIZE, 0);
834 #if defined(WINNT_NATIVE) || defined(__CYGWIN__)
837 #endif /* WINNT_NATIVE || __CYGWIN__ */
840 * Continue around the loop one more time, so that we can eat
841 * the last newline without terminating this word.
846 if (!quoted && (c == ' ' || c == '\t'))
849 if (c == '\\' || quoted)
851 Strbuf_append1(word, c);
854 * Unless at end-of-file, we will form a new word here if there were
855 * characters in the word, or in any case when we take text literally.
856 * If we didn't make empty words here when literal was set then we
857 * would lose blank lines.
859 if (c != 0 && (cnt || literal))
864 cleanup_until(&pvec[0]);
866 cleanup_until(&faket); /* psavejob_cleanup(); */
870 pword(struct blk_buf *bb, struct Strbuf *word)
874 s = Strbuf_finish(word);
880 Gmatch(const Char *string, const Char *pattern)
882 return Gnmatch(string, pattern, NULL);
886 Gnmatch(const Char *string, const Char *pattern, const Char **endstr)
889 const Char *tstring = string;
890 int gpol = 1, gres = 0;
892 if (*pattern == '^') {
897 fblk = xmalloc(sizeof(Char ***));
898 *fblk = xmalloc(GLOBSPACE * sizeof(Char *));
899 (*fblk)[0] = Strsave(pattern);
902 cleanup_push(fblk, blk_indirect_cleanup);
903 expbrace(fblk, NULL, GLOBSPACE);
906 /* Exact matches only */
907 for (p = *fblk; *p; p++)
908 gres |= t_pmatch(string, *p, &tstring, 1) == 2 ? 1 : 0;
912 /* partial matches */
913 end = Strend(string);
914 for (p = *fblk; *p; p++)
915 if (t_pmatch(string, *p, &tstring, 1) != 0) {
924 return(gres == gpol);
928 * Return 2 on exact match,
929 * Return 1 on substring match.
930 * Return 0 on no match.
931 * *estr will point to the end of the longest exact or substring match.
934 t_pmatch(const Char *string, const Char *pattern, const Char **estr, int cs)
936 Char stringc, patternc, rangec;
937 int match, negate_range;
938 const Char *pestr, *nstring;
940 for (nstring = string;; string = nstring) {
941 stringc = *nstring++ & TRIM;
942 patternc = *pattern++ & TRIM;
946 return (stringc == '\0' ? 2 : 1);
953 *estr = Strend(string);
959 switch(t_pmatch(string, pattern, estr, cs)) {
963 pestr = *estr;/*FIXME: does not guarantee longest match */
968 abort(); /* Cannot happen */
970 stringc = *string++ & TRIM;
984 if ((negate_range = (*pattern == '^')) != 0)
986 while ((rangec = *pattern++ & TRIM) != '\0') {
991 if (*pattern == '-' && pattern[1] != ']') {
994 rangec2 = *pattern++ & TRIM;
995 match = (globcharcoll(stringc, rangec2, 0) <= 0 &&
996 globcharcoll(rangec, stringc, 0) <= 0);
999 match = (stringc == rangec);
1002 stderror(ERR_NAME | ERR_MISSING, ']');
1003 if ((!match) && (stringc == '\0'))
1005 if (match == negate_range)
1009 if (cs ? patternc != stringc
1010 : Tolower(patternc) != Tolower(stringc))