1 /* $Header: /src/pub/tcsh/sh.glob.c,v 3.54 2002/07/04 19:28:29 christos Exp $ */
3 * sh.glob.c: Regular expression expansion
6 * Copyright (c) 1980, 1991 The Regents of the University of California.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 RCSID("$Id: sh.glob.c,v 3.54 2002/07/04 19:28:29 christos Exp $")
43 static int pargsiz, gargsiz;
48 #define G_NONE 0 /* No globbing needed */
49 #define G_GLOB 1 /* string contains *?[] characters */
50 #define G_CSH 2 /* string contains ~`{ characters */
52 #define GLOBSPACE 100 /* Alloc increment */
53 #define LONGBSIZE 10240 /* Backquote expansion buffer size */
68 * Globbing is now done in two stages. In the first pass we expand the
69 * csh globbing idioms (~`{), and then we proceed with the normal
70 * globbing (?*[) if needed.
72 * Csh type globbing is handled in globexpand() and the rest is
73 * handled in glob() which is part of the 4.4BSD libc.
76 static Char *globtilde __P((Char **, Char *));
77 static Char *handleone __P((Char *, Char **, int));
78 static Char **libglob __P((Char **));
79 static Char **globexpand __P((Char **));
80 static int globbrace __P((Char *, Char *, Char ***));
81 static void expbrace __P((Char ***, Char ***, int));
82 static int pmatch __P((Char *, Char *, Char **));
83 static void pword __P((int));
84 static void psave __P((int));
85 static void backeval __P((Char *, bool));
91 Char gbuf[BUFSIZE], *gstart, *b, *u, *e;
99 for (b = gstart, e = &gbuf[BUFSIZE - 1];
100 *s && *s != '/' && *s != ':' && b < e;
104 if (gethdir(gstart)) {
105 if (adrof(STRnonomatch))
109 stderror(ERR_UNKUSER, short2str(gstart));
111 stderror(ERR_NOHOME);
113 b = &gstart[Strlen(gstart)];
115 slash = gstart[0] == '/' && gstart[1] == '\0';
123 if (slash && gstart[1] == '/')
126 return (Strsave(gstart));
137 * kfk - 17 Jan 1984 - stack hack allows user to get at arbitrary dir names
138 * in stack. PWP: let =foobar pass through (for X windows)
140 if (old[1] == '-' && (old[2] == '\0' || old[2] == '/')) {
145 else if (Isdigit(old[1])) {
148 for (b = &old[2]; Isdigit(*b); b++)
149 dig = dig * 10 + (*b - '0');
150 if (*b != '\0' && *b != '/')
151 /* =<number>foobar */
158 if (!getstakd(new, dig))
161 /* Copy the rest of the string */
162 for (d = &new[Strlen(new)];
163 d < &new[BUFSIZE - 1] && (*d++ = *b++) != '\0';)
175 Char *pm, *pe, *lm, *pl;
178 int size = GLOBSPACE;
180 nv = vl = (Char **) xmalloc((size_t) (sizeof(Char *) * size));
184 /* copy part up to the brace */
185 for (lm = gbuf, p = s; *p != LBRC; *lm++ = *p++)
188 /* check for balanced braces */
189 for (i = 0, pe = ++p; *pe; pe++)
191 if (Ismbyte1(*pe) && *(pe + 1) != EOS)
194 #endif /* DSPMBYTE */
196 /* Ignore everything between [] */
197 for (++pe; *pe != RBRK && *pe != EOS; pe++)
199 if (Ismbyte1(*pe) && *(pe + 1) != EOS)
202 #endif /* DSPMBYTE */
209 else if (*pe == LBRC)
211 else if (*pe == RBRC) {
217 if (i != 0 || *pe == '\0') {
222 for (i = 0, pl = pm = p; pm <= pe; pm++)
224 if (Ismbyte1(*pm) && pm + 1 <= pe)
227 #endif /* DSPMBYTE */
230 for (++pm; *pm != RBRK && *pm != EOS; pm++)
232 if (Ismbyte1(*pm) && *(pm + 1) != EOS)
235 #endif /* DSPMBYTE */
259 (void) Strcpy(lm, pl);
260 (void) Strcat(gbuf, pe + 1);
262 *vl++ = Strsave(gbuf);
265 if (vl == &nv[size]) {
267 nv = (Char **) xrealloc((ptr_t) nv,
268 (size_t) (size * sizeof(Char *)));
269 vl = &nv[size - GLOBSPACE];
283 expbrace(nvp, elp, size)
287 Char **vl, **el, **nv, *s;
293 for (el = vl; *el; el++)
296 for (s = *vl; s; s = *++vl) {
300 /* leave {} untouched for find */
301 if (s[0] == '{' && (s[1] == '\0' || (s[1] == '}' && s[2] == '\0')))
303 if ((b = Strchr(s, '{')) != NULL) {
307 #if defined (DSPMBYTE)
308 if (b != s && Ismbyte2(*b) && Ismbyte1(*(b-1))) {
309 /* The "{" is the 2nd byte of a MB character */
312 #endif /* DSPMBYTE */
313 if ((len = globbrace(s, b, &bl)) < 0) {
315 stderror(ERR_MISSING, -len);
323 if (&el[len] >= &nv[size]) {
325 l = (int) (&el[len] - &nv[size]);
326 size += GLOBSPACE > l ? GLOBSPACE : l;
329 nv = (Char **) xrealloc((ptr_t) nv,
330 (size_t) (size * sizeof(Char *)));
343 for (bp = el; bp != vp; bp--)
353 for (bp = bl + 1; *bp; *vp++ = *bp++)
369 Char **nv, **vl, **el;
370 int size = GLOBSPACE;
373 nv = vl = (Char **) xmalloc((size_t) (sizeof(Char *) * size));
377 * Step 1: expand backquotes.
379 while ((s = *v++) != '\0') {
380 if (Strchr(s, '`')) {
383 (void) dobackp(s, 0);
384 for (i = 0; i < pargc; i++) {
386 if (vl == &nv[size]) {
388 nv = (Char **) xrealloc((ptr_t) nv,
389 (size_t) (size * sizeof(Char *)));
390 vl = &nv[size - GLOBSPACE];
393 xfree((ptr_t) pargv);
398 if (vl == &nv[size]) {
400 nv = (Char **) xrealloc((ptr_t) nv,
401 (size_t) (size * sizeof(Char *)));
402 vl = &nv[size - GLOBSPACE];
412 * Step 2: expand braces
415 expbrace(&nv, &el, size);
422 for (s = *vl; s; s = *++vl)
424 Char gp[BUFSIZE], *ns;
426 *vl = globtilde(nv, s);
429 if ((ns = globequal(gp, s)) == NULL) {
430 if (!adrof(STRnonomatch)) {
437 /* Expansion succeeded */
448 * Step 4: expand .. if the variable symlinks==expand is set
450 if (symlinks == SYM_EXPAND) {
451 for (s = *vl; s; s = *++vl) {
452 *vl = dnormalize(s, 1);
462 handleone(str, vl, action)
473 setname(short2str(str));
475 stderror(ERR_NAME | ERR_AMBIG);
479 for (t = vlp; (p = *t++) != '\0'; chars++)
482 str = (Char *)xmalloc((size_t)(chars * sizeof(Char)));
483 for (t = vlp, strp = str; (p = *t++) != '\0'; chars++) {
485 *strp++ = *p++ & TRIM;
492 str = Strsave(strip(*vlp));
505 int gflgs = GLOB_QUOTE | GLOB_NOMAGIC | GLOB_ALTNOT;
508 int nonomatch = adrof(STRnonomatch) != 0, magic = 0, match = 0;
518 gflgs |= GLOB_NOCHECK;
521 ptr = short2qstr(*vl);
522 switch (glob(ptr, gflgs, 0, &globv)) {
526 stderror(ERR_NAME | ERR_GLOB);
535 if (globv.gl_flags & GLOB_MAGCHAR) {
536 match |= (globv.gl_matchc != 0);
539 gflgs |= GLOB_APPEND;
542 vl = (globv.gl_pathc == 0 || (magic && !match && !nonomatch)) ?
543 NULL : blk2short(globv.gl_pathv);
554 Char *v[2], **vl, **vo;
557 noglob = adrof(STRnoglob) != 0;
564 return (strip(Strsave(str)));
568 * Expand back-quote, tilde and brace
571 if (noglob || (gflg & G_GLOB) == 0) {
574 return (Strsave(STRNULL));
577 return (handleone(str, vo, action));
585 else if (noglob || (gflg & G_GLOB) == 0)
586 return (strip(Strsave(str)));
591 if ((gflg & G_CSH) && vl != vo)
594 setname(short2str(str));
595 stderror(ERR_NAME | ERR_NOMATCH);
599 return (Strsave(STRNULL));
602 return (handleone(str, vl, action));
619 gargc = blklen(gargv);
623 noglob = adrof(STRnoglob) != 0;
627 * Expand back-quote, tilde and brace
629 vl = vo = globexpand(v);
631 vl = vo = saveblk(v);
633 if (!noglob && (gflg & G_GLOB)) {
641 gargc = vl ? blklen(vl) : 0;
649 gargv = (Char **) xmalloc((size_t) (sizeof(Char *) * gargsiz));
657 void (*f) __P((int));
661 while ((p = *t++) != '\0')
672 while ((p = *t++) != '\0')
681 register Char *p, *c;
683 while ((p = *t++) != '\0') {
684 if (*p == '~' || *p == '=')
686 else if (*p == '{' &&
687 (p[1] == '\0' || (p[1] == '}' && p[2] == '\0')))
690 * The following line used to be *(c = p++), but hp broke their
691 * optimizer in 9.01, so we break the assignment into two pieces.
692 * The careful reader here will note that *most* compiler workarounds
693 * in tcsh are either for apollo/DomainOS or hpux. Is it a coincidence?
695 while ( *(c = p) != '\0') {
701 * We do want to expand echo `echo '*'`, so we don't
702 * use this piece of code anymore.
704 while (*p && *p != '`')
706 if (*p) /* Quoted chars */
711 if (*p) /* The matching ` */
721 else if (symlinks == SYM_EXPAND &&
722 *p && ISDOTDOT(c) && (c == *(t-1) || *(c-1) == '/') )
729 * Command substitute cp. If literal, then this is a substitution from a
730 * << redirection, and so we should not crunch blanks and tabs, separating
731 * words only at newlines.
738 register Char *lp, *rp;
739 Char *ep, word[LONGBSIZE];
748 pargv = (Char **) xmalloc((size_t) (sizeof(Char *) * pargsiz));
750 pargcp = pargs = word;
752 pnleft = LONGBSIZE - 4;
754 #if defined(DSPMBYTE)
755 for (lp = cp;; lp++) { /* } */
757 (lp-1 < cp || !Ismbyte2(*lp) || !Ismbyte1(*(lp-1)))) {
761 for (lp = cp; *lp != '`'; lp++) {
762 #endif /* DSPMBYTE */
771 for (rp = lp; *rp && *rp != '`'; rp++)
778 oops: stderror(ERR_UNMATCHED, '`');
781 backeval(ep, literal);
788 backeval(cp, literal)
792 register int icnt, c;
794 struct command faket;
797 Char *fakecom[2], ibuf[BUFSIZE];
802 quoted = (literal || (cp[0] & QUOTE)) ? QUOTE : 0;
803 faket.t_dtyp = NODE_COMMAND;
804 faket.t_dflg = F_BACKQ;
808 faket.t_dcom = fakecom;
809 fakecom[0] = STRfakecom1;
813 * We do the psave job to temporarily change the current job so that the
814 * following fork is considered a separate job. This is so that when
815 * backquotes are used in a builtin function that calls glob the "current
816 * job" is not corrupted. We only need one level of pushed jobs as long as
817 * we are sure to fork here.
822 * It would be nicer if we could integrate this redirection more with the
823 * routines in sh.sem.c by doing a fake execute on a builtin function that
827 if (pfork(&faket, -1) == 0) {
830 (void) close(pvec[0]);
831 (void) dmove(pvec[1], 1);
832 (void) dmove(SHDIAG, 2);
836 * Bugfix for nested backquotes by Michael Greim <greim@sbsvax.UUCP>,
837 * posted to comp.bugs.4bsd 12 Sep. 1989.
839 if (pargv) /* mg, 21.dec.88 */
840 blkfree(pargv), pargv = 0, pargsiz = 0;
843 for (arginp = cp; *cp; cp++) {
845 if (*cp == '\n' || *cp == '\r')
850 * In the child ``forget'' everything about current aliases or
861 t = syntax(paraml.next, ¶ml, 0);
865 t->t_dflg |= F_NOFORK;
867 (void) sigignore(SIGTSTP);
870 (void) sigignore(SIGTTIN);
873 (void) sigignore(SIGTTOU);
875 execute(t, -1, NULL, NULL, TRUE);
879 (void) close(pvec[1]);
891 icnt = read(pvec[0], tibuf, BUFSIZE);
892 while (icnt == -1 && errno == EINTR);
897 for (i = 0; i < icnt; i++)
898 ip[i] = (unsigned char) tibuf[i];
909 #endif /* WINNT_NATIVE */
912 * Continue around the loop one more time, so that we can eat
913 * the last newline without terminating this word.
918 if (!quoted && (c == ' ' || c == '\t'))
924 * Unless at end-of-file, we will form a new word here if there were
925 * characters in the word, or in any case when we take text literally.
926 * If we didn't make empty words here when literal was set then we
927 * would lose blank lines.
929 if (c != -1 && (cnt || literal))
933 (void) close(pvec[0]);
943 stderror(ERR_WTOOLONG);
944 *pargcp++ = (Char) c;
952 if (pargc == pargsiz - 1) {
953 pargsiz += GLOBSPACE;
954 pargv = (Char **) xrealloc((ptr_t) pargv,
955 (size_t) (pargsiz * sizeof(Char *)));
957 pargv[pargc++] = Strsave(pargs);
964 Gmatch(string, pattern)
965 Char *string, *pattern;
967 return Gnmatch(string, pattern, NULL);
971 Gnmatch(string, pattern, endstr)
972 Char *string, *pattern, **endstr;
974 Char **blk, **p, *tstring = string;
975 int gpol = 1, gres = 0;
977 if (*pattern == '^') {
982 blk = (Char **) xmalloc((size_t) (GLOBSPACE * sizeof(Char *)));
983 blk[0] = Strsave(pattern);
986 expbrace(&blk, NULL, GLOBSPACE);
989 /* Exact matches only */
990 for (p = blk; *p; p++)
991 gres |= pmatch(string, *p, &tstring) == 2 ? 1 : 0;
993 /* partial matches */
994 int minc = 0x7fffffff;
995 for (p = blk; *p; p++)
996 if (pmatch(string, *p, &tstring) != 0) {
997 int t = (int) (tstring - string);
999 if (minc == -1 || minc > t)
1002 *endstr = string + minc;
1006 return(gres == gpol);
1010 * Return 2 on exact match.
1011 * Return 1 on substring match.
1012 * Return 0 on no match.
1013 * *estr will point to the end of the longest exact or substring match.
1016 pmatch(string, pattern, estr)
1017 register Char *string, *pattern, **estr;
1019 register Char stringc, patternc;
1020 int match, negate_range;
1021 Char rangec, *oestr, *pestr;
1024 stringc = *string & TRIM;
1026 * apollo compiler bug: switch (patternc = *pattern++) dies
1028 patternc = *pattern++;
1032 return (stringc == 0 ? 2 : 1);
1040 while (*string) string++;
1048 switch(pmatch(string, pattern, estr)) {
1057 abort(); /* Cannot happen */
1074 if ((negate_range = (*pattern == '^')) != 0)
1076 while ((rangec = *pattern++) != '\0') {
1081 if (rangec == '-' && *(pattern-2) != '[' && *pattern != ']') {
1082 match = (globcharcoll(stringc, *pattern & TRIM) <= 0 &&
1083 globcharcoll(*(pattern-2) & TRIM, stringc) <= 0);
1087 match = (stringc == (rangec & TRIM));
1090 stderror(ERR_NAME | ERR_MISSING, ']');
1091 if (match == negate_range)
1096 if ((patternc & TRIM) != stringc)
1108 register Char *p, *q;
1115 n = (int) ((p - s1) + (q - s2) - 1);
1116 if (++gargc >= gargsiz) {
1117 gargsiz += GLOBSPACE;
1118 gargv = (Char **) xrealloc((ptr_t) gargv,
1119 (size_t) (gargsiz * sizeof(Char *)));
1122 p = gargv[gargc - 1] = (Char *) xmalloc((size_t) (n * sizeof(Char)));
1123 for (q = s1; (*p++ = *q++) != '\0';)
1125 for (p--, q = s2; (*p++ = *q++) != '\0';)
1129 #if defined(FILEC) && defined(TIOCSTI)
1132 register Char **a, **b;
1134 if (!a) /* check for NULL */
1139 if (!*a) /* check for NULL */
1140 return (*b ? 1 : 0);
1144 return (int) collate(*a, *b);