2 * Copyright (c) 1989 The Regents of the University of California.
5 * This code is derived from software contributed to Berkeley by
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #if defined(LIBC_SCCS) && !defined(lint)
33 static char sccsid[] = "@(#)glob.c 5.12 (Berkeley) 6/24/91";
34 #endif /* LIBC_SCCS and not lint */
36 * Glob: the interface is a superset of the one defined in POSIX 1003.2,
39 * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
41 * Optional extra services, controlled by flags not defined by POSIX:
44 * Escaping convention: \ inhibits any special meaning the following
45 * character might have (except \ at end of string is retained).
47 * Set in gl_flags if pattern contained a globbing character.
49 * Use ^ instead of ! for "not".
51 * Number of matches in the current invocation of glob.
55 #pragma warning(disable:4244)
56 #endif /* WINNT_NATIVE */
64 #define mblen(_s,_n) mbrlen((_s),(_n),NULL)
75 #define S_ISDIR(a) (((a) & S_IFMT) == S_IFDIR)
78 #if !defined(S_ISLNK) && defined(S_IFLNK)
79 #define S_ISLNK(a) (((a) & S_IFMT) == S_IFLNK)
82 #if !defined(S_ISLNK) && !defined(lstat)
86 typedef unsigned short Char;
88 static int glob1 (Char *, glob_t *, int);
89 static int glob2 (struct strbuf *, const Char *, glob_t *, int);
90 static int glob3 (struct strbuf *, const Char *, const Char *,
91 const Char *, glob_t *, int);
92 static void globextend (const char *, glob_t *);
93 static int match (const char *, const Char *, const Char *,
95 static int compare (const void *, const void *);
96 static DIR *Opendir (const char *);
98 static int Lstat (const char *, struct stat *);
100 static int Stat (const char *, struct stat *sb);
101 static Char *Strchr (Char *, int);
103 static void qprintf (const char *, const Char *);
119 #define UNDERSCORE '_'
/*
 * Flag bits kept in the upper part of each Char of the compiled pattern:
 * M_META marks a character that acts as a glob operator, and M_PROTECT is
 * or-ed onto characters that were quoted in the input (see glob()).
 * M_MASK covers all 16 bits; M_ASCII covers the low 8 bits only.
 */
121 #define M_META 0x8000
122 #define M_PROTECT 0x4000
123 #define M_MASK 0xffff
124 #define M_ASCII 0x00ff
/* LCHAR() keeps only the low 8 bits of c; META() sets the M_META bit. */
126 #define LCHAR(c) ((c)&M_ASCII)
127 #define META(c) ((c)|M_META)
/* Operator codes: the operator character with the M_META bit set. */
128 #define M_ALL META('*')
129 #define M_END META(']')
130 #define M_NOT META('!')
131 #define M_ALTNOT META('^')
132 #define M_ONE META('?')
133 #define M_RNG META('-')
134 #define M_SET META('[')
/* Nonzero when the M_META bit is set in c. */
135 #define ismeta(c) (((c)&M_META) != 0)
/*
 * Compare two characters for ordering.  match() calls this with cs == 0 to
 * test whether a name character lies inside a bracket range (result <= 0 on
 * both bounds).  When NLS, LC_COLLATE and HAVE_STRCOLL are all defined, the
 * characters are compared as one-character, NUL-terminated strings using
 * wcscoll() (WIDE_STRINGS) or strcoll().
 * NOTE(review): how cs selects between the case handling paths, and the
 * fallback used without locale support, are not visible in this excerpt.
 */
138 globcharcoll(__Char c1, __Char c2, int cs)
140 #if defined(NLS) && defined(LC_COLLATE) && defined(HAVE_STRCOLL)
141 # if defined(WIDE_STRINGS)
142 wchar_t s1[2], s2[2];
150 /* This should not be here, but I'd rather leave it in than engage in
151 a LC_COLLATE flamewar about a shell I don't use... */
/* Mixed-case pairs are special-cased before the collation call. */
152 if (iswlower(c1) && iswupper(c2))
154 if (iswupper(c1) && iswlower(c2))
/* Terminate both one-character strings, then collate them. */
159 s1[1] = s2[1] = '\0';
160 return wcscoll(s1, s2);
161 # else /* not WIDE_STRINGS */
167 * From kevin lyda <kevin@suberic.net>:
168 * strcoll does not guarantee case sorting, so we pre-process now:
/* Fold both characters to lower case unless they already are. */
171 c1 = islower(c1) ? c1 : tolower(c1);
172 c2 = islower(c2) ? c2 : tolower(c2);
/* Same mixed-case special-casing as the wide-character branch. */
174 if (islower(c1) && isupper(c2))
176 if (isupper(c1) && islower(c2))
181 s1[1] = s2[1] = '\0';
182 return strcoll(s1, s2);
190 * Need to dodge two kernel bugs:
191 * opendir("") != opendir(".")
192 * NAMEI_BUG: on plain files trailing slashes are ignored in some kernels.
193 * POSIX specifies that they should be ignored in directories.
/*
 * Wrapper around opendir(); str is the path of the directory to open.
 * glob3() treats a null result as failure to open the directory.
 */
197 Opendir(const char *str)
199 #if defined(hpux) || defined(__hpux)
/*
 * One path opens the current directory (dot) rather than str; this is the
 * empty-name workaround described in the comment preceding this function.
 * NOTE(review): the test guarding this return is not visible here.
 */
204 return (opendir("."));
205 #if defined(hpux) || defined(__hpux)
207 * Opendir on some device files hangs, so avoid it
/* HP-UX only: check that stat() succeeds and str names a directory. */
209 if (stat(str, &st) == -1 || !S_ISDIR(st.st_mode))
/*
 * Status lookup used by glob2() and glob3(); callers treat a nonzero result
 * as failure and read the file type from sb->st_mode.
 * NOTE(review): presumably wraps lstat(); the call itself is not visible.
 */
217 Lstat(const char *fn, struct stat *sb)
/*
 * NAMEI_BUG workaround: detect a non-empty name whose last character is a
 * slash while sb does not describe a directory.
 * NOTE(review): assumes strend() returns a pointer to the terminating NUL.
 */
223 if (*fn != 0 && strend(fn)[-1] == '/' && !S_ISDIR(sb->st_mode))
225 # endif /* NAMEI_BUG */
/*
 * Status lookup used by glob2() on symbolic links; a zero result is treated
 * as success and the file type is then read from sb->st_mode.
 * NOTE(review): presumably wraps stat(); the call itself is not visible.
 */
233 Stat(const char *fn, struct stat *sb)
/*
 * NAMEI_BUG workaround: detect a non-empty name whose last character is a
 * slash while sb does not describe a directory.
 * NOTE(review): assumes strend() returns a pointer to the terminating NUL.
 */
239 if (*fn != 0 && strend(fn)[-1] == '/' && !S_ISDIR(sb->st_mode))
241 #endif /* NAMEI_BUG */
/*
 * Search the Char string str for the character ch.  glob() compares the
 * result against NULL to decide whether a closing bracket exists.
 * NOTE(review): the body is not visible in this excerpt.
 */
246 Strchr(Char *str, int ch)
/*
 * Debug dump of a compiled pattern s; glob() calls it with a label in pre.
 * Three rows are printed, each new row starting on a fresh line with pre:
 * the low byte of each Char, then a marker row for the M_PROTECT bit, then
 * a marker row for the M_META bit.
 * NOTE(review): the loops that walk s through p are not visible here.
 */
257 qprintf(const char *pre, const Char *s)
/* Row 1: the character itself, reduced to its low 8 bits. */
263 xprintf("%c", *p & 0xff);
264 xprintf("\n%s", pre);
/* Row 2: a double-quote character under each protected character. */
266 xprintf("%c", *p & M_PROTECT ? '"' : ' ');
267 xprintf("\n%s", pre);
/* Row 3: an underscore under each meta character. */
269 xprintf("%c", *p & M_META ? '_' : ' ');
/*
 * Comparison callback passed to qsort() by glob().  p and q each point to
 * an element of the path vector, that is, to a char pointer, so both are
 * dereferenced once before the strings are compared.
 */
275 compare(const void *p, const void *q)
277 #if defined(NLS) && defined(HAVE_STRCOLL)
/* Locale-aware ordering. */
278 return (strcoll(*(char *const *) p, *(char *const *) q));
/* Plain byte ordering; the separating #else line is not visible here. */
280 return (strcmp(*(char *const *) p, *(char *const *) q));
281 #endif /* NLS && HAVE_STRCOLL */
285 * The main glob() routine: compiles the pattern (optionally processing
286 * quotes), calls glob1() to do the real pattern matching, and finally
287 * sorts the list (unless unsorted operation is requested). Returns 0
288 * if things went well, nonzero if errors occurred. It is not an error
289 * to find no matches.
/*
 * pattern - NUL-terminated pattern bytes to expand.
 * flags   - GLOB_* option bits tested throughout this function.
 * errfunc - stored in pglob->gl_errfunc; glob3() calls it when a directory
 *           cannot be opened.
 * Results are accumulated through pglob (gl_pathv, gl_pathc, gl_flags).
 */
292 glob(const char *pattern, int flags, int (*errfunc) (const char *, int),
296 Char *bufnext, m_not;
297 const unsigned char *patnext;
299 Char *qpatnext, *patbuf;
302 patnext = (const unsigned char *) pattern;
/* Unless appending to an earlier result, start from an empty vector. */
303 if (!(flags & GLOB_APPEND)) {
305 pglob->gl_pathv = NULL;
306 if (!(flags & GLOB_DOOFFS))
/* GLOB_MAGCHAR is an output flag: clear it now, set it again below. */
309 pglob->gl_flags = flags & ~GLOB_MAGCHAR;
310 pglob->gl_errfunc = errfunc;
/* Remember the starting count so paths added by this call can be found. */
311 oldpathc = pglob->gl_pathc;
312 pglob->gl_matchc = 0;
314 if (pglob->gl_flags & GLOB_ALTNOT) {
/* One Char per pattern byte, plus one for the terminator. */
323 patbuf = xmalloc((strlen(pattern) + 1) * sizeof(*patbuf));
/* no_match records whether the first pattern byte equals not. */
326 no_match = *patnext == not;
330 if (flags & GLOB_QUOTE) {
331 /* Protect the quoted characters */
332 while ((c = *patnext++) != EOS) {
336 len = mblen((const char *)(patnext - 1), MB_LEN_MAX);
338 TCSH_IGNORE(mblen(NULL, 0));
340 *bufnext++ = (Char) c;
342 *bufnext++ = (Char) (*patnext++ | M_PROTECT);
344 #endif /* WIDE_STRINGS */
346 if ((c = *patnext++) == EOS) {
350 *bufnext++ = (Char) (c | M_PROTECT);
353 *bufnext++ = (Char) c;
/* Without GLOB_QUOTE the pattern bytes are copied unchanged. */
357 while ((c = *patnext++) != EOS)
358 *bufnext++ = (Char) c;
/* Second pass over the copied pattern, read through qpatnext. */
363 while ((c = *qpatnext++) != EOS) {
/* An opening bracket with no closing bracket later on is kept literal. */
369 if (*qpatnext == EOS ||
370 Strchr(qpatnext + 1, RBRACKET) == NULL) {
371 *bufnext++ = LBRACKET;
376 pglob->gl_flags |= GLOB_MAGCHAR;
382 *bufnext++ = LCHAR(c);
/* A RANGE character not directly followed by RBRACKET has an upper bound. */
383 if (*qpatnext == RANGE &&
384 (c = qpatnext[1]) != RBRACKET) {
386 *bufnext++ = LCHAR(c);
389 } while ((c = *qpatnext++) != RBRACKET);
393 pglob->gl_flags |= GLOB_MAGCHAR;
397 pglob->gl_flags |= GLOB_MAGCHAR;
398 /* collapse adjacent stars to one [or three if globstar],
399 * to avoid exponential behavior
401 if (bufnext == patbuf || bufnext[-1] != M_ALL ||
402 ((flags & GLOB_STAR) != 0 &&
403 (bufnext - 1 == patbuf || bufnext[-2] != M_ALL ||
404 bufnext - 2 == patbuf || bufnext[-3] != M_ALL)))
408 *bufnext++ = LCHAR(c);
414 qprintf("patbuf=", patbuf);
417 if ((err = glob1(patbuf, pglob, no_match)) != 0) {
423 * If there was no match we are going to append the pattern
424 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
425 * and the pattern did not contain any magic characters
426 * GLOB_NOMAGIC is there just for compatibility with csh.
428 if (pglob->gl_pathc == oldpathc &&
429 ((flags & GLOB_NOCHECK) ||
430 ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) {
431 if (!(flags & GLOB_QUOTE))
432 globextend(pattern, pglob);
437 /* copy pattern, interpreting quotes */
/* The copy is sized to hold the whole pattern plus its terminator. */
438 copy = xmalloc(strlen(pattern) + 1);
441 while (*src != EOS) {
442 /* Don't interpret quotes. The spec does not say we should do */
450 globextend(copy, pglob);
/* Sort only the paths added by this call, leaving earlier entries alone. */
456 else if (!(flags & GLOB_NOSORT) && (pglob->gl_pathc != oldpathc))
457 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
458 pglob->gl_pathc - oldpathc, sizeof(char *), compare);
/*
 * Entry point of the matching stage, called by glob() with the compiled
 * pattern.  It starts from an empty path buffer and passes the work to
 * glob2(); the value returned by glob2() is kept in err.
 */
464 glob1(Char *pattern, glob_t *pglob, int no_match)
466 struct strbuf pathbuf = strbuf_INIT;
470 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4.
474 err = glob2(&pathbuf, pattern, pglob, no_match);
480 * functions glob2 and glob3 are mutually recursive; there is one level
481 * of recursion for each segment in the pattern that contains one or
482 * more meta characters.
/*
 * Walk the pattern one path segment at a time, copying segments into
 * pathbuf.  A segment containing meta characters is passed to glob3(); when
 * the pattern is exhausted the accumulated path is checked with Lstat() and
 * passed to globextend().
 */
485 glob2(struct strbuf *pathbuf, const Char *pattern, glob_t *pglob, int no_match)
493 * loop over pattern segments until end of pattern or until segment with
494 * meta character found.
498 if (*pattern == EOS) { /* end of pattern? */
499 strbuf_terminate(pathbuf);
/* A nonzero Lstat() result means the path could not be examined. */
501 if (Lstat(pathbuf->s, &sbuf))
/*
 * GLOB_MARK: when the path does not already end in SEP and names a
 * directory, or a symbolic link that Stat() resolves to a directory,
 * a trailing SEP is appended.
 */
504 if (((pglob->gl_flags & GLOB_MARK) &&
505 pathbuf->s[pathbuf->len - 1] != SEP) &&
506 (S_ISDIR(sbuf.st_mode)
508 || (S_ISLNK(sbuf.st_mode) &&
509 (Stat(pathbuf->s, &sbuf) == 0) &&
510 S_ISDIR(sbuf.st_mode))
513 strbuf_append1(pathbuf, SEP);
514 strbuf_terminate(pathbuf);
517 globextend(pathbuf->s, pglob);
521 /* find end of next segment, tentatively copy to pathbuf */
/* Save the length so the tentative copy can be undone. */
523 orig_len = pathbuf->len;
524 while (*p != EOS && *p != SEP) {
527 strbuf_append1(pathbuf, *p++);
530 if (!anymeta) { /* no expansion, do next segment */
/* Copy every separator that follows the segment. */
532 while (*pattern == SEP)
533 strbuf_append1(pathbuf, *pattern++);
535 else { /* need expansion, recurse */
/* Drop the tentative copy; glob3() appends matching names itself. */
536 pathbuf->len = orig_len;
537 return (glob3(pathbuf, pattern, p, pattern, pglob, no_match));
/*
 * Decode one character from a Char string.  The low 8 bits of up to n
 * elements of s are copied into a local byte buffer, stopping once a zero
 * byte has been copied, and the buffer is then converted by one_mbtowc()
 * into *pwc.  Callers in this file use the result as a width in elements.
 * NOTE(review): buf holds MB_LEN_MAX bytes and every visible caller passes
 * MB_LEN_MAX for n; a larger n would not fit.
 */
544 One_Char_mbtowc(__Char *pwc, const Char *s, size_t n)
547 char buf[MB_LEN_MAX], *p;
552 while (p < buf + n && (*p++ = LCHAR(*s++)) != 0)
554 return one_mbtowc(pwc, buf, n);
/*
 * Expand one pattern segment, running from pattern up to restpattern,
 * against the entries of the directory named by pathbuf.  Matching entries
 * are appended to pathbuf and the rest of the pattern is handled by
 * glob2().  pglobstar is the position from which the segment is scanned
 * for a run of adjacent M_ALL operators.
 */
562 glob3(struct strbuf *pathbuf, const Char *pattern, const Char *restpattern,
563 const Char *pglobstar, glob_t *pglob, int no_match)
/* Negation operator for bracket expressions, chosen by GLOB_ALTNOT. */
569 Char m_not = (pglob->gl_flags & GLOB_ALTNOT) ? M_ALTNOT : M_NOT;
572 int chase_symlinks = 0;
573 const Char *termstar = NULL;
575 strbuf_terminate(pathbuf);
/* Length of the directory prefix; restored before each entry is tried. */
576 orig_len = pathbuf->len;
/*
 * Look for two adjacent M_ALL operators in the segment.  A third one
 * sets chase_symlinks, and termstar ends up just past the run.
 */
579 while (pglobstar < restpattern) {
581 size_t width = One_Char_mbtowc(&wc, pglobstar, MB_LEN_MAX);
582 if ((pglobstar[0] & M_MASK) == M_ALL &&
583 (pglobstar[width] & M_MASK) == M_ALL) {
585 chase_symlinks = (pglobstar[2 * width] & M_MASK) == M_ALL;
586 termstar = pglobstar + (2 + chase_symlinks) * width;
/*
 * When the star run is the whole segment, continue with glob2() on the
 * neighbouring pattern position; otherwise recurse with glob3(), scanning
 * from the end of the run.
 */
593 err = pglobstar==pattern && termstar==restpattern ?
594 *restpattern == EOS ?
595 glob2(pathbuf, restpattern - 1, pglob, no_match) :
596 glob2(pathbuf, restpattern + 1, pglob, no_match) :
597 glob3(pathbuf, pattern, restpattern, termstar, pglob, no_match);
/* Undo whatever the recursive call appended to pathbuf. */
600 pathbuf->len = orig_len;
601 strbuf_terminate(pathbuf);
/*
 * Check a non-empty directory name: Lstat() must succeed and report a
 * directory, with an extra symbolic link clause on the next line.
 * NOTE(review): the action taken when the check fails is not visible.
 */
604 if (*pathbuf->s && (Lstat(pathbuf->s, &sbuf) || !S_ISDIR(sbuf.st_mode)
606 && ((globstar && !chase_symlinks) || !S_ISLNK(sbuf.st_mode))
611 if (!(dirp = Opendir(pathbuf->s))) {
612 /* todo: don't call for ENOENT or ENOTDIR? */
/* The error callback returning nonzero, or GLOB_ERR, is tested here. */
613 if ((pglob->gl_errfunc && (*pglob->gl_errfunc) (pathbuf->s, errno)) ||
614 (pglob->gl_flags & GLOB_ERR))
620 /* search directory for matching names */
621 while ((dp = readdir(dirp)) != NULL) {
622 /* initial DOT must be matched literally */
623 if (dp->d_name[0] == DOT && *pattern != DOT)
624 if (!(pglob->gl_flags & GLOB_DOT) || !dp->d_name[1] ||
625 (dp->d_name[1] == DOT && !dp->d_name[2]))
626 continue; /*unless globdot and not . or .. */
/* Rebuild pathbuf as the directory prefix followed by this entry name. */
627 pathbuf->len = orig_len;
628 strbuf_append(pathbuf, dp->d_name);
629 strbuf_terminate(pathbuf);
/* Unless chasing symlinks, test for entries that fail Lstat() or are links. */
633 if (!chase_symlinks &&
634 (Lstat(pathbuf->s, &sbuf) || S_ISLNK(sbuf.st_mode)))
/*
 * Star-run case: the entry name is matched against the pattern up to
 * termstar, then SEP is appended and glob2() restarts from pglobstar.
 */
637 if (match(pathbuf->s + orig_len, pattern, termstar,
638 (int)m_not) == no_match)
640 strbuf_append1(pathbuf, SEP);
641 strbuf_terminate(pathbuf);
642 if ((err = glob2(pathbuf, pglobstar, pglob, no_match)) != 0)
/* Ordinary case: match the whole segment, then continue with the rest. */
645 if (match(pathbuf->s + orig_len, pattern, restpattern,
646 (int) m_not) == no_match)
648 if ((err = glob2(pathbuf, restpattern, pglob, no_match)) != 0)
652 /* todo: check error from readdir? */
659 * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
660 * add the new item, and update gl_pathc.
662 * This assumes the BSD realloc, which only copies the block when its size
663 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
666 * Return 0 if new item added, error code if memory couldn't be allocated.
668 * Invariant of the glob_t structure:
669 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
670 * gl_pathv points to (gl_offs + gl_pathc + 1) items.
/*
 * Append a copy of path, made with strsave(), to pglob->gl_pathv and
 * increment pglob->gl_pathc, keeping the vector NULL-terminated.
 * NOTE(review): the prototype earlier in the file declares this function
 * void, so the return value described in the comment above is not produced.
 */
673 globextend(const char *path, glob_t *pglob)
/* Room for the existing entries, the new entry and the closing NULL. */
679 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
680 pathv = xrealloc(pglob->gl_pathv, newsize);
682 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
683 /* first time around -- clear initial gl_offs items */
684 pathv += pglob->gl_offs;
685 for (i = pglob->gl_offs; --i >= 0;)
688 pglob->gl_pathv = pathv;
/* Store the new entry after the existing ones, then re-terminate. */
690 pathv[pglob->gl_offs + pglob->gl_pathc++] = strsave(path);
691 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
695 * pattern matching function for filenames.
/*
 * Match the byte string name against the compiled pattern running from pat
 * up to patend.  m_not is the operator code that negates a bracket
 * expression.  glob3() compares the result with its no_match argument.
 */
698 match(const char *name, const Char *pat, const Char *patend, int m_not)
700 int ok, negate_range;
702 const char *nameNext, *nameStart, *nameEnd;
/* nameNext and nameStart both begin at the start of the name. */
706 nameStart = nameNext = name;
709 while (pat < patend || *name) {
713 c = *pat; /* Only for M_MASK bits */
/* Decode the next pattern character (wc) and the next name character (wk). */
717 pwk = One_Char_mbtowc(&wc, pat, MB_LEN_MAX);
718 lwk = one_mbtowc(&wk, name, MB_LEN_MAX);
719 switch (c & M_MASK) {
/* Loop while the following pattern element is another M_ALL. */
721 while ((*(pat + pwk) & M_MASK) == M_ALL) {
723 pwk = One_Char_mbtowc(&wc, pat, MB_LEN_MAX);
/* nameNext records the position one name character further on. */
726 nameNext = name + lwk;
740 pwk = One_Char_mbtowc(&wc, pat, MB_LEN_MAX);
/* A leading negation operator inverts the sense of the bracket expression. */
742 if ((negate_range = ((*pat & M_MASK) == m_not)) != 0) {
744 pwk = One_Char_mbtowc(&wc, pat, MB_LEN_MAX);
747 while ((*pat & M_MASK) != M_END) {
748 if ((*pat & M_MASK) == M_RNG) {
752 pwk = One_Char_mbtowc(&wc2, pat, MB_LEN_MAX);
/* Range test: wc1 <= wk <= wc2 in collation order. */
753 if (globcharcoll(wc1, wk, 0) <= 0 &&
754 globcharcoll(wk, wc2, 0) <= 0)
760 pwk = One_Char_mbtowc(&wc, pat, MB_LEN_MAX);
763 pwk = One_Char_mbtowc(&wc, pat, MB_LEN_MAX);
/* Equal values mean the bracket expression rejected this character. */
764 if (ok == negate_range)
/* Literal character: the name has ended or the characters differ. */
768 if (*name == EOS || samecase(wk) != samecase(wc))
/* Test whether a recorded resume position can still be used. */
774 if (nameNext != nameStart
775 && (nameEnd == NULL || nameNext <= nameEnd)) {
785 /* free allocated data belonging to a glob_t structure */
/*
 * Release the path vector of pglob.  Each of the gl_pathc entries that
 * follow the first gl_offs slots is freed and cleared, then the vector
 * itself is freed and gl_pathv is reset to NULL.
 */
787 globfree(glob_t *pglob)
792 if (pglob->gl_pathv != NULL) {
/* Skip the reserved leading slots. */
793 pp = pglob->gl_pathv + pglob->gl_offs;
794 for (i = pglob->gl_pathc; i--; ++pp)
796 xfree(*pp), *pp = NULL;
797 xfree(pglob->gl_pathv), pglob->gl_pathv = NULL;