Import tcsh-6.17.00
[dragonfly.git] / contrib / tcsh-6 / glob.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Guido van Rossum.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32#if defined(LIBC_SCCS) && !defined(lint)
33static char sccsid[] = "@(#)glob.c 5.12 (Berkeley) 6/24/91";
34#endif /* LIBC_SCCS and not lint */
35/*
36 * Glob: the interface is a superset of the one defined in POSIX 1003.2,
37 * draft 9.
38 *
39 * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
40 *
41 * Optional extra services, controlled by flags not defined by POSIX:
42 *
43 * GLOB_QUOTE:
44 * Escaping convention: \ inhibits any special meaning the following
45 * character might have (except \ at end of string is retained).
46 * GLOB_MAGCHAR:
47 * Set in gl_flags if pattern contained a globbing character.
48 * GLOB_ALTNOT:
49 * Use ^ instead of ! for "not".
50 * gl_matchc:
51 * Number of matches in the current invocation of glob.
52 */
53
/* WINNT_NATIVE builds switch off compiler warning 4244 for this file. */
54#ifdef WINNT_NATIVE
55 #pragma warning(disable:4244)
56#endif /* WINNT_NATIVE */
57
/*
 * While the project headers are read, every `Char' token is spelled
 * `__Char'; this file then declares its own, narrower `Char' further
 * down and refers to the header's type as `__Char'.
 */
58#define Char __Char
59#include "sh.h"
60#include "glob.h"
61
/*
 * Drop any definitions of these names picked up from the headers:
 * QUOTE, TILDE, META and ismeta are redefined as macros below, and
 * Strchr is redefined as a static function.
 */
62#undef Char
63#undef QUOTE
64#undef TILDE
65#undef META
66#undef ismeta
67#undef Strchr
68
/* Fallback for systems whose <sys/stat.h> lacks S_ISDIR. */
69#ifndef S_ISDIR
70#define S_ISDIR(a) (((a) & S_IFMT) == S_IFDIR)
71#endif
72
/* Fallback S_ISLNK, only possible when the S_IFLNK file type exists. */
73#if !defined(S_ISLNK) && defined(S_IFLNK)
74#define S_ISLNK(a) (((a) & S_IFMT) == S_IFLNK)
75#endif
76
/*
 * If S_ISLNK is still undefined (and lstat is not already a macro),
 * map lstat onto stat.
 */
77#if !defined(S_ISLNK) && !defined(lstat)
78#define lstat stat
79#endif
80
/*
 * Pattern character used inside this file.  The low 8 bits carry the
 * character (see LCHAR); the M_META and M_PROTECT bits defined below
 * are or-ed on top of it.
 */
81typedef unsigned short Char;
82
/*
 * Forward declarations of the file-local helpers.  glob() calls
 * glob1(), which calls glob2(); glob2() and glob3() call each other.
 */
83static int glob1 (Char *, glob_t *, int);
84static int glob2 (struct strbuf *, const Char *, glob_t *, int);
85static int glob3 (struct strbuf *, const Char *, const Char *,
86 glob_t *, int);
87static void globextend (const char *, glob_t *);
88static int match (const char *, const Char *, const Char *,
89 int);
90static int compare (const void *, const void *);
91static DIR *Opendir (const char *);
/* Without S_IFLNK, Lstat is a macro alias for Stat (defined further down). */
92#ifdef S_IFLNK
93static int Lstat (const char *, struct stat *);
94#endif
95static int Stat (const char *, struct stat *sb);
96static Char *Strchr (Char *, int);
/* qprintf only exists in DEBUG builds. */
97#ifdef DEBUG
98static void qprintf (const Char *);
99#endif
100
/* Plain characters that have a meaning in the pattern syntax. */
101#define DOLLAR '$'
102#define DOT '.'
103#define EOS '\0'
104#define LBRACKET '['
105#define NOT '!'
106#define ALTNOT '^'
107#define QUESTION '?'
108#define QUOTE '\\'
109#define RANGE '-'
110#define RBRACKET ']'
111#define SEP '/'
112#define STAR '*'
113#define TILDE '~'
114#define UNDERSCORE '_'
115
/*
 * Flag bits stored in a Char above the character itself:
 *   M_META    - set by META(); marks a compiled metacharacter
 *   M_PROTECT - or-ed in by glob() onto characters taken literally
 *               under GLOB_QUOTE
 *   M_MASK    - mask applied to pattern Chars before comparing them
 *               with the M_* tokens in match()
 *   M_ASCII   - the low 8 bits, i.e. the character without flags
 */
116#define M_META 0x8000
117#define M_PROTECT 0x4000
118#define M_MASK 0xffff
119#define M_ASCII 0x00ff
120
/* LCHAR strips both flag bits; META tags a character as a metacharacter. */
121#define LCHAR(c) ((c)&M_ASCII)
122#define META(c) ((c)|M_META)
/* Compiled-pattern tokens emitted by glob() and interpreted by match(). */
123#define M_ALL META('*')
124#define M_END META(']')
125#define M_NOT META('!')
126#define M_ALTNOT META('^')
127#define M_ONE META('?')
128#define M_RNG META('-')
129#define M_SET META('[')
/* True when the M_META bit is set on c. */
130#define ismeta(c) (((c)&M_META) != 0)
131
132int
133globcharcoll(__Char c1, __Char c2, int cs)
134{
135#if defined(NLS) && defined(LC_COLLATE) && defined(HAVE_STRCOLL)
136# if defined(WIDE_STRINGS)
137 wchar_t s1[2], s2[2];
138
139 if (c1 == c2)
140 return (0);
141 if (cs) {
142 c1 = towlower(c1);
143 c2 = towlower(c2);
144 } else {
145 /* This should not be here, but I'll rather leave it in than engage in
146 a LC_COLLATE flamewar about a shell I don't use... */
147 if (iswlower(c1) && iswupper(c2))
148 return (1);
149 if (iswupper(c1) && iswlower(c2))
150 return (-1);
151 }
152 s1[0] = c1;
153 s2[0] = c2;
154 s1[1] = s2[1] = '\0';
155 return wcscoll(s1, s2);
156# else /* not WIDE_STRINGS */
157 char s1[2], s2[2];
158
159 if (c1 == c2)
160 return (0);
161 /*
162 * From kevin lyda <kevin@suberic.net>:
163 * strcoll does not guarantee case sorting, so we pre-process now:
164 */
165 if (cs) {
166 c1 = islower(c1) ? c1 : tolower(c1);
167 c2 = islower(c2) ? c2 : tolower(c2);
168 } else {
169 if (islower(c1) && isupper(c2))
170 return (1);
171 if (isupper(c1) && islower(c2))
172 return (-1);
173 }
174 s1[0] = c1;
175 s2[0] = c2;
176 s1[1] = s2[1] = '\0';
177 return strcoll(s1, s2);
178# endif
179#else
180 return (c1 - c2);
181#endif
182}
183
/*
 * Two kernel quirks are worked around in the wrappers that follow:
 *   - opendir("") does not behave like opendir("."); Opendir() below
 *     maps the empty string to ".".
 *   - NAMEI_BUG: some kernels ignore a trailing slash on plain files,
 *     whereas POSIX only wants it ignored on directories.
 *
 * Opendir: open directory `str', treating "" as the current directory.
 * On HP-UX the path is stat()ed first and anything that is not a
 * directory is refused, because opendir() can hang on some device
 * files there.  Returns the DIR handle, or NULL on failure.
 */

static DIR *
Opendir(const char *str)
{
    if (*str == '\0')
        return opendir(".");

#if defined(hpux) || defined(__hpux)
    {
        struct stat st;

        if (stat(str, &st) == -1 || !S_ISDIR(st.st_mode))
            return NULL;
    }
#endif
    return opendir(str);
}
209
#ifdef S_IFLNK
/*
 * lstat() wrapper.
 *
 * Returns what lstat() returns.  Under NAMEI_BUG, a successful lstat()
 * of a name that ends in '/' but is not a directory is turned into a
 * failure (-1).  The stat buffer is only examined when lstat()
 * succeeded, since a failed call does not fill it in.
 */
static int
Lstat(const char *fn, struct stat *sb)
{
    int st;

    st = lstat(fn, sb);
# ifdef NAMEI_BUG
    if (st == 0 && *fn != 0 && strend(fn)[-1] == '/' &&
        !S_ISDIR(sb->st_mode))
        st = -1;
# endif /* NAMEI_BUG */
    return st;
}
#else
/* No symbolic links on this system: Lstat is simply Stat. */
#define Lstat Stat
#endif /* S_IFLNK */
226
/*
 * stat() wrapper.
 *
 * Returns what stat() returns.  Under NAMEI_BUG, a successful stat()
 * of a name that ends in '/' but is not a directory is turned into a
 * failure (-1).  The stat buffer is only examined when stat()
 * succeeded, since a failed call does not fill it in.
 */
static int
Stat(const char *fn, struct stat *sb)
{
    int st;

    st = stat(fn, sb);
#ifdef NAMEI_BUG
    if (st == 0 && *fn != 0 && strend(fn)[-1] == '/' &&
        !S_ISDIR(sb->st_mode))
        st = -1;
#endif /* NAMEI_BUG */
    return st;
}
239
240static Char *
241Strchr(Char *str, int ch)
242{
243 do
244 if (*str == ch)
245 return (str);
246 while (*str++);
247 return (NULL);
248}
249
#ifdef DEBUG
/*
 * Debug dump of a compiled pattern on three lines: the characters
 * (low 8 bits), then a '"' under every M_PROTECT Char, then a '_'
 * under every M_META Char.
 */
static void
qprintf(const Char *s)
{
    const Char *cur;

    cur = s;
    while (*cur) {
        printf("%c", *cur & 0xff);
        cur++;
    }
    printf("\n");

    cur = s;
    while (*cur) {
        printf("%c", (*cur & M_PROTECT) ? '"' : ' ');
        cur++;
    }
    printf("\n");

    cur = s;
    while (*cur) {
        printf("%c", (*cur & M_META) ? '_' : ' ');
        cur++;
    }
    printf("\n");
}
#endif /* DEBUG */
267
/*
 * qsort() callback ordering two `char *' array elements: by strcoll()
 * when NLS and HAVE_STRCOLL are defined, by strcmp() otherwise.
 */
static int
compare(const void *p, const void *q)
{
    const char *lhs = *(char *const *) p;
    const char *rhs = *(char *const *) q;

#if defined(NLS) && defined(HAVE_STRCOLL)
    return strcoll(lhs, rhs);
#else
    return strcmp(lhs, rhs);
#endif /* NLS && HAVE_STRCOLL */
}
277
278/*
279 * The main glob() routine: compiles the pattern (optionally processing
280 * quotes), calls glob1() to do the real pattern matching, and finally
281 * sorts the list (unless unsorted operation is requested). Returns 0
282 * if things went well, nonzero if errors occurred. It is not an error
283 * to find no matches.
284 */
285int
286glob(const char *pattern, int flags, int (*errfunc) (const char *, int),
287 glob_t *pglob)
288{
289 int err, oldpathc;
290 Char *bufnext, m_not;
291 const unsigned char *patnext;
292 int c, not;
293 Char *qpatnext, *patbuf;
294 int no_match;
295
296 patnext = (const unsigned char *) pattern;
297 if (!(flags & GLOB_APPEND)) {
298 pglob->gl_pathc = 0;
299 pglob->gl_pathv = NULL;
300 if (!(flags & GLOB_DOOFFS))
301 pglob->gl_offs = 0;
302 }
303 pglob->gl_flags = flags & ~GLOB_MAGCHAR;
304 pglob->gl_errfunc = errfunc;
305 oldpathc = pglob->gl_pathc;
306 pglob->gl_matchc = 0;
307
308 if (pglob->gl_flags & GLOB_ALTNOT) {
309 not = ALTNOT;
310 m_not = M_ALTNOT;
311 }
312 else {
313 not = NOT;
314 m_not = M_NOT;
315 }
316
317 patbuf = xmalloc((strlen(pattern) + 1) * sizeof(*patbuf));
318 bufnext = patbuf;
319
320 no_match = *patnext == not;
321 if (no_match)
322 patnext++;
323
324 if (flags & GLOB_QUOTE) {
325 /* Protect the quoted characters */
326 while ((c = *patnext++) != EOS) {
327#ifdef WIDE_STRINGS
328 int len;
329
330 len = mblen((const char *)(patnext - 1), MB_LEN_MAX);
331 if (len == -1)
332 (void)mblen(NULL, 0);
333 else if (len > 1) {
334 *bufnext++ = (Char) c;
335 while (--len != 0)
336 *bufnext++ = (Char) (*patnext++ | M_PROTECT);
337 } else
338#endif /* WIDE_STRINGS */
339 if (c == QUOTE) {
340 if ((c = *patnext++) == EOS) {
341 c = QUOTE;
342 --patnext;
343 }
344 *bufnext++ = (Char) (c | M_PROTECT);
345 }
346 else
347 *bufnext++ = (Char) c;
348 }
349 }
350 else
351 while ((c = *patnext++) != EOS)
352 *bufnext++ = (Char) c;
353 *bufnext = EOS;
354
355 bufnext = patbuf;
356 qpatnext = patbuf;
357 while ((c = *qpatnext++) != EOS) {
358 switch (c) {
359 case LBRACKET:
360 c = *qpatnext;
361 if (c == not)
362 ++qpatnext;
363 if (*qpatnext == EOS ||
364 Strchr(qpatnext + 1, RBRACKET) == NULL) {
365 *bufnext++ = LBRACKET;
366 if (c == not)
367 --qpatnext;
368 break;
369 }
370 pglob->gl_flags |= GLOB_MAGCHAR;
371 *bufnext++ = M_SET;
372 if (c == not)
373 *bufnext++ = m_not;
374 c = *qpatnext++;
375 do {
376 *bufnext++ = LCHAR(c);
377 if (*qpatnext == RANGE &&
378 (c = qpatnext[1]) != RBRACKET) {
379 *bufnext++ = M_RNG;
380 *bufnext++ = LCHAR(c);
381 qpatnext += 2;
382 }
383 } while ((c = *qpatnext++) != RBRACKET);
384 *bufnext++ = M_END;
385 break;
386 case QUESTION:
387 pglob->gl_flags |= GLOB_MAGCHAR;
388 *bufnext++ = M_ONE;
389 break;
390 case STAR:
391 pglob->gl_flags |= GLOB_MAGCHAR;
392 /* collapse adjacent stars to one, to avoid
393 * exponential behavior
394 */
395 if (bufnext == patbuf || bufnext[-1] != M_ALL)
396 *bufnext++ = M_ALL;
397 break;
398 default:
399 *bufnext++ = LCHAR(c);
400 break;
401 }
402 }
403 *bufnext = EOS;
404#ifdef DEBUG
405 qprintf(patbuf);
406#endif
407
408 if ((err = glob1(patbuf, pglob, no_match)) != 0) {
409 xfree(patbuf);
410 return (err);
411 }
412
413 /*
414 * If there was no match we are going to append the pattern
415 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
416 * and the pattern did not contain any magic characters
417 * GLOB_NOMAGIC is there just for compatibility with csh.
418 */
419 if (pglob->gl_pathc == oldpathc &&
420 ((flags & GLOB_NOCHECK) ||
421 ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) {
422 if (!(flags & GLOB_QUOTE))
423 globextend(pattern, pglob);
424 else {
425 char *copy, *dest;
426 const char *src;
427
428 /* copy pattern, interpreting quotes */
429 copy = xmalloc(strlen(pattern) + 1);
430 dest = copy;
431 src = pattern;
432 while (*src != EOS) {
433 if (*src == QUOTE) {
434 if (*++src == EOS)
435 --src;
436 }
437 *dest++ = *src++;
438 }
439 *dest = EOS;
440 globextend(copy, pglob);
441 xfree(copy);
442 }
443 xfree(patbuf);
444 return 0;
445 }
446 else if (!(flags & GLOB_NOSORT) && (pglob->gl_pathc != oldpathc))
447 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
448 pglob->gl_pathc - oldpathc, sizeof(char *), compare);
449 xfree(patbuf);
450 return (0);
451}
452
453static int
454glob1(Char *pattern, glob_t *pglob, int no_match)
455{
456 struct strbuf pathbuf = strbuf_INIT;
457 int err;
458
459 /*
460 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4.
461 */
462 if (*pattern == EOS)
463 return (0);
464 err = glob2(&pathbuf, pattern, pglob, no_match);
465 xfree(pathbuf.s);
466 return err;
467}
468
469/*
470 * functions glob2 and glob3 are mutually recursive; there is one level
471 * of recursion for each segment in the pattern that contains one or
472 * more meta characters.
473 */
474static int
475glob2(struct strbuf *pathbuf, const Char *pattern, glob_t *pglob, int no_match)
476{
477 struct stat sbuf;
478 int anymeta;
479 const Char *p;
480 size_t orig_len;
481
482 /*
483 * loop over pattern segments until end of pattern or until segment with
484 * meta character found.
485 */
486 anymeta = 0;
487 for (;;) {
488 if (*pattern == EOS) { /* end of pattern? */
489 strbuf_terminate(pathbuf);
490
491 if (Lstat(pathbuf->s, &sbuf))
492 return (0);
493
494 if (((pglob->gl_flags & GLOB_MARK) &&
495 pathbuf->s[pathbuf->len - 1] != SEP) &&
496 (S_ISDIR(sbuf.st_mode)
497#ifdef S_IFLNK
498 || (S_ISLNK(sbuf.st_mode) &&
499 (Stat(pathbuf->s, &sbuf) == 0) &&
500 S_ISDIR(sbuf.st_mode))
501#endif
502 )) {
503 strbuf_append1(pathbuf, SEP);
504 strbuf_terminate(pathbuf);
505 }
506 ++pglob->gl_matchc;
507 globextend(pathbuf->s, pglob);
508 return 0;
509 }
510
511 /* find end of next segment, tentatively copy to pathbuf */
512 p = pattern;
513 orig_len = pathbuf->len;
514 while (*p != EOS && *p != SEP) {
515 if (ismeta(*p))
516 anymeta = 1;
517 strbuf_append1(pathbuf, *p++);
518 }
519
520 if (!anymeta) { /* no expansion, do next segment */
521 pattern = p;
522 while (*pattern == SEP)
523 strbuf_append1(pathbuf, *pattern++);
524 }
525 else { /* need expansion, recurse */
526 pathbuf->len = orig_len;
527 return (glob3(pathbuf, pattern, p, pglob, no_match));
528 }
529 }
530 /* NOTREACHED */
531}
532
533
534static int
535glob3(struct strbuf *pathbuf, const Char *pattern, const Char *restpattern,
536 glob_t *pglob, int no_match)
537{
538 DIR *dirp;
539 struct dirent *dp;
540 int err;
541 Char m_not = (pglob->gl_flags & GLOB_ALTNOT) ? M_ALTNOT : M_NOT;
542 size_t orig_len;
543
544 strbuf_terminate(pathbuf);
545 errno = 0;
546
547 if (!(dirp = Opendir(pathbuf->s))) {
548 /* todo: don't call for ENOENT or ENOTDIR? */
549 if ((pglob->gl_errfunc && (*pglob->gl_errfunc) (pathbuf->s, errno)) ||
550 (pglob->gl_flags & GLOB_ERR))
551 return (GLOB_ABEND);
552 else
553 return (0);
554 }
555
556 err = 0;
557
558 orig_len = pathbuf->len;
559 /* search directory for matching names */
560 while ((dp = readdir(dirp)) != NULL) {
561 /* initial DOT must be matched literally */
562 if (dp->d_name[0] == DOT && *pattern != DOT)
563 continue;
564 pathbuf->len = orig_len;
565 strbuf_append(pathbuf, dp->d_name);
566 strbuf_terminate(pathbuf);
567 if (match(pathbuf->s + orig_len, pattern, restpattern, (int) m_not)
568 == no_match)
569 continue;
570 err = glob2(pathbuf, restpattern, pglob, no_match);
571 if (err)
572 break;
573 }
574 /* todo: check error from readdir? */
575 closedir(dirp);
576 return (err);
577}
578
579
580/*
581 * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
582 * add the new item, and update gl_pathc.
583 *
584 * This assumes the BSD realloc, which only copies the block when its size
585 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
586 * behavior.
587 *
588 * Return 0 if new item added, error code if memory couldn't be allocated.
589 *
590 * Invariant of the glob_t structure:
591 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
592 * gl_pathv points to (gl_offs + gl_pathc + 1) items.
593 */
594static void
595globextend(const char *path, glob_t *pglob)
596{
597 char **pathv;
598 int i;
599 size_t newsize;
600
601 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
602 pathv = xrealloc(pglob->gl_pathv, newsize);
603
604 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
605 /* first time around -- clear initial gl_offs items */
606 pathv += pglob->gl_offs;
607 for (i = pglob->gl_offs; --i >= 0;)
608 *--pathv = NULL;
609 }
610 pglob->gl_pathv = pathv;
611
612 pathv[pglob->gl_offs + pglob->gl_pathc++] = strsave(path);
613 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
614}
615
616static size_t
617One_Char_mbtowc(__Char *pwc, const Char *s, size_t n)
618{
619#ifdef WIDE_STRINGS
620 char buf[MB_LEN_MAX], *p;
621
622 if (n > MB_LEN_MAX)
623 n = MB_LEN_MAX;
624 p = buf;
625 while (p < buf + n && (*p++ = LCHAR(*s++)) != 0)
626 ;
627 return one_mbtowc(pwc, buf, n);
628#else
629 *pwc = *s & CHAR;
630 return 1;
631#endif
632}
633
634/*
635 * pattern matching function for filenames. Each occurrence of the *
636 * pattern causes a recursion level.
637 */
638static int
639match(const char *name, const Char *pat, const Char *patend, int m_not)
640{
641 int ok, negate_range;
642 Char c;
643
644 while (pat < patend) {
645 size_t lwk;
646 __Char wc, wk;
647
648 c = *pat; /* Only for M_MASK bits */
649 pat += One_Char_mbtowc(&wc, pat, MB_LEN_MAX);
650 lwk = one_mbtowc(&wk, name, MB_LEN_MAX);
651 switch (c & M_MASK) {
652 case M_ALL:
653 if (pat == patend)
654 return (1);
655 for (;;) {
656 if (match(name, pat, patend, m_not))
657 return (1);
658 if (*name == EOS)
659 break;
660 name += lwk;
661 lwk = one_mbtowc(&wk, name, MB_LEN_MAX);
662 }
663 return (0);
664 case M_ONE:
665 if (*name == EOS)
666 return (0);
667 name += lwk;
668 break;
669 case M_SET:
670 ok = 0;
671 if (*name == EOS)
672 return (0);
673 name += lwk;
674 if ((negate_range = ((*pat & M_MASK) == m_not)) != 0)
675 ++pat;
676 while ((*pat & M_MASK) != M_END) {
677 pat += One_Char_mbtowc(&wc, pat, MB_LEN_MAX);
678 if ((*pat & M_MASK) == M_RNG) {
679 __Char wc2;
680
681 pat++;
682 pat += One_Char_mbtowc(&wc2, pat, MB_LEN_MAX);
683 if (globcharcoll(wc, wk, 0) <= 0 &&
684 globcharcoll(wk, wc2, 0) <= 0)
685 ok = 1;
686 } else if (wc == wk)
687 ok = 1;
688 }
689 pat += One_Char_mbtowc(&wc, pat, MB_LEN_MAX);
690 if (ok == negate_range)
691 return (0);
692 break;
693 default:
694 name += lwk;
695 if (samecase(wk) != samecase(wc))
696 return (0);
697 break;
698 }
699 }
700 return (*name == EOS);
701}
702
703/* free allocated data belonging to a glob_t structure */
704void
705globfree(glob_t *pglob)
706{
707 int i;
708 char **pp;
709
710 if (pglob->gl_pathv != NULL) {
711 pp = pglob->gl_pathv + pglob->gl_offs;
712 for (i = pglob->gl_pathc; i--; ++pp)
713 if (*pp)
714 xfree(*pp), *pp = NULL;
715 xfree(pglob->gl_pathv), pglob->gl_pathv = NULL;
716 }
717}