/*-
 * Copyright (c) 2003-2007 Tim Kientzle
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "archive_platform.h"
__FBSDID("$FreeBSD$");

#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif

#include "archive_pathmatch.h"

/*
 * Check whether a character 'c' is matched by a list specification [...]:
 *    * Leading '!' or '^' negates the class.
 *    * X-Y is a range of characters
 *    * \X removes any special meaning for X
 *
 * 'start' points just past the opening '[' and 'end' points at the
 * closing ']'; only the characters in [start, end) are examined.
 * Returns 1 if 'c' is accepted by the class and 0 otherwise.
 *
 * Some interesting boundary cases:
 *   a-d-e is one range (a-d) followed by two single characters - and e.
 *   \a-\d is same as a-d
 *   a\-d is three single characters: a, d, -
 *   Trailing - is not special (so [a-] is two characters a and -).
 *   Initial - is not special ([a-] is same as [-a] is same as [\\-a])
 *   This function never sees a trailing \.
 *   [] always fails
 *   [!] always succeeds
 */
static int
pm_list(const char *start, const char *end, const char c, int flags)
{
	const char *p = start;
	char rangeStart = '\0', nextRangeStart;
	int match = 1, nomatch = 0;

	/* This will be used soon... */
	(void)flags; /* UNUSED */

	/*
	 * If this is a negated class, return success for nomatch.
	 * The bounds test comes first so an empty class is never
	 * inspected.
	 */
	if (p < end && (*p == '!' || *p == '^')) {
		match = 0;
		nomatch = 1;
		++p;
	}

	while (p < end) {
		nextRangeStart = '\0';
		switch (*p) {
		case '-':
			/* Trailing or initial '-' is not special. */
			if ((rangeStart == '\0') || (p == end - 1)) {
				if (*p == c)
					return (match);
			} else {
				/* The range end may itself be escaped. */
				char rangeEnd = *++p;
				if (rangeEnd == '\\')
					rangeEnd = *++p;
				if ((rangeStart <= c) && (c <= rangeEnd))
					return (match);
			}
			break;
		case '\\':
			++p;
			/* Fall through */
		default:
			if (*p == c)
				return (match);
			nextRangeStart = *p; /* Possible start of range. */
		}
		rangeStart = nextRangeStart;
		++p;
	}
	return (nomatch);
}

/*
 * Wide-character counterpart of pm_list(); same rules, same return
 * values, operating on wchar_t.
 */
static int
pm_list_w(const wchar_t *start, const wchar_t *end, const wchar_t c, int flags)
{
	const wchar_t *p = start;
	wchar_t rangeStart = L'\0', nextRangeStart;
	int match = 1, nomatch = 0;

	/* This will be used soon... */
	(void)flags; /* UNUSED */

	/*
	 * If this is a negated class, return success for nomatch.
	 * The bounds test comes first so an empty class is never
	 * inspected.
	 */
	if (p < end && (*p == L'!' || *p == L'^')) {
		match = 0;
		nomatch = 1;
		++p;
	}

	while (p < end) {
		nextRangeStart = L'\0';
		switch (*p) {
		case L'-':
			/* Trailing or initial '-' is not special. */
			if ((rangeStart == L'\0') || (p == end - 1)) {
				if (*p == c)
					return (match);
			} else {
				/* The range end may itself be escaped. */
				wchar_t rangeEnd = *++p;
				if (rangeEnd == L'\\')
					rangeEnd = *++p;
				if ((rangeStart <= c) && (c <= rangeEnd))
					return (match);
			}
			break;
		case L'\\':
			++p;
			/* Fall through */
		default:
			if (*p == c)
				return (match);
			nextRangeStart = *p; /* Possible start of range. */
		}
		rangeStart = nextRangeStart;
		++p;
	}
	return (nomatch);
}

/*
 * If s is pointing to "./", ".//", "./././" or the like, skip it.
 * A final "." (one directly followed by the terminator) is skipped
 * as well.  Returns a pointer to the first character not skipped.
 */
static const char *
pm_slashskip(const char *s)
{
	while ((*s == '/')
	    || (s[0] == '.' && s[1] == '/')
	    || (s[0] == '.' && s[1] == '\0'))
		++s;
	return (s);
}

/* Wide-character counterpart of pm_slashskip(). */
static const wchar_t *
pm_slashskip_w(const wchar_t *s)
{
	while ((*s == L'/')
	    || (s[0] == L'.' && s[1] == L'/')
	    || (s[0] == L'.' && s[1] == L'\0'))
		++s;
	return (s);
}

/*
 * Match pattern 'p' against string 's', both anchored at their current
 * position.  Returns 1 on a match and 0 otherwise.
 *
 * Handles '?', '*', [...] classes, backslash escapes, '/' (which also
 * absorbs redundant "/" and "./" sequences on both sides) and, when
 * PATHMATCH_NO_ANCHOR_END is set, a final '$'.
 */
static int
pm(const char *p, const char *s, int flags)
{
	const char *end;

	/*
	 * Ignore leading './', './/', '././', etc.
	 */
	if (s[0] == '.' && s[1] == '/')
		s = pm_slashskip(s + 1);
	if (p[0] == '.' && p[1] == '/')
		p = pm_slashskip(p + 1);

	for (;;) {
		switch (*p) {
		case '\0':
			if (s[0] == '/') {
				if (flags & PATHMATCH_NO_ANCHOR_END)
					return (1);
				/* "dir" == "dir/" == "dir/." */
				s = pm_slashskip(s);
			}
			return (*s == '\0');
		case '?':
			/* ? always succeeds, unless we hit end of 's' */
			if (*s == '\0')
				return (0);
			break;
		case '*':
			/* "*" == "**" == "***" ... */
			while (*p == '*')
				++p;
			/* Trailing '*' always succeeds. */
			if (*p == '\0')
				return (1);
			/* Try the rest of the pattern at every suffix. */
			while (*s) {
				if (archive_pathmatch(p, s, flags))
					return (1);
				++s;
			}
			return (0);
		case '[':
			/*
			 * Both a class and a literal '[' consume exactly
			 * one character, so neither can match at the end
			 * of 's'.  Without this test a negated class
			 * would accept the terminator and the increment
			 * below would step past the end of the string.
			 */
			if (*s == '\0')
				return (0);
			/*
			 * Find the end of the [...] character class,
			 * ignoring \] that might occur within the class.
			 */
			end = p + 1;
			while (*end != '\0' && *end != ']') {
				if (*end == '\\' && end[1] != '\0')
					++end;
				++end;
			}
			if (*end == ']') {
				/* We found [...], try to match it. */
				if (!pm_list(p + 1, end, *s, flags))
					return (0);
				p = end; /* Jump to trailing ']' char. */
			} else {
				/* No final ']', so just match '['. */
				if (*p != *s)
					return (0);
			}
			break;
		case '\\':
			/* Trailing '\\' matches itself. */
			if (p[1] == '\0') {
				if (*s != '\\')
					return (0);
			} else {
				++p;
				if (*p != *s)
					return (0);
			}
			break;
		case '/':
			if (*s != '/' && *s != '\0')
				return (0);
			/* Note: pattern "/\./" won't match "/";
			 * pm_slashskip() correctly stops at backslash. */
			p = pm_slashskip(p);
			s = pm_slashskip(s);
			if (*p == '\0' && (flags & PATHMATCH_NO_ANCHOR_END))
				return (1);
			--p; /* Counteract the increment below. */
			--s;
			break;
		case '$':
			/* '$' is special only at end of pattern and only
			 * if PATHMATCH_NO_ANCHOR_END is specified. */
			if (p[1] == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)) {
				/* "dir" == "dir/" == "dir/." */
				return (*pm_slashskip(s) == '\0');
			}
			/* Otherwise, '$' is not special. */
			/* FALL THROUGH */
		default:
			if (*p != *s)
				return (0);
			break;
		}
		++p;
		++s;
	}
}

/*
 * Wide-character counterpart of pm(); same rules, same return values,
 * operating on wchar_t.
 */
static int
pm_w(const wchar_t *p, const wchar_t *s, int flags)
{
	const wchar_t *end;

	/*
	 * Ignore leading './', './/', '././', etc.
	 */
	if (s[0] == L'.' && s[1] == L'/')
		s = pm_slashskip_w(s + 1);
	if (p[0] == L'.' && p[1] == L'/')
		p = pm_slashskip_w(p + 1);

	for (;;) {
		switch (*p) {
		case L'\0':
			if (s[0] == L'/') {
				if (flags & PATHMATCH_NO_ANCHOR_END)
					return (1);
				/* "dir" == "dir/" == "dir/." */
				s = pm_slashskip_w(s);
			}
			return (*s == L'\0');
		case L'?':
			/* ? always succeeds, unless we hit end of 's' */
			if (*s == L'\0')
				return (0);
			break;
		case L'*':
			/* "*" == "**" == "***" ... */
			while (*p == L'*')
				++p;
			/* Trailing '*' always succeeds. */
			if (*p == L'\0')
				return (1);
			/* Try the rest of the pattern at every suffix. */
			while (*s) {
				if (archive_pathmatch_w(p, s, flags))
					return (1);
				++s;
			}
			return (0);
		case L'[':
			/*
			 * Both a class and a literal '[' consume exactly
			 * one character, so neither can match at the end
			 * of 's'.  Without this test a negated class
			 * would accept the terminator and the increment
			 * below would step past the end of the string.
			 */
			if (*s == L'\0')
				return (0);
			/*
			 * Find the end of the [...] character class,
			 * ignoring \] that might occur within the class.
			 */
			end = p + 1;
			while (*end != L'\0' && *end != L']') {
				if (*end == L'\\' && end[1] != L'\0')
					++end;
				++end;
			}
			if (*end == L']') {
				/* We found [...], try to match it. */
				if (!pm_list_w(p + 1, end, *s, flags))
					return (0);
				p = end; /* Jump to trailing ']' char. */
			} else {
				/* No final ']', so just match '['. */
				if (*p != *s)
					return (0);
			}
			break;
		case L'\\':
			/* Trailing '\\' matches itself. */
			if (p[1] == L'\0') {
				if (*s != L'\\')
					return (0);
			} else {
				++p;
				if (*p != *s)
					return (0);
			}
			break;
		case L'/':
			if (*s != L'/' && *s != L'\0')
				return (0);
			/* Note: pattern "/\./" won't match "/";
			 * pm_slashskip_w() correctly stops at backslash. */
			p = pm_slashskip_w(p);
			s = pm_slashskip_w(s);
			if (*p == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END))
				return (1);
			--p; /* Counteract the increment below. */
			--s;
			break;
		case L'$':
			/* '$' is special only at end of pattern and only
			 * if PATHMATCH_NO_ANCHOR_END is specified. */
			if (p[1] == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END)) {
				/* "dir" == "dir/" == "dir/." */
				return (*pm_slashskip_w(s) == L'\0');
			}
			/* Otherwise, '$' is not special. */
			/* FALL THROUGH */
		default:
			if (*p != *s)
				return (0);
			break;
		}
		++p;
		++s;
	}
}

/*
 * Main entry point.
 *
 * Match pattern 'p' against path 's'.  Returns 1 on a match and 0
 * otherwise.  A NULL or empty pattern matches only a NULL or empty
 * string; a NULL string never matches a non-empty pattern.
 *
 * With PATHMATCH_NO_ANCHOR_START in 'flags' the pattern is tried at
 * the start of every '/'-separated element of 's', unless the pattern
 * begins with '^', '*' or '/', which all force a match from the
 * beginning.
 */
int
__archive_pathmatch(const char *p, const char *s, int flags)
{
	/* Empty pattern only matches the empty string. */
	if (p == NULL || *p == '\0')
		return (s == NULL || *s == '\0');
	else if (s == NULL)
		return (0);

	/* Leading '^' anchors the start of the pattern. */
	if (*p == '^') {
		++p;
		flags &= ~PATHMATCH_NO_ANCHOR_START;
	}

	/* An absolute pattern can only match an absolute path. */
	if (*p == '/' && *s != '/')
		return (0);

	/* Certain patterns anchor implicitly. */
	if (*p == '*' || *p == '/') {
		while (*p == '/')
			++p;
		while (*s == '/')
			++s;
		return (pm(p, s, flags));
	}

	/* If start is unanchored, try to match start of each path element. */
	if (flags & PATHMATCH_NO_ANCHOR_START) {
		for ( ; s != NULL; s = strchr(s, '/')) {
			if (*s == '/')
				s++;
			if (pm(p, s, flags))
				return (1);
		}
		return (0);
	}

	/* Default: Match from beginning. */
	return (pm(p, s, flags));
}

/*
 * Wide-character counterpart of __archive_pathmatch(); same rules,
 * same return values, operating on wchar_t.
 */
int
__archive_pathmatch_w(const wchar_t *p, const wchar_t *s, int flags)
{
	/* Empty pattern only matches the empty string. */
	if (p == NULL || *p == L'\0')
		return (s == NULL || *s == L'\0');
	else if (s == NULL)
		return (0);

	/* Leading '^' anchors the start of the pattern. */
	if (*p == L'^') {
		++p;
		flags &= ~PATHMATCH_NO_ANCHOR_START;
	}

	/* An absolute pattern can only match an absolute path. */
	if (*p == L'/' && *s != L'/')
		return (0);

	/* Certain patterns anchor implicitly. */
	if (*p == L'*' || *p == L'/') {
		while (*p == L'/')
			++p;
		while (*s == L'/')
			++s;
		return (pm_w(p, s, flags));
	}

	/* If start is unanchored, try to match start of each path element. */
	if (flags & PATHMATCH_NO_ANCHOR_START) {
		for ( ; s != NULL; s = wcschr(s, L'/')) {
			if (*s == L'/')
				s++;
			if (pm_w(p, s, flags))
				return (1);
		}
		return (0);
	}

	/* Default: Match from beginning. */
	return (pm_w(p, s, flags));
}