Sync fnmatch(3) with FreeBSD.
author Peter Avalos <pavalos@theshell.com>
Thu, 26 Feb 2009 08:21:07 +0000 (03:21 -0500)
committer Peter Avalos <pavalos@theshell.com>
Tue, 7 Apr 2009 07:09:26 +0000 (21:09 -1000)
* Define FNM_NOSYS in accordance with SUS.

* Document the missing portions of IEEE Std. 1003.2.

* Add support for multibyte characters.

include/fnmatch.h
lib/libc/gen/fnmatch.3
lib/libc/gen/fnmatch.c

index aee5eec..c01cce3 100644 (file)
@@ -30,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/include/fnmatch.h,v 1.9 1999/11/21 17:32:45 ache Exp $
+ * $FreeBSD: src/include/fnmatch.h,v 1.15 2003/12/18 10:41:39 jkh Exp $
  * $DragonFly: src/include/fnmatch.h,v 1.3 2003/11/14 01:01:43 dillon Exp $
  *     @(#)fnmatch.h   8.1 (Berkeley) 6/2/93
  */
 #ifndef        _FNMATCH_H_
 #define        _FNMATCH_H_
 
+#include <sys/cdefs.h>
+
 #define        FNM_NOMATCH     1       /* Match failed. */
 
 #define        FNM_NOESCAPE    0x01    /* Disable backslash escaping. */
 #define        FNM_PATHNAME    0x02    /* Slash must be matched by slash. */
 #define        FNM_PERIOD      0x04    /* Period must be matched by period. */
 
-#if !defined(_ANSI_SOURCE) && !defined(_POSIX_SOURCE)
+#if __XSI_VISIBLE
+#define        FNM_NOSYS       (-1)    /* Reserved. */
+#endif
+
+#if __BSD_VISIBLE
 #define        FNM_LEADING_DIR 0x08    /* Ignore /<tail> after Imatch. */
 #define        FNM_CASEFOLD    0x10    /* Case insensitive search. */
 #define        FNM_IGNORECASE  FNM_CASEFOLD
 #define        FNM_FILE_NAME   FNM_PATHNAME
 #endif
 
-#include <sys/cdefs.h>
-
 __BEGIN_DECLS
-int     fnmatch (const char *, const char *, int);
+int     fnmatch(const char *, const char *, int);
 __END_DECLS
 
 #endif /* !_FNMATCH_H_ */
index 45e7a4b..11ae84c 100644 (file)
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
-.\" 3. All advertising materials mentioning features or use of this software
-.\"    must display the following acknowledgement:
-.\"    This product includes software developed by the University of
-.\"    California, Berkeley and its contributors.
 .\" 4. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\" SUCH DAMAGE.
 .\"
 .\"     @(#)fnmatch.3  8.3 (Berkeley) 4/28/95
-.\" $FreeBSD: src/lib/libc/gen/fnmatch.3,v 1.9.2.4 2003/03/13 18:05:37 trhodes Exp $
+.\" $FreeBSD: src/lib/libc/gen/fnmatch.3,v 1.19 2007/01/09 00:27:53 imp Exp $
 .\" $DragonFly: src/lib/libc/gen/fnmatch.3,v 1.3 2006/05/26 19:39:36 swildner Exp $
 .\"
-.Dd April 28, 1995
+.Dd July 18, 2004
 .Dt FNMATCH 3
 .Os
 .Sh NAME
 .Nm fnmatch
-.Nd match filename or pathname
+.Nd test whether a filename or pathname matches a shell-style pattern
 .Sh LIBRARY
 .Lb libc
 .Sh SYNOPSIS
@@ -116,7 +112,7 @@ rest after successful
 .Fa pattern
 matching.
 .It Dv FNM_CASEFOLD
-Ignore  case  distinctions in both the
+Ignore case distinctions in both the
 .Fa pattern
 and the
 .Fa string .
@@ -135,10 +131,14 @@ otherwise, it returns the value
 .Xr glob 3 ,
 .Xr regex 3
 .Sh STANDARDS
-The
+The current implementation of the
 .Fn fnmatch
-function conforms to
+function
+.Em does not
+conform to
 .St -p1003.2 .
+Collating symbol expressions, equivalence class expressions and
+character class expressions are not supported.
 .Sh HISTORY
 The
 .Fn fnmatch
index 33e3158..ae427cc 100644 (file)
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *     This product includes software developed by the University of
- *     California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
@@ -34,6 +30,7 @@
  * SUCH DAMAGE.
  *
  * @(#)fnmatch.c       8.2 (Berkeley) 4/16/94
+ * $FreeBSD: src/lib/libc/gen/fnmatch.c,v 1.18 2007/01/09 00:27:53 imp Exp $
  * $DragonFly: src/lib/libc/gen/fnmatch.c,v 1.6 2005/11/13 00:07:42 swildner Exp $
  */
 
  * Compares a filename or pathname to a pattern.
  */
 
-#include <ctype.h>
+/*
+ * Some notes on multibyte character support:
+ * 1. Patterns with illegal byte sequences match nothing.
+ * 2. Illegal byte sequences in the "string" argument are handled by treating
+ *    them as single-byte characters with a value of the first byte of the
+ *    sequence cast to wchar_t.
+ * 3. Multibyte conversion state objects (mbstate_t) are passed around and
+ *    used for most, but not all, conversions. Further work will be required
+ *    to support state-dependent encodings.
+ */
+
 #include <fnmatch.h>
+#include <limits.h>
 #include <string.h>
-#include <stdio.h>
+#include <wchar.h>
+#include <wctype.h>
 
 #include "collate.h"
 
 #define RANGE_NOMATCH   0
 #define RANGE_ERROR     (-1)
 
-static int rangematch (const char *, char, int, char **);
+static int rangematch(const char *, wchar_t, int, char **, mbstate_t *);
+static int fnmatch1(const char *, const char *, int, mbstate_t, mbstate_t);
 
 int
 fnmatch(const char *pattern, const char *string, int flags)
+{
+       static const mbstate_t initial;
+
+       return (fnmatch1(pattern, string, flags, initial, initial));
+}
+
+static int
+fnmatch1(const char *pattern, const char *string, int flags, mbstate_t patmbs,
+        mbstate_t strmbs)
 {
        const char *stringstart;
        char *newp;
-       char c, test;
+       char c;
+       wchar_t pc, sc;
+       size_t pclen, sclen;
 
-       for (stringstart = string;;)
-               switch (c = *pattern++) {
+       for (stringstart = string;;) {
+               pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs);
+               if (pclen == (size_t)-1 || pclen == (size_t)-2)
+                       return (FNM_NOMATCH);
+               pattern += pclen;
+               sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs);
+               if (sclen == (size_t)-1 || sclen == (size_t)-2) {
+                       sc = (unsigned char)*string;
+                       sclen = 1;
+                       memset(&strmbs, 0, sizeof(strmbs));
+               }
+               switch (pc) {
                case EOS:
-                       if ((flags & FNM_LEADING_DIR) && *string == '/')
+                       if ((flags & FNM_LEADING_DIR) && sc == '/')
                                return (0);
-                       return (*string == EOS ? 0 : FNM_NOMATCH);
+                       return (sc == EOS ? 0 : FNM_NOMATCH);
                case '?':
-                       if (*string == EOS)
+                       if (sc == EOS)
                                return (FNM_NOMATCH);
-                       if (*string == '/' && (flags & FNM_PATHNAME))
+                       if (sc == '/' && (flags & FNM_PATHNAME))
                                return (FNM_NOMATCH);
-                       if (*string == '.' && (flags & FNM_PERIOD) &&
+                       if (sc == '.' && (flags & FNM_PERIOD) &&
                            (string == stringstart ||
                            ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
                                return (FNM_NOMATCH);
-                       ++string;
+                       string += sclen;
                        break;
                case '*':
                        c = *pattern;
@@ -87,7 +118,7 @@ fnmatch(const char *pattern, const char *string, int flags)
                        while (c == '*')
                                c = *++pattern;
 
-                       if (*string == '.' && (flags & FNM_PERIOD) &&
+                       if (sc == '.' && (flags & FNM_PERIOD) &&
                            (string == stringstart ||
                            ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
                                return (FNM_NOMATCH);
@@ -107,25 +138,35 @@ fnmatch(const char *pattern, const char *string, int flags)
                        }
 
                        /* General case, use recursion. */
-                       while ((test = *string) != EOS) {
-                               if (!fnmatch(pattern, string, flags & ~FNM_PERIOD))
+                       while (sc != EOS) {
+                               if (!fnmatch1(pattern, string,
+                                   flags & ~FNM_PERIOD, patmbs, strmbs))
                                        return (0);
-                               if (test == '/' && flags & FNM_PATHNAME)
+                               sclen = mbrtowc(&sc, string, MB_LEN_MAX,
+                                   &strmbs);
+                               if (sclen == (size_t)-1 ||
+                                   sclen == (size_t)-2) {
+                                       sc = (unsigned char)*string;
+                                       sclen = 1;
+                                       memset(&strmbs, 0, sizeof(strmbs));
+                               }
+                               if (sc == '/' && flags & FNM_PATHNAME)
                                        break;
-                               ++string;
+                               string += sclen;
                        }
                        return (FNM_NOMATCH);
                case '[':
-                       if (*string == EOS)
+                       if (sc == EOS)
                                return (FNM_NOMATCH);
-                       if (*string == '/' && (flags & FNM_PATHNAME))
+                       if (sc == '/' && (flags & FNM_PATHNAME))
                                return (FNM_NOMATCH);
-                       if (*string == '.' && (flags & FNM_PERIOD) &&
+                       if (sc == '.' && (flags & FNM_PERIOD) &&
                            (string == stringstart ||
                            ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
                                return (FNM_NOMATCH);
 
-                       switch (rangematch(pattern, *string, flags, &newp)) {
+                       switch (rangematch(pattern, sc, flags, &newp,
+                           &patmbs)) {
                        case RANGE_ERROR:
                                goto norm;
                        case RANGE_MATCH:
@@ -134,37 +175,43 @@ fnmatch(const char *pattern, const char *string, int flags)
                        case RANGE_NOMATCH:
                                return (FNM_NOMATCH);
                        }
-                       ++string;
+                       string += sclen;
                        break;
                case '\\':
                        if (!(flags & FNM_NOESCAPE)) {
-                               if ((c = *pattern++) == EOS) {
-                                       c = '\\';
-                                       --pattern;
-                               }
+                               pclen = mbrtowc(&pc, pattern, MB_LEN_MAX,
+                                   &patmbs);
+                               if (pclen == (size_t)-1 || pclen == (size_t)-2)
+                                       return (FNM_NOMATCH);
+                               if (pclen == 0)
+                                       pc = '\\';
+                               pattern += pclen;
                        }
                        /* FALLTHROUGH */
                default:
                norm:
-                       if (c == *string)
+                       if (pc == sc)
                                ;
                        else if ((flags & FNM_CASEFOLD) &&
-                                (tolower((unsigned char)c) ==
-                                 tolower((unsigned char)*string)))
+                                (towlower(pc) == towlower(sc)))
                                ;
                        else
                                return (FNM_NOMATCH);
-                       string++;
+                       string += sclen;
                        break;
                }
+       }
        /* NOTREACHED */
 }
 
 static int
-rangematch(const char *pattern, char test, int flags, char **newp)
+rangematch(const char *pattern, wchar_t test, int flags, char **newp,
+          mbstate_t *patmbs)
 {
        int negate, ok;
-       char c, c2;
+       wchar_t c, c2;
+       size_t pclen;
+       const char *origpat;
 
        /*
         * A bracket expression starting with an unquoted circumflex
@@ -173,11 +220,11 @@ rangematch(const char *pattern, char test, int flags, char **newp)
         * consistency with the regular expression syntax.
         * J.T. Conklin (conklin@ngai.kaleida.com)
         */
-       if ( (negate = (*pattern == '!' || *pattern == '^')) )
+       if ((negate = (*pattern == '!' || *pattern == '^')))
                ++pattern;
 
        if (flags & FNM_CASEFOLD)
-               test = tolower((unsigned char)test);
+               test = towlower(test);
 
        /*
         * A right bracket shall lose its special meaning and represent
@@ -185,29 +232,41 @@ rangematch(const char *pattern, char test, int flags, char **newp)
         * -- POSIX.2 2.8.3.2
         */
        ok = 0;
-       c = *pattern++;
-       do {
-               if (c == '\\' && !(flags & FNM_NOESCAPE))
-                       c = *pattern++;
-               if (c == EOS)
+       origpat = pattern;
+       for (;;) {
+               if (*pattern == ']' && pattern > origpat) {
+                       pattern++;
+                       break;
+               } else if (*pattern == '\0') {
                        return (RANGE_ERROR);
-
-               if (c == '/' && (flags & FNM_PATHNAME))
+               } else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
                        return (RANGE_NOMATCH);
+               } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) {
+                       pattern++;
+               }
+               pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
+               if (pclen == (size_t)-1 || pclen == (size_t)-2)
+                       return (RANGE_NOMATCH);
+               pattern += pclen;
 
                if (flags & FNM_CASEFOLD)
-                       c = tolower((unsigned char)c);
+                       c = towlower(c);
 
-               if (*pattern == '-'
-                   && (c2 = *(pattern+1)) != EOS && c2 != ']') {
-                       pattern += 2;
-                       if (c2 == '\\' && !(flags & FNM_NOESCAPE))
-                               c2 = *pattern++;
+               if (*pattern == '-' && *(pattern + 1) != EOS &&
+                   *(pattern + 1) != ']') {
+                       if (*++pattern == '\\' && !(flags & FNM_NOESCAPE)) {
+                               if (*pattern != EOS)
+                                       pattern++;
+                       }
+                       pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs);
+                       if (pclen == (size_t)-1 || pclen == (size_t)-2)
+                               return (RANGE_NOMATCH);
+                       pattern += pclen;
                        if (c2 == EOS)
                                return (RANGE_ERROR);
 
                        if (flags & FNM_CASEFOLD)
-                               c2 = tolower((unsigned char)c2);
+                               c2 = towlower(c2);
 
                        if (__collate_load_error ?
                            c <= test && test <= c2 :
@@ -215,9 +274,10 @@ rangematch(const char *pattern, char test, int flags, char **newp)
                            && __collate_range_cmp(test, c2) <= 0
                           )
                                ok = 1;
-               } else if (c == test)
+               } else if (c == test) {
                        ok = 1;
-       } while ((c = *pattern++) != ']');
+               }
+       }
 
        *newp = __DECONST(char *, pattern);
        return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);