sh: Make patmatch() non-recursive.
authorPeter Avalos <pavalos@dragonflybsd.org>
Sun, 5 Feb 2012 19:28:08 +0000 (11:28 -0800)
committerPeter Avalos <pavalos@dragonflybsd.org>
Sun, 5 Feb 2012 20:48:09 +0000 (12:48 -0800)
Obtained-from:  FreeBSD 229201

bin/sh/expand.c

index c10a0f5..cbf75be 100644 (file)
@@ -36,7 +36,7 @@
  * SUCH DAMAGE.
  *
  * @(#)expand.c        8.5 (Berkeley) 5/15/95
- * $FreeBSD: src/bin/sh/expand.c,v 1.93 2011/12/28 23:40:46 jilles Exp $
+ * $FreeBSD: src/bin/sh/expand.c,v 1.94 2012/01/01 20:50:19 jilles Exp $
  */
 
 #include <sys/types.h>
@@ -1444,57 +1444,63 @@ int
 patmatch(const char *pattern, const char *string, int squoted)
 {
        const char *p, *q, *end;
+       const char *bt_p, *bt_q;
        char c;
        wchar_t wc, wc2;
 
        p = pattern;
        q = string;
+       bt_p = NULL;
+       bt_q = NULL;
        for (;;) {
                switch (c = *p++) {
                case '\0':
-                       goto breakloop;
+                       if (*q != '\0')
+                               goto backtrack;
+                       return 1;
                case CTLESC:
                        if (squoted && *q == CTLESC)
                                q++;
                        if (*q++ != *p++)
-                               return 0;
+                               goto backtrack;
                        break;
                case CTLQUOTEMARK:
                        continue;
                case '?':
                        if (squoted && *q == CTLESC)
                                q++;
-                       if (localeisutf8)
+                       if (*q == '\0')
+                               return 0;
+                       if (localeisutf8) {
                                wc = get_wc(&q);
-                       else
+                               /*
+                                * A '?' does not match invalid UTF-8 but a
+                                * '*' does, so backtrack.
+                                */
+                               if (wc == 0)
+                                       goto backtrack;
+                       } else
                                wc = (unsigned char)*q++;
-                       if (wc == '\0')
-                               return 0;
                        break;
                case '*':
                        c = *p;
                        while (c == CTLQUOTEMARK || c == '*')
                                c = *++p;
-                       if (c != CTLESC &&  c != CTLQUOTEMARK &&
-                           c != '?' && c != '*' && c != '[') {
-                               while (*q != c) {
-                                       if (squoted && *q == CTLESC &&
-                                           q[1] == c)
-                                               break;
-                                       if (*q == '\0')
-                                               return 0;
-                                       if (squoted && *q == CTLESC)
-                                               q++;
-                                       q++;
-                               }
-                       }
-                       do {
-                               if (patmatch(p, q, squoted))
-                                       return 1;
-                               if (squoted && *q == CTLESC)
-                                       q++;
-                       } while (*q++ != '\0');
-                       return 0;
+                       /*
+                        * If the pattern ends here, we know the string
+                        * matches without needing to look at the rest of it.
+                        */
+                       if (c == '\0')
+                               return 1;
+                       /*
+                        * First try the shortest match for the '*' that
+                        * could work. We can forget any earlier '*' since
+                        * there is no way having it match more characters
+                        * can help us, given that we are already here.
+                        */
+                       bt_p = p;
+                       bt_q = q;
+                       break;
                case '[': {
                        const char *endp;
                        int invert, found;
@@ -1506,7 +1512,7 @@ patmatch(const char *pattern, const char *string, int squoted)
                        for (;;) {
                                while (*endp == CTLQUOTEMARK)
                                        endp++;
-                               if (*endp == '\0')
+                               if (*endp == 0)
                                        goto dft;               /* no matching ] */
                                if (*endp == CTLESC)
                                        endp++;
@@ -1521,12 +1527,14 @@ patmatch(const char *pattern, const char *string, int squoted)
                        found = 0;
                        if (squoted && *q == CTLESC)
                                q++;
-                       if (localeisutf8)
+                       if (*q == '\0')
+                               return 0;
+                       if (localeisutf8) {
                                chr = get_wc(&q);
-                       else
+                               if (chr == 0)
+                                       goto backtrack;
+                       } else
                                chr = (unsigned char)*q++;
-                       if (chr == '\0')
-                               return 0;
                        c = *p++;
                        do {
                                if (c == CTLQUOTEMARK)
@@ -1567,21 +1575,34 @@ patmatch(const char *pattern, const char *string, int squoted)
                                }
                        } while ((c = *p++) != ']');
                        if (found == invert)
-                               return 0;
+                               goto backtrack;
                        break;
                }
 dft:           default:
                        if (squoted && *q == CTLESC)
                                q++;
-                       if (*q++ != c)
+                       if (*q == '\0')
+                               return 0;
+                       if (*q++ == c)
+                               break;
+backtrack:
+                       /*
+                        * If we have a mismatch (other than hitting the end
+                        * of the string), go back to the last '*' seen and
+                        * have it match one additional character.
+                        */
+                       if (bt_p == NULL)
+                               return 0;
+                       if (squoted && *bt_q == CTLESC)
+                               bt_q++;
+                       if (*bt_q == '\0')
                                return 0;
+                       bt_q++;
+                       p = bt_p;
+                       q = bt_q;
                        break;
                }
        }
-breakloop:
-       if (*q != '\0')
-               return 0;
-       return 1;
 }