sh: Add UTF-8 support to ${#var}.
author: Peter Avalos <pavalos@dragonflybsd.org>
Sun, 3 Jul 2011 02:31:33 +0000 (16:31 -1000)
committer: Peter Avalos <pavalos@dragonflybsd.org>
Sun, 21 Aug 2011 19:10:26 +0000 (12:10 -0700)
If the current locale uses UTF-8, ${#var} counts code points instead of
bytes (more precisely, it counts the bytes b with (b & 0xc0) != 0x80,
i.e. every byte that is not a UTF-8 continuation byte).

Obtained-from:   FreeBSD SVN rev 221602

bin/sh/expand.c
tools/regression/bin/sh/expansion/length7.0 [new file with mode: 0644]
tools/regression/bin/sh/expansion/length8.0 [new file with mode: 0644]

diff --git a/bin/sh/expand.c b/bin/sh/expand.c
index a7930e0..b113a49 100644
--- a/bin/sh/expand.c
+++ b/bin/sh/expand.c
@@ -36,7 +36,7 @@
  * SUCH DAMAGE.
  *
  * @(#)expand.c        8.5 (Berkeley) 5/15/95
- * $FreeBSD: src/bin/sh/expand.c,v 1.83 2011/02/21 09:01:34 brucec Exp $
+ * $FreeBSD: src/bin/sh/expand.c,v 1.84 2011/05/07 14:32:16 jilles Exp $
  */
 
 #include <sys/types.h>
@@ -664,6 +664,7 @@ evalvar(char *p, int flag)
        int special;
        int startloc;
        int varlen;
+       int varlenb;
        int easy;
        int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
 
@@ -711,8 +712,15 @@ again: /* jump here after setting a variable with ${var=text} */
                if (special) {
                        varvalue(var, varflags & VSQUOTE, subtype, flag);
                        if (subtype == VSLENGTH) {
-                               varlen = expdest - stackblock() - startloc;
-                               STADJUST(-varlen, expdest);
+                               varlenb = expdest - stackblock() - startloc;
+                               varlen = varlenb;
+                               if (localeisutf8) {
+                                       val = stackblock() + startloc;
+                                       for (;val != expdest; val++)
+                                               if ((*val & 0xC0) == 0x80)
+                                                       varlen--;
+                               }
+                               STADJUST(-varlenb, expdest);
                        }
                } else {
                        char const *syntax = (varflags & VSQUOTE) ? DQSYNTAX
@@ -720,7 +728,9 @@ again: /* jump here after setting a variable with ${var=text} */
 
                        if (subtype == VSLENGTH) {
                                for (;*val; val++)
-                                       varlen++;
+                                       if (!localeisutf8 ||
+                                           (*val & 0xC0) != 0x80)
+                                               varlen++;
                        }
                        else {
                                if (quotes)
diff --git a/tools/regression/bin/sh/expansion/length7.0 b/tools/regression/bin/sh/expansion/length7.0
new file mode 100644
index 0000000..5483c32
--- /dev/null
+++ b/tools/regression/bin/sh/expansion/length7.0
@@ -0,0 +1,14 @@
+# $FreeBSD: src/tools/regression/bin/sh/expansion/length7.0,v 1.1 2011/05/07 14:32:16 jilles Exp $
+
+unset LC_ALL
+LC_CTYPE=en_US.UTF-8
+export LC_CTYPE
+
+# a umlaut
+s=$(printf '\303\244')
+# euro sign
+s=$s$(printf '\342\202\254')
+# some sort of 't' outside BMP
+s=$s$(printf '\360\235\225\245')
+set -- "$s"
+[ ${#s} = 3 ] && [ ${#1} = 3 ]
diff --git a/tools/regression/bin/sh/expansion/length8.0 b/tools/regression/bin/sh/expansion/length8.0
new file mode 100644
index 0000000..37c8717
--- /dev/null
+++ b/tools/regression/bin/sh/expansion/length8.0
@@ -0,0 +1,14 @@
+# $FreeBSD: src/tools/regression/bin/sh/expansion/length8.0,v 1.1 2011/05/07 14:32:16 jilles Exp $
+
+unset LC_ALL
+LC_CTYPE=en_US.ISO8859-1
+export LC_CTYPE
+
+# a umlaut
+s=$(printf '\303\244')
+# euro sign
+s=$s$(printf '\342\202\254')
+# some sort of 't' outside BMP
+s=$s$(printf '\360\235\225\245')
+set -- "$s"
+[ ${#s} = 9 ] && [ ${#1} = 9 ]