From aae9ea87f736cdd91c25858a9c2df58f35c6bfa2 Mon Sep 17 00:00:00 2001 From: Jeroen Ruigrok/asmodai Date: Sun, 18 Sep 2005 18:35:23 +0000 Subject: [PATCH] Synchronise with FreeBSD: Treat filenames as multibyte character strings (according to the current LC_CTYPE setting) when determining which characters are printable. This is an often-requested feature. Use wcwidth() to determine the number of column positions a character takes up, although there are still a few places left where we assume 1 byte = 1 column position, e.g. line-wrapping when handling the -m option. The error handling here is somewhat more complicated than usual: we do our best to show what we can of a filename in the presence of conversion errors, instead of simply aborting. --- bin/ls/extern.h | 5 +- bin/ls/print.c | 7 +- bin/ls/util.c | 192 +++++++++++++++++++++++++++++++++--------------- 3 files changed, 138 insertions(+), 66 deletions(-) diff --git a/bin/ls/extern.h b/bin/ls/extern.h index f05f6cf901..d179a6f64f 100644 --- a/bin/ls/extern.h +++ b/bin/ls/extern.h @@ -27,8 +27,8 @@ * SUCH DAMAGE. * * from: @(#)extern.h 8.1 (Berkeley) 5/31/93 - * $FreeBSD: src/bin/ls/extern.h,v 1.22 2004/04/06 20:06:47 markm Exp $ - * $DragonFly: src/bin/ls/extern.h,v 1.6 2005/09/18 18:01:49 asmodai Exp $ + * $FreeBSD: src/bin/ls/extern.h,v 1.23 2004/05/02 11:25:37 tjr Exp $ + * $DragonFly: src/bin/ls/extern.h,v 1.7 2005/09/18 18:35:23 asmodai Exp $ */ int acccmp(const FTSENT *, const FTSENT *); @@ -46,6 +46,7 @@ int printname(const char *); void printscol(const DISPLAY *); void printstream(const DISPLAY *); void usage(void); +int prn_normal(const char *); size_t len_octal(const char *, int); int prn_octal(const char *); int prn_printable(const char *); diff --git a/bin/ls/print.c b/bin/ls/print.c index 9ff176e08e..f61d2a0ead 100644 --- a/bin/ls/print.c +++ b/bin/ls/print.c @@ -30,8 +30,8 @@ * SUCH DAMAGE. * * @(#)print.c 8.4 (Berkeley) 4/17/94 - * $FreeBSD: src/bin/ls/print.c,v 1.63 2002/11/06 01:18:12 tjr Exp $ - * $DragonFly: src/bin/ls/print.c,v 1.13 2005/09/18 18:01:49 asmodai Exp $ + * $FreeBSD: src/bin/ls/print.c,v 1.71 2004/05/02 11:25:37 tjr Exp $ + * $DragonFly: src/bin/ls/print.c,v 1.14 2005/09/18 18:35:23 asmodai Exp $ */ #include @@ -141,7 +141,7 @@ printname(const char *name) else if (f_nonprint) return prn_printable(name); else - return printf("%s", name); + return prn_normal(name); } void @@ -221,6 +221,7 @@ printstream(const DISPLAY *dp) for (p = dp->list, chcnt = 0; p; p = p->fts_link) { if (p->fts_number == NO_PRINT) continue; + /* XXX strlen does not take octal escapes into account. */ if (strlen(p->fts_name) + chcnt + (p->fts_link ? 2 : 0) >= (unsigned)termwidth) { putchar('\n'); diff --git a/bin/ls/util.c b/bin/ls/util.c index 8ad056c45d..5e8a3311e5 100644 --- a/bin/ls/util.c +++ b/bin/ls/util.c @@ -30,8 +30,8 @@ * SUCH DAMAGE. * * @(#)util.c 8.3 (Berkeley) 4/2/94 - * $FreeBSD: src/bin/ls/util.c,v 1.33 2004/04/06 20:06:47 markm Exp $ - * $DragonFly: src/bin/ls/util.c,v 1.6 2005/09/18 11:31:08 asmodai Exp $ + * $FreeBSD: src/bin/ls/util.c,v 1.35 2004/05/03 11:48:55 tjr Exp $ + * $DragonFly: src/bin/ls/util.c,v 1.7 2005/09/18 18:35:23 asmodai Exp $ */ #include @@ -40,25 +40,82 @@ #include #include #include +#include #include #include #include +#include +#include #include "ls.h" #include "extern.h" +int +prn_normal(const char *s) +{ + mbstate_t mbs; + wchar_t wc; + int i, n; + size_t clen; + + memset(&mbs, 0, sizeof(mbs)); + n = 0; + while ((clen = mbrtowc(&wc, s, MB_LEN_MAX, &mbs)) != 0) { + if (clen == (size_t)-2) { + n += printf("%s", s); + break; + } + if (clen == (size_t)-1) { + memset(&mbs, 0, sizeof(mbs)); + putchar((unsigned char)*s); + s++; + n++; + continue; + } + for (i = 0; i < (int)clen; i++) + putchar((unsigned char)s[i]); + s += clen; + if (iswprint(wc)) + n += wcwidth(wc); + } + return (n); +} + int prn_printable(const char *s) { - char c; - int n; + mbstate_t mbs; + wchar_t wc; + int i, n; + size_t clen; - for (n = 0; (c = *s) != '\0'; ++s, ++n) - if (isprint((unsigned char)c)) - putchar(c); - else + memset(&mbs, 0, sizeof(mbs)); + n = 0; + while ((clen = mbrtowc(&wc, s, MB_LEN_MAX, &mbs)) != 0) { + if (clen == (size_t)-1) { + putchar('?'); + s++; + n++; + memset(&mbs, 0, sizeof(mbs)); + continue; + } + if (clen == (size_t)-2) { putchar('?'); - return n; + n++; + break; + } + if (!iswprint(wc)) { + putchar('?'); + s += clen; + n++; + continue; + } + for (i = 0; i < (int)clen; i++) + putchar((unsigned char)s[i]); + s += clen; + n += wcwidth(wc); + } + return (n); } /* @@ -77,70 +134,83 @@ prn_printable(const char *s) size_t len_octal(const char *s, int len) { - size_t r = 0; + mbstate_t mbs; + wchar_t wc; + size_t clen, r; - while (len--) - if (isprint((unsigned const char)*s++)) r++; else r += 4; - return r; + memset(&mbs, 0, sizeof(mbs)); + r = 0; + while (len != 0 && (clen = mbrtowc(&wc, s, len, &mbs)) != 0) { + if (clen == (size_t)-1) { + r += 4; + s++; + len--; + memset(&mbs, 0, sizeof(mbs)); + continue; + } + if (clen == (size_t)-2) { + r += 4 * len; + break; + } + if (iswprint(wc)) + r++; + else + r += 4 * clen; + s += clen; + } + return (r); } int prn_octal(const char *s) { - unsigned char ch; - int len = 0; - - while ((ch = (unsigned char)*s++)) { - if (isprint(ch) && (ch != '\"') && (ch != '\\')) - putchar(ch), len++; - else if (f_octal_escape) { - putchar('\\'); - switch (ch) { - case '\\': - putchar('\\'); - break; - case '\"': - putchar('"'); - break; - case '\a': - putchar('a'); - break; - case '\b': - putchar('b'); - break; - case '\f': - putchar('f'); - break; - case '\n': - putchar('n'); - break; - case '\r': - putchar('r'); - break; - case '\t': - putchar('t'); - break; - case '\v': - putchar('v'); - break; - default: + static const char esc[] = "\\\\\"\"\aa\bb\ff\nn\rr\tt\vv"; + const char *p; + mbstate_t mbs; + wchar_t wc; + size_t clen; + unsigned char ch; + int goodchar, i, len, prtlen; + + memset(&mbs, 0, sizeof(mbs)); + len = 0; + while ((clen = mbrtowc(&wc, s, MB_LEN_MAX, &mbs)) != 0) { + goodchar = clen != (size_t)-1 && clen != (size_t)-2; + if (goodchar && iswprint(wc) && wc != L'\"' && wc != L'\\') { + for (i = 0; i < (int)clen; i++) + putchar((unsigned char)s[i]); + len += wcwidth(wc); + } else if (goodchar && f_octal_escape && wc >= 0 && + wc <= (wchar_t)UCHAR_MAX && + (p = strchr(esc, (char)wc)) != NULL) { + putchar('\\'); + putchar(p[1]); + len += 2; + } else { + if (goodchar) + prtlen = clen; + else if (clen == (size_t)-1) + prtlen = 1; + else + prtlen = strlen(s); + for (i = 0; i < prtlen; i++) { + ch = (unsigned char)s[i]; + putchar('\\'); putchar('0' + (ch >> 6)); putchar('0' + ((ch >> 3) & 7)); putchar('0' + (ch & 7)); - len += 2; - break; + len += 4; } - len += 2; - } - else { - putchar('\\'); - putchar('0' + (ch >> 6)); - putchar('0' + ((ch >> 3) & 7)); - putchar('0' + (ch & 7)); - len += 4; } + if (clen == (size_t)-2) + break; + if (clen == (size_t)-1) { + memset(&mbs, 0, sizeof(mbs)); + s++; + } else + s += clen; } - return len; + return (len); } void -- 2.32.0