From 4776d4e822ede882d59eb2b58d6dfb813c6148ae Mon Sep 17 00:00:00 2001 From: John Marino Date: Sat, 1 Aug 2015 20:36:42 +0200 Subject: [PATCH] libc/locale: Revamp CTYPE support (from Illumos) The LC_CTYPE format didn't change, but libc still needs a revamp to properly use the new LC_CTYPE files that localedef(1) generates. --- lib/libc/locale/Makefile.inc | 2 +- lib/libc/locale/Symbol.map | 6 +- lib/libc/locale/ascii.c | 192 -------------- lib/libc/locale/big5.c | 36 ++- lib/libc/locale/euc.c | 437 ++++++++++++++++++++++---------- lib/libc/locale/gb18030.c | 34 ++- lib/libc/locale/gb2312.c | 37 ++- lib/libc/locale/gbk.c | 33 ++- lib/libc/locale/mblocal.h | 26 +- lib/libc/locale/mbsnrtowcs.c | 15 +- lib/libc/locale/mskanji.c | 41 ++- lib/libc/locale/none.c | 16 +- lib/libc/locale/rune.c | 119 +++------ lib/libc/locale/setrunelocale.c | 49 ++-- lib/libc/locale/utf8.c | 46 ++-- lib/libc/locale/wcsnrtombs.c | 19 +- 16 files changed, 579 insertions(+), 529 deletions(-) delete mode 100644 lib/libc/locale/ascii.c diff --git a/lib/libc/locale/Makefile.inc b/lib/libc/locale/Makefile.inc index be11c59755..f4eded0d97 100644 --- a/lib/libc/locale/Makefile.inc +++ b/lib/libc/locale/Makefile.inc @@ -5,7 +5,7 @@ CMAPS+= ${.CURDIR}/locale/Symbol.map -SRCS+= ascii.c big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \ +SRCS+= big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \ gb18030.c gb2312.c gbk.c ctype.c isctype.c iswctype.c \ ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mblen.c \ mbrlen.c \ diff --git a/lib/libc/locale/Symbol.map b/lib/libc/locale/Symbol.map index 75e2224f3b..376cd7d073 100644 --- a/lib/libc/locale/Symbol.map +++ b/lib/libc/locale/Symbol.map @@ -246,7 +246,10 @@ DFprivate_1.0 { __wcwidth_l; __wrap_setrunelocale; _BIG5_init; - _EUC_init; + _EUC_CN_init; + _EUC_JP_init; + _EUC_KR_init; + _EUC_TW_init; _GB18030_init; _GB2312_init; _GBK_init; @@ -254,7 +257,6 @@ DFprivate_1.0 { _Read_RuneMagi; _UTF8_init; _PathLocale; - _ascii_init; 
_none_init; _collate_load_tables_l; _collate_lookup; diff --git a/lib/libc/locale/ascii.c b/lib/libc/locale/ascii.c deleted file mode 100644 index 6427de6a29..0000000000 --- a/lib/libc/locale/ascii.c +++ /dev/null @@ -1,192 +0,0 @@ -/*- - * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. - * Copyright (c) 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Paul Borman at Krystal Technologies. - * - * Copyright (c) 2011 The FreeBSD Foundation - * All rights reserved. - * Portions of this software were developed by David Chisnall - * under sponsorship from the FreeBSD Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: head/lib/libc/locale/ascii.c 227753 2011-11-20 14:45:42Z theraven $ - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include "mblocal.h" - -static size_t _ascii_mbrtowc(wchar_t * __restrict, const char * __restrict, - size_t, mbstate_t * __restrict); -static int _ascii_mbsinit(const mbstate_t *); -static size_t _ascii_mbsnrtowcs(wchar_t * __restrict dst, - const char ** __restrict src, size_t nms, size_t len, - mbstate_t * __restrict ps __unused); -static size_t _ascii_wcrtomb(char * __restrict, wchar_t, - mbstate_t * __restrict); -static size_t _ascii_wcsnrtombs(char * __restrict, const wchar_t ** __restrict, - size_t, size_t, mbstate_t * __restrict); - -int -_ascii_init(struct xlocale_ctype *l,_RuneLocale *rl) -{ - - l->__mbrtowc = _ascii_mbrtowc; - l->__mbsinit = _ascii_mbsinit; - l->__mbsnrtowcs = _ascii_mbsnrtowcs; - l->__wcrtomb = _ascii_wcrtomb; - l->__wcsnrtombs = _ascii_wcsnrtombs; - l->runes = rl; - l->__mb_cur_max = 1; - l->__mb_sb_limit = 128; - return(0); -} - -static int -_ascii_mbsinit(const mbstate_t *ps __unused) -{ - - /* - * Encoding is not state dependent - we are always in the - * initial state. 
- */ - return (1); -} - -static size_t -_ascii_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, - mbstate_t * __restrict ps __unused) -{ - - if (s == NULL) - /* Reset to initial shift state (no-op) */ - return (0); - if (n == 0) - /* Incomplete multibyte sequence */ - return ((size_t)-2); - if (*s & 0x80) { - errno = EILSEQ; - return ((size_t)-1); - } - if (pwc != NULL) - *pwc = (unsigned char)*s; - return (*s == '\0' ? 0 : 1); -} - -static size_t -_ascii_wcrtomb(char * __restrict s, wchar_t wc, - mbstate_t * __restrict ps __unused) -{ - - if (s == NULL) - /* Reset to initial shift state (no-op) */ - return (1); - if (wc < 0 || wc > 127) { - errno = EILSEQ; - return ((size_t)-1); - } - *s = (unsigned char)wc; - return (1); -} - -static size_t -_ascii_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, - size_t nms, size_t len, mbstate_t * __restrict ps __unused) -{ - const char *s; - size_t nchr; - - if (dst == NULL) { - for (s = *src; nms > 0 && *s != '\0'; s++, nms--) { - if (*s & 0x80) { - errno = EILSEQ; - return ((size_t)-1); - } - } - return (s - *src); - } - - s = *src; - nchr = 0; - while (len-- > 0 && nms-- > 0) { - if (*s & 0x80) { - errno = EILSEQ; - return ((size_t)-1); - } - if ((*dst++ = (unsigned char)*s++) == L'\0') { - *src = NULL; - return (nchr); - } - nchr++; - } - *src = s; - return (nchr); -} - -static size_t -_ascii_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, - size_t nwc, size_t len, mbstate_t * __restrict ps __unused) -{ - const wchar_t *s; - size_t nchr; - - if (dst == NULL) { - for (s = *src; nwc > 0 && *s != L'\0'; s++, nwc--) { - if (*s < 0 || *s > 127) { - errno = EILSEQ; - return ((size_t)-1); - } - } - return (s - *src); - } - - s = *src; - nchr = 0; - while (len-- > 0 && nwc-- > 0) { - if (*s < 0 || *s > 127) { - errno = EILSEQ; - return ((size_t)-1); - } - if ((*dst++ = *s++) == '\0') { - *src = NULL; - return (nchr); - } - nchr++; - } - *src = s; - return (nchr); -} - 
diff --git a/lib/libc/locale/big5.c b/lib/libc/locale/big5.c index 9a24f005ad..dbfeb43966 100644 --- a/lib/libc/locale/big5.c +++ b/lib/libc/locale/big5.c @@ -1,4 +1,6 @@ -/*- +/* + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. @@ -19,11 +21,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -40,10 +38,8 @@ * SUCH DAMAGE. 
* * @(#)big5.c 8.1 (Berkeley) 6/4/93 - * $FreeBSD: head/lib/libc/locale/big5.c 227753 2011-11-20 14:45:42Z theraven $ */ - #include #include #include @@ -59,6 +55,12 @@ static size_t _BIG5_mbrtowc(wchar_t * __restrict, const char * __restrict, static int _BIG5_mbsinit(const mbstate_t *); static size_t _BIG5_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +static size_t _BIG5_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _BIG5_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); typedef struct { wchar_t ch; @@ -70,6 +72,8 @@ _BIG5_init(struct xlocale_ctype *l, _RuneLocale *rl) l->__mbrtowc = _BIG5_mbrtowc; l->__wcrtomb = _BIG5_wcrtomb; + l->__mbsnrtowcs = _BIG5_mbsnrtowcs; + l->__wcsnrtombs = _BIG5_wcsnrtombs; l->__mbsinit = _BIG5_mbsinit; l->runes = rl; l->__mb_cur_max = 2; @@ -145,7 +149,7 @@ _BIG5_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, wc = (wc << 8) | (*s++ & 0xff); if (pwc != NULL) *pwc = wc; - return (2); + return (2); } else { if (pwc != NULL) *pwc = wc; @@ -176,3 +180,17 @@ _BIG5_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) *s = wc & 0xff; return (1); } + +static size_t +_BIG5_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _BIG5_mbrtowc)); +} + +static size_t +_BIG5_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _BIG5_wcrtomb)); +} diff --git a/lib/libc/locale/euc.c b/lib/libc/locale/euc.c index 1c81bb2840..78af95ae35 100644 --- a/lib/libc/locale/euc.c +++ b/lib/libc/locale/euc.c @@ -1,4 +1,6 @@ -/*- +/* + * Copyright 2013 Garrett D'Amore + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
* Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. @@ -19,11 +21,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -40,7 +38,6 @@ * SUCH DAMAGE. * * @(#)euc.c 8.1 (Berkeley) 6/4/93 - * $FreeBSD: head/lib/libc/locale/euc.c 227753 2011-11-20 14:45:42Z theraven $ */ #include @@ -55,17 +52,56 @@ extern int __mb_sb_limit; -static size_t _EUC_mbrtowc(wchar_t * __restrict, const char * __restrict, +static size_t _EUC_mbrtowc_impl(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t); +static size_t _EUC_wcrtomb_impl(char * __restrict, wchar_t, + mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t); + +static size_t _EUC_CN_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); -static int _EUC_mbsinit(const mbstate_t *); -static size_t _EUC_wcrtomb(char * __restrict, wchar_t, +static size_t _EUC_JP_mbrtowc(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict); +static size_t _EUC_KR_mbrtowc(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict); +static size_t _EUC_TW_mbrtowc(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * 
__restrict); + +static size_t _EUC_CN_wcrtomb(char * __restrict, wchar_t, + mbstate_t * __restrict); +static size_t _EUC_JP_wcrtomb(char * __restrict, wchar_t, + mbstate_t * __restrict); +static size_t _EUC_KR_wcrtomb(char * __restrict, wchar_t, + mbstate_t * __restrict); +static size_t _EUC_TW_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); -typedef struct { - int count[4]; - wchar_t bits[4]; - wchar_t mask; -} _EucInfo; +static size_t _EUC_CN_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_JP_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_KR_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_TW_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); + +static size_t _EUC_CN_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_JP_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_KR_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _EUC_TW_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); + +static int _EUC_mbsinit(const mbstate_t *); typedef struct { wchar_t ch; @@ -73,94 +109,218 @@ typedef struct { int want; } _EucState; +static int +_EUC_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((const _EucState *)ps)->want == 0); +} + +/* + * EUC-CN uses CS0, CS1 and CS2 (4 bytes). 
+ */ int -_EUC_init(struct xlocale_ctype *l, _RuneLocale *rl) +_EUC_CN_init(struct xlocale_ctype *l, _RuneLocale *rl) { - _EucInfo *ei; - int x, new__mb_cur_max; - char *v, *e; + l->__mbrtowc = _EUC_CN_mbrtowc; + l->__wcrtomb = _EUC_CN_wcrtomb; + l->__mbsnrtowcs = _EUC_CN_mbsnrtowcs; + l->__wcsnrtombs = _EUC_CN_wcsnrtombs; + l->__mbsinit = _EUC_mbsinit; - if (rl->__variable == NULL) - return (EFTYPE); + l->runes = rl; + l->__mb_cur_max = 4; + l->__mb_sb_limit = 256; + return (0); +} - v = (char *)rl->__variable; +static size_t +_EUC_CN_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps) +{ + return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0)); +} - while (*v == ' ' || *v == '\t') - ++v; +static size_t +_EUC_CN_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc)); +} - if ((ei = malloc(sizeof(_EucInfo))) == NULL) - return (errno == 0 ? 
ENOMEM : errno); +static size_t +_EUC_CN_wcrtomb(char * __restrict s, wchar_t wc, + mbstate_t * __restrict ps) +{ + return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0)); +} - new__mb_cur_max = 0; - for (x = 0; x < 4; ++x) { - ei->count[x] = (int)strtol(v, &e, 0); - if (v == e || !(v = e)) { - free(ei); - return (EFTYPE); - } - if (new__mb_cur_max < ei->count[x]) - new__mb_cur_max = ei->count[x]; - while (*v == ' ' || *v == '\t') - ++v; - ei->bits[x] = (int)strtol(v, &e, 0); - if (v == e || !(v = e)) { - free(ei); - return (EFTYPE); - } - while (*v == ' ' || *v == '\t') - ++v; - } - ei->mask = (int)strtol(v, &e, 0); - if (v == e || !(v = e)) { - free(ei); - return (EFTYPE); - } - rl->__variable = ei; - rl->__variable_len = sizeof(_EucInfo); - l->runes = rl; - l->__mb_cur_max = new__mb_cur_max; - l->__mbrtowc = _EUC_mbrtowc; - l->__wcrtomb = _EUC_wcrtomb; +static size_t +_EUC_CN_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb)); +} + +/* + * EUC-KR uses only CS0 and CS1. 
+ */ +int +_EUC_KR_init(struct xlocale_ctype *l, _RuneLocale *rl) +{ + l->__mbrtowc = _EUC_KR_mbrtowc; + l->__wcrtomb = _EUC_KR_wcrtomb; + l->__mbsnrtowcs = _EUC_KR_mbsnrtowcs; + l->__wcsnrtombs = _EUC_KR_wcsnrtombs; l->__mbsinit = _EUC_mbsinit; - l->__mb_sb_limit = 256; + + l->runes = rl; + l->__mb_cur_max = 2; + l->__mb_sb_limit = 128; return (0); } -static int -_EUC_mbsinit(const mbstate_t *ps) +static size_t +_EUC_KR_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps) { + return (_EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0)); +} - return (ps == NULL || ((const _EucState *)ps)->want == 0); +static size_t +_EUC_KR_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc)); } -#define CEI ((_EucInfo *)(_CurrentRuneLocale->__variable)) +static size_t +_EUC_KR_wcrtomb(char * __restrict s, wchar_t wc, + mbstate_t * __restrict ps) +{ + return (_EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0)); +} -#define _SS2 0x008e -#define _SS3 0x008f +static size_t +_EUC_KR_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb)); +} -#define GR_BITS 0x80808080 /* XXX: to be fixed */ +/* + * EUC-JP uses CS0, CS1, CS2, and CS3. 
+ */ +int +_EUC_JP_init(struct xlocale_ctype *l, _RuneLocale *rl) +{ + l->__mbrtowc = _EUC_JP_mbrtowc; + l->__wcrtomb = _EUC_JP_wcrtomb; + l->__mbsnrtowcs = _EUC_JP_mbsnrtowcs; + l->__wcsnrtombs = _EUC_JP_wcsnrtombs; + l->__mbsinit = _EUC_mbsinit; -static __inline int -_euc_set(u_int c) + l->runes = rl; + l->__mb_cur_max = 3; + l->__mb_sb_limit = 196; + return (0); +} + +static size_t +_EUC_JP_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps) { + return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3)); +} - c &= 0xff; - return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0); +static size_t +_EUC_JP_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc)); } static size_t -_EUC_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, +_EUC_JP_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) +{ + return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3)); +} + +static size_t +_EUC_JP_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb)); +} + +/* + * EUC-TW uses CS0, CS1, and CS2. 
+ */ +int +_EUC_TW_init(struct xlocale_ctype *l, _RuneLocale *rl) +{ + l->__mbrtowc = _EUC_TW_mbrtowc; + l->__wcrtomb = _EUC_TW_wcrtomb; + l->__mbsnrtowcs = _EUC_TW_mbsnrtowcs; + l->__wcsnrtombs = _EUC_TW_wcsnrtombs; + l->__mbsinit = _EUC_mbsinit; + + l->runes = rl; + l->__mb_cur_max = 4; + l->__mb_sb_limit = 256; + return (0); +} + +static size_t +_EUC_TW_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps) +{ + return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0)); +} + +static size_t +_EUC_TW_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc)); +} + +static size_t +_EUC_TW_wcrtomb(char * __restrict s, wchar_t wc, + mbstate_t * __restrict ps) +{ + return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0)); +} + +static size_t +_EUC_TW_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb)); +} + +/* + * Common EUC code. 
+ */ + +static size_t +_EUC_mbrtowc_impl(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps, + uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width) { _EucState *es; - int i, set, want; + int i, want; wchar_t wc; - const char *os; + unsigned char ch; es = (_EucState *)ps; - if (es->want < 0 || es->want > MB_CUR_MAX || es->set < 0 || - es->set > 3) { + if (es->want < 0 || es->want > MB_CUR_MAX) { errno = EINVAL; return ((size_t)-1); } @@ -175,58 +335,59 @@ _EUC_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, /* Incomplete multibyte sequence */ return ((size_t)-2); - os = s; - if (es->want == 0) { - want = CEI->count[set = _euc_set(*s)]; - if (set == 2 || set == 3) { - --want; - if (--n == 0) { - /* Incomplete multibyte sequence */ - es->set = set; - es->want = want; - es->ch = 0; - return ((size_t)-2); - } - ++s; - if (*s == '\0') { - errno = EILSEQ; - return ((size_t)-1); - } + /* Fast path for plain ASCII (CS0) */ + if (((ch = (unsigned char)*s) & 0x80) == 0) { + if (pwc != NULL) + *pwc = ch; + return (ch != '\0' ? 1 : 0); } - wc = (unsigned char)*s++; + + if (ch >= 0xa1) { + /* CS1 */ + want = 2; + } else if (ch == cs2) { + want = cs2width; + } else if (ch == cs3) { + want = cs3width; + } else { + errno = EILSEQ; + return ((size_t)-1); + } + + + es->want = want; + es->ch = 0; } else { - set = es->set; want = es->want; wc = es->ch; } - for (i = (es->want == 0) ? 1 : 0; i < MIN(want, n); i++) { - if (*s == '\0') { - errno = EILSEQ; - return ((size_t)-1); - } - wc = (wc << 8) | (unsigned char)*s++; + + for (i = 0; i < MIN(want, n); i++) { + wc <<= 8; + wc |= *s; + s++; } if (i < want) { /* Incomplete multibyte sequence */ - es->set = set; es->want = want - i; es->ch = wc; return ((size_t)-2); } - wc = (wc & ~CEI->mask) | CEI->bits[set]; if (pwc != NULL) *pwc = wc; es->want = 0; - return (wc == L'\0' ? 0 : s - os); + return (wc == L'\0' ? 
0 : want); } static size_t -_EUC_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) +_EUC_wcrtomb_impl(char * __restrict s, wchar_t wc, + mbstate_t * __restrict ps, + uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width) { _EucState *es; - wchar_t m, nm; int i, len; + wchar_t nm; es = (_EucState *)ps; @@ -239,34 +400,52 @@ _EUC_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) /* Reset to initial shift state (no-op) */ return (1); - m = wc & CEI->mask; - nm = wc & ~m; + if ((wc & ~0x7f) == 0) { + /* Fast path for plain ASCII (CS0) */ + *s = (char)wc; + return (1); + } - if (m == CEI->bits[1]) { -CodeSet1: - /* Codeset 1: The first byte must have 0x80 in it. */ - i = len = CEI->count[1]; - while (i-- > 0) - *s++ = (nm >> (i << 3)) | 0x80; + /* Determine the "length" */ + if ((unsigned)wc > 0xffffff) { + len = 4; + } else if ((unsigned)wc > 0xffff) { + len = 3; + } else if ((unsigned)wc > 0xff) { + len = 2; } else { - if (m == CEI->bits[0]) - i = len = CEI->count[0]; - else if (m == CEI->bits[2]) { - i = len = CEI->count[2]; - *s++ = _SS2; - --i; - /* SS2 designates G2 into GR */ - nm |= GR_BITS; - } else if (m == CEI->bits[3]) { - i = len = CEI->count[3]; - *s++ = _SS3; - --i; - /* SS3 designates G3 into GR */ - nm |= GR_BITS; - } else - goto CodeSet1; /* Bletch */ - while (i-- > 0) - *s++ = (nm >> (i << 3)) & 0xff; + len = 1; + } + + if (len > MB_CUR_MAX) { + errno = EILSEQ; + return ((size_t)-1); + } + + /* This first check excludes CS1, which is implicitly valid. 
*/ + if ((wc < 0xa100) || (wc > 0xffff)) { + /* Check for valid CS2 or CS3 */ + nm = (wc >> ((len - 1) * 8)); + if (nm == cs2) { + if (len != cs2width) { + errno = EILSEQ; + return ((size_t)-1); + } + } else if (nm == cs3) { + if (len != cs3width) { + errno = EILSEQ; + return ((size_t)-1); + } + } else { + errno = EILSEQ; + return ((size_t)-1); + } + } + + /* Stash the bytes, least significant last */ + for (i = len - 1; i >= 0; i--) { + s[i] = (wc & 0xff); + wc >>= 8; } return (len); } diff --git a/lib/libc/locale/gb18030.c b/lib/libc/locale/gb18030.c index b8f7e9820c..ad5103405a 100644 --- a/lib/libc/locale/gb18030.c +++ b/lib/libc/locale/gb18030.c @@ -1,4 +1,6 @@ -/*- +/* + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins * All rights reserved. * @@ -27,9 +29,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD: head/lib/libc/locale/gb18030.c 227753 2011-11-20 14:45:42Z theraven $ */ + /* * PRC National Standard GB 18030-2000 encoding of Chinese text. 
* @@ -50,6 +51,13 @@ static size_t _GB18030_mbrtowc(wchar_t * __restrict, const char * __restrict, static int _GB18030_mbsinit(const mbstate_t *); static size_t _GB18030_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +static size_t _GB18030_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _GB18030_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); + typedef struct { int count; @@ -63,6 +71,8 @@ _GB18030_init(struct xlocale_ctype *l, _RuneLocale *rl) l->__mbrtowc = _GB18030_mbrtowc; l->__wcrtomb = _GB18030_wcrtomb; l->__mbsinit = _GB18030_mbsinit; + l->__mbsnrtowcs = _GB18030_mbsnrtowcs; + l->__wcsnrtombs = _GB18030_wcsnrtombs; l->runes = rl; l->__mb_cur_max = 4; l->__mb_sb_limit = 128; @@ -100,7 +110,7 @@ _GB18030_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, } ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count); - memcpy(gs->bytes + gs->count, s, ncopy); + (void) memcpy(gs->bytes + gs->count, s, ncopy); ocount = gs->count; gs->count += ncopy; s = (char *)gs->bytes; @@ -223,3 +233,19 @@ ilseq: errno = EILSEQ; return ((size_t)-1); } + +static size_t +_GB18030_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, size_t nms, size_t len, + mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GB18030_mbrtowc)); +} + +static size_t +_GB18030_wcsnrtombs(char * __restrict dst, + const wchar_t ** __restrict src, size_t nwc, size_t len, + mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GB18030_wcrtomb)); +} diff --git a/lib/libc/locale/gb2312.c b/lib/libc/locale/gb2312.c index 39feefbbbc..5075fddeb0 100644 --- a/lib/libc/locale/gb2312.c +++ b/lib/libc/locale/gb2312.c @@ -1,4 +1,6 @@ -/*- +/* + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2004 Tim J. Robbins. 
All rights reserved. * Copyright (c) 2003 David Xu * All rights reserved. @@ -28,8 +30,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD: head/lib/libc/locale/gb2312.c 227753 2011-11-20 14:45:42Z theraven $ */ #include @@ -46,6 +46,13 @@ static size_t _GB2312_mbrtowc(wchar_t * __restrict, const char * __restrict, static int _GB2312_mbsinit(const mbstate_t *); static size_t _GB2312_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +static size_t _GB2312_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _GB2312_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); + typedef struct { int count; @@ -60,6 +67,8 @@ _GB2312_init(struct xlocale_ctype *l, _RuneLocale *rl) l->__mbrtowc = _GB2312_mbrtowc; l->__wcrtomb = _GB2312_wcrtomb; l->__mbsinit = _GB2312_mbsinit; + l->__mbsnrtowcs = _GB2312_mbsnrtowcs; + l->__wcsnrtombs = _GB2312_wcsnrtombs; l->__mb_cur_max = 2; l->__mb_sb_limit = 128; return (0); @@ -72,7 +81,7 @@ _GB2312_mbsinit(const mbstate_t *ps) return (ps == NULL || ((const _GB2312State *)ps)->count == 0); } -static __inline int +static int _GB2312_check(const char *str, size_t n) { const u_char *s = (const u_char *)str; @@ -91,7 +100,7 @@ _GB2312_check(const char *str, size_t n) } else if (s[0] & 0x80) { /* Invalid multibyte sequence */ return (-1); - } + } return (1); } @@ -118,7 +127,7 @@ _GB2312_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, } ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count); - memcpy(gs->bytes + gs->count, s, ncopy); + (void) memcpy(gs->bytes + gs->count, s, ncopy); ocount = gs->count; gs->count += ncopy; s = (char *)gs->bytes; @@ -159,3 +168,19 @@ _GB2312_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) *s = wc & 0xff; 
return (1); } + +static size_t +_GB2312_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, size_t nms, size_t len, + mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GB2312_mbrtowc)); +} + +static size_t +_GB2312_wcsnrtombs(char * __restrict dst, + const wchar_t ** __restrict src, size_t nwc, size_t len, + mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GB2312_wcrtomb)); +} diff --git a/lib/libc/locale/gbk.c b/lib/libc/locale/gbk.c index a420b60db2..646be5a052 100644 --- a/lib/libc/locale/gbk.c +++ b/lib/libc/locale/gbk.c @@ -1,4 +1,6 @@ -/*- +/* + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. @@ -34,11 +36,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- * - * $FreeBSD: head/lib/libc/locale/gbk.c 227753 2011-11-20 14:45:42Z theraven $ */ - #include #include #include @@ -54,6 +53,12 @@ static size_t _GBK_mbrtowc(wchar_t * __restrict, const char * __restrict, static int _GBK_mbsinit(const mbstate_t *); static size_t _GBK_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +static size_t _GBK_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _GBK_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); typedef struct { wchar_t ch; @@ -66,6 +71,8 @@ _GBK_init(struct xlocale_ctype *l, _RuneLocale *rl) l->__mbrtowc = _GBK_mbrtowc; l->__wcrtomb = _GBK_wcrtomb; l->__mbsinit = _GBK_mbsinit; + l->__mbsnrtowcs = _GBK_mbsnrtowcs; + l->__wcsnrtombs = _GBK_wcsnrtombs; l->runes = rl; l->__mb_cur_max = 2; l->__mb_sb_limit = 128; @@ -79,7 +86,7 @@ _GBK_mbsinit(const mbstate_t *ps) return (ps == NULL || ((const _GBKState *)ps)->ch == 0); } -static __inline int +static int _gbk_check(u_int c) { @@ -140,7 +147,7 @@ _GBK_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, wc = (wc << 8) | (*s++ & 0xff); if (pwc != NULL) *pwc = wc; - return (2); + return (2); } else { if (pwc != NULL) *pwc = wc; @@ -171,3 +178,17 @@ _GBK_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) *s = wc & 0xff; return (1); } + +static size_t +_GBK_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GBK_mbrtowc)); +} + +static size_t +_GBK_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps) +{ + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GBK_wcrtomb)); +} diff --git a/lib/libc/locale/mblocal.h b/lib/libc/locale/mblocal.h index f3512cbc5c..b93bf7f432 100644 --- a/lib/libc/locale/mblocal.h +++ 
b/lib/libc/locale/mblocal.h @@ -1,4 +1,6 @@ -/*- +/* + * Copyright 2013 Garrett D'Amore + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2004 Tim J. Robbins. * All rights reserved. * @@ -27,8 +29,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD: head/lib/libc/locale/mblocal.h 227753 2011-11-20 14:45:42Z theraven $ */ #ifndef _MBLOCAL_H_ @@ -37,6 +37,8 @@ #include #include "xlocale_private.h" +#define SS2 0x008e +#define SS3 0x008f /* * Conversion function pointers for current encoding. @@ -62,18 +64,24 @@ extern struct xlocale_ctype __xlocale_global_ctype; * Rune initialization function prototypes. */ int _none_init(struct xlocale_ctype *, _RuneLocale *); -int _ascii_init(struct xlocale_ctype *, _RuneLocale *); int _UTF8_init(struct xlocale_ctype *, _RuneLocale *); -int _EUC_init(struct xlocale_ctype *, _RuneLocale *); +int _EUC_CN_init(struct xlocale_ctype *, _RuneLocale *); +int _EUC_JP_init(struct xlocale_ctype *, _RuneLocale *); +int _EUC_KR_init(struct xlocale_ctype *, _RuneLocale *); +int _EUC_TW_init(struct xlocale_ctype *, _RuneLocale *); int _GB18030_init(struct xlocale_ctype *, _RuneLocale *); int _GB2312_init(struct xlocale_ctype *, _RuneLocale *); int _GBK_init(struct xlocale_ctype *, _RuneLocale *); int _BIG5_init(struct xlocale_ctype *, _RuneLocale *); int _MSKanji_init(struct xlocale_ctype *, _RuneLocale *); -extern size_t __mbsnrtowcs_std(wchar_t * __restrict, const char ** __restrict, - size_t, size_t, mbstate_t * __restrict); -extern size_t __wcsnrtombs_std(char * __restrict, const wchar_t ** __restrict, - size_t, size_t, mbstate_t * __restrict); +typedef size_t (*mbrtowc_pfn_t)(wchar_t * __restrict, + const char * __restrict, size_t, mbstate_t * __restrict); +typedef size_t (*wcrtomb_pfn_t)(char * __restrict, wchar_t, + mbstate_t * __restrict); +size_t __mbsnrtowcs_std(wchar_t * 
__restrict, const char ** __restrict, + size_t, size_t, mbstate_t * __restrict, mbrtowc_pfn_t); +size_t __wcsnrtombs_std(char * __restrict, const wchar_t ** __restrict, + size_t, size_t, mbstate_t * __restrict, wcrtomb_pfn_t); #endif /* _MBLOCAL_H_ */ diff --git a/lib/libc/locale/mbsnrtowcs.c b/lib/libc/locale/mbsnrtowcs.c index d4f07646c3..241168b84f 100644 --- a/lib/libc/locale/mbsnrtowcs.c +++ b/lib/libc/locale/mbsnrtowcs.c @@ -1,4 +1,6 @@ -/*- +/* + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. * * Copyright (c) 2011 The FreeBSD Foundation @@ -27,11 +29,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD: head/lib/libc/locale/mbsnrtowcs.c 227753 2011-11-20 14:45:42Z theraven $ */ - #include #include #include @@ -56,20 +55,20 @@ mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, size_t __mbsnrtowcs_std(wchar_t * __restrict dst, const char ** __restrict src, - size_t nms, size_t len, mbstate_t * __restrict ps) + size_t nms, size_t len, mbstate_t * __restrict ps, + mbrtowc_pfn_t pmbrtowc) { const char *s; size_t nchr; wchar_t wc; size_t nb; - struct xlocale_ctype *ct = XLOCALE_CTYPE(__get_locale()); s = *src; nchr = 0; if (dst == NULL) { for (;;) { - if ((nb = ct->__mbrtowc(&wc, s, nms, ps)) == (size_t)-1) + if ((nb = pmbrtowc(&wc, s, nms, ps)) == (size_t)-1) /* Invalid sequence - mbrtowc() sets errno. 
*/ return ((size_t)-1); else if (nb == 0 || nb == (size_t)-2) @@ -82,7 +81,7 @@ __mbsnrtowcs_std(wchar_t * __restrict dst, const char ** __restrict src, } while (len-- > 0) { - if ((nb = ct->__mbrtowc(dst, s, nms, ps)) == (size_t)-1) { + if ((nb = pmbrtowc(dst, s, nms, ps)) == (size_t)-1) { *src = s; return ((size_t)-1); } else if (nb == (size_t)-2) { diff --git a/lib/libc/locale/mskanji.c b/lib/libc/locale/mskanji.c index 60172b3da2..78860ddc41 100644 --- a/lib/libc/locale/mskanji.c +++ b/lib/libc/locale/mskanji.c @@ -1,4 +1,6 @@ /* + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * * ja_JP.SJIS locale table for BSD4.4/rune @@ -28,17 +30,16 @@ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mskanji.c 1.0 (Phase One) 5/5/95 - * $FreeBSD: head/lib/libc/locale/mskanji.c 227753 2011-11-20 14:45:42Z theraven $ */ @@ -57,6 +58,12 @@ static size_t _MSKanji_mbrtowc(wchar_t * __restrict, const char * __restrict, static int _MSKanji_mbsinit(const mbstate_t *); static size_t _MSKanji_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +static size_t _MSKanji_mbsnrtowcs(wchar_t * __restrict, + const char ** __restrict, size_t, size_t, + mbstate_t * __restrict); +static size_t _MSKanji_wcsnrtombs(char * __restrict, + const wchar_t ** __restrict, size_t, size_t, + mbstate_t * __restrict); typedef struct { wchar_t ch; @@ -68,6 +75,8 @@ _MSKanji_init(struct xlocale_ctype *l, _RuneLocale *rl) l->__mbrtowc = _MSKanji_mbrtowc; l->__wcrtomb = _MSKanji_wcrtomb; + l->__mbsnrtowcs = _MSKanji_mbsnrtowcs; + l->__wcsnrtombs = _MSKanji_wcsnrtombs; l->__mbsinit = _MSKanji_mbsinit; l->runes = rl; l->__mb_cur_max = 2; @@ -161,3 +170,19 @@ _MSKanji_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) *s++ = wc >> (i << 3); return (len); } + +static size_t +_MSKanji_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, size_t nms, + size_t len, mbstate_t * __restrict ps) +{ + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _MSKanji_mbrtowc)); +} + +static size_t +_MSKanji_wcsnrtombs(char * __restrict dst, + const wchar_t ** __restrict src, size_t nwc, + size_t len, mbstate_t * __restrict ps) +{ + return 
(__wcsnrtombs_std(dst, src, nwc, len, ps, _MSKanji_wcrtomb)); +} diff --git a/lib/libc/locale/none.c b/lib/libc/locale/none.c index fe167b9db0..d2bceab51b 100644 --- a/lib/libc/locale/none.c +++ b/lib/libc/locale/none.c @@ -1,4 +1,6 @@ -/*- +/* + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. @@ -36,10 +38,8 @@ * SUCH DAMAGE. * * @(#)none.c 8.1 (Berkeley) 6/4/93 - * $FreeBSD: head/lib/libc/locale/none.c 227753 2011-11-20 14:45:42Z theraven $ */ - #include #include #include @@ -185,16 +185,6 @@ _none_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, /* setup defaults */ -size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, - mbstate_t * __restrict) = _none_mbrtowc; -int (*__mbsinit)(const mbstate_t *) = _none_mbsinit; -size_t (*__mbsnrtowcs)(wchar_t * __restrict, const char ** __restrict, - size_t, size_t, mbstate_t * __restrict) = _none_mbsnrtowcs; -size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict) = - _none_wcrtomb; -size_t (*__wcsnrtombs)(char * __restrict, const wchar_t ** __restrict, - size_t, size_t, mbstate_t * __restrict) = _none_wcsnrtombs; - struct xlocale_ctype __xlocale_global_ctype = { {{0}, "C"}, (_RuneLocale*)&_DefaultRuneLocale, diff --git a/lib/libc/locale/rune.c b/lib/libc/locale/rune.c index 8b278463a5..1271322824 100644 --- a/lib/libc/locale/rune.c +++ b/lib/libc/locale/rune.c @@ -1,4 +1,6 @@ -/*- +/* + * Copyright 2014 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * @@ -30,12 +32,10 @@ * SUCH DAMAGE. 
* * @(#)rune.c 8.1 (Berkeley) 6/4/93 - * $FreeBSD: head/lib/libc/locale/rune.c 165903 2007-01-09 00:28:16Z imp $ */ #include "namespace.h" -#include #include #include #include @@ -43,14 +43,15 @@ #include #include #include +#include +#include +#include #include "un-namespace.h" #include "runefile.h" -_RuneLocale *_Read_RuneMagi(FILE *); - _RuneLocale * -_Read_RuneMagi(FILE *fp) +_Read_RuneMagi(const char *fname) { char *fdata, *data; void *lastp; @@ -65,119 +66,77 @@ _Read_RuneMagi(FILE *fp) _FileRuneEntry *maplower_ext_ranges; _FileRuneEntry *mapupper_ext_ranges; int runetype_ext_len = 0; + int fd; - if (_fstat(fileno(fp), &sb) < 0) + if ((fd = _open(fname, O_RDONLY)) < 0) { + errno = EINVAL; return (NULL); + } - if ((size_t)sb.st_size < sizeof(_FileRuneLocale)) { - errno = EFTYPE; + if (_fstat(fd, &sb) < 0) { + (void) _close(fd); + errno = EINVAL; return (NULL); } - if ((fdata = malloc(sb.st_size)) == NULL) - return (NULL); - - errno = 0; - rewind(fp); /* Someone might have read the magic number once already */ - if (errno) { - saverr = errno; - free(fdata); - errno = saverr; + if ((size_t)sb.st_size < sizeof (_FileRuneLocale)) { + (void) _close(fd); + errno = EINVAL; return (NULL); } - if (fread(fdata, sb.st_size, 1, fp) != 1) { - saverr = errno; - free(fdata); - errno = saverr; + + fdata = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + (void) _close(fd); + if (fdata == NULL) { + errno = EINVAL; return (NULL); } - frl = (_FileRuneLocale *)fdata; + frl = (_FileRuneLocale *)(void *)fdata; lastp = fdata + sb.st_size; variable = frl + 1; - if (memcmp(frl->magic, _FILE_RUNE_MAGIC_1, sizeof(frl->magic))) { - free(fdata); - errno = EFTYPE; - return (NULL); - } - - frl->variable_len = ntohl(frl->variable_len); - frl->runetype_ext_nranges = ntohl(frl->runetype_ext_nranges); - frl->maplower_ext_nranges = ntohl(frl->maplower_ext_nranges); - frl->mapupper_ext_nranges = ntohl(frl->mapupper_ext_nranges); - - for (x = 0; x < _CACHED_RUNES; ++x) { - frl->runetype[x] = 
ntohl(frl->runetype[x]); - frl->maplower[x] = ntohl(frl->maplower[x]); - frl->mapupper[x] = ntohl(frl->mapupper[x]); + if (memcmp(frl->magic, _FILE_RUNE_MAGIC_1, sizeof (frl->magic))) { + goto invalid; } runetype_ext_ranges = (_FileRuneEntry *)variable; variable = runetype_ext_ranges + frl->runetype_ext_nranges; if (variable > lastp) { - free(fdata); - errno = EFTYPE; - return (NULL); + goto invalid; } maplower_ext_ranges = (_FileRuneEntry *)variable; variable = maplower_ext_ranges + frl->maplower_ext_nranges; if (variable > lastp) { - free(fdata); - errno = EFTYPE; - return (NULL); + goto invalid; } mapupper_ext_ranges = (_FileRuneEntry *)variable; variable = mapupper_ext_ranges + frl->mapupper_ext_nranges; if (variable > lastp) { - free(fdata); - errno = EFTYPE; - return (NULL); + goto invalid; } frr = runetype_ext_ranges; for (x = 0; x < frl->runetype_ext_nranges; ++x) { uint32_t *types; - frr[x].min = ntohl(frr[x].min); - frr[x].max = ntohl(frr[x].max); - frr[x].map = ntohl(frr[x].map); if (frr[x].map == 0) { int len = frr[x].max - frr[x].min + 1; types = variable; variable = types + len; runetype_ext_len += len; if (variable > lastp) { - free(fdata); - errno = EFTYPE; - return (NULL); + goto invalid; } - while (len-- > 0) - types[len] = ntohl(types[len]); } } - frr = maplower_ext_ranges; - for (x = 0; x < frl->maplower_ext_nranges; ++x) { - frr[x].min = ntohl(frr[x].min); - frr[x].max = ntohl(frr[x].max); - frr[x].map = ntohl(frr[x].map); - } - - frr = mapupper_ext_ranges; - for (x = 0; x < frl->mapupper_ext_nranges; ++x) { - frr[x].min = ntohl(frr[x].min); - frr[x].max = ntohl(frr[x].max); - frr[x].map = ntohl(frr[x].map); - } if ((char *)variable + frl->variable_len > (char *)lastp) { - free(fdata); - errno = EFTYPE; - return (NULL); + goto invalid; } /* @@ -190,17 +149,16 @@ _Read_RuneMagi(FILE *fp) frl->variable_len); if (data == NULL) { saverr = errno; - free(fdata); + (void) munmap(fdata, sb.st_size); errno = saverr; return (NULL); } - rl = (_RuneLocale 
*)data; + rl = (_RuneLocale *)(void *)data; rl->__variable = rl + 1; - memcpy(rl->__magic, _RUNE_MAGIC_1, sizeof(rl->__magic)); - memcpy(rl->__encoding, frl->encoding, sizeof(rl->__encoding)); - rl->__invalid_rune = 0; + (void) memcpy(rl->__magic, _RUNE_MAGIC_1, sizeof (rl->__magic)); + (void) memcpy(rl->__encoding, frl->encoding, sizeof (rl->__encoding)); rl->__variable_len = frl->variable_len; rl->__runetype_ext.__nranges = frl->runetype_ext_nranges; @@ -262,8 +220,8 @@ _Read_RuneMagi(FILE *fp) rr[x].__map = frr[x].map; } - memcpy(rl->__variable, variable, rl->__variable_len); - free(fdata); + (void) memcpy(rl->__variable, variable, rl->__variable_len); + (void) munmap(fdata, sb.st_size); /* * Go out and zero pointers that should be zero. @@ -281,4 +239,9 @@ _Read_RuneMagi(FILE *fp) rl->__mapupper_ext.__ranges = NULL; return (rl); + +invalid: + (void) munmap(fdata, sb.st_size); + errno = EINVAL; + return (NULL); } diff --git a/lib/libc/locale/setrunelocale.c b/lib/libc/locale/setrunelocale.c index b2b6452d64..58124fba87 100644 --- a/lib/libc/locale/setrunelocale.c +++ b/lib/libc/locale/setrunelocale.c @@ -63,7 +63,7 @@ __thread const _RuneLocale *_ThreadRuneLocale; extern int __mb_sb_limit; -extern _RuneLocale *_Read_RuneMagi(FILE *); +extern _RuneLocale *_Read_RuneMagi(const char *); static int __setrunelocale(struct xlocale_ctype *l, const char *); @@ -72,8 +72,6 @@ destruct_ctype(void *v) { struct xlocale_ctype *l = v; - if (strcmp(l->runes->__encoding, "EUC") == 0) - free(l->runes->__variable); if (&_DefaultRuneLocale != l->runes) free(l->runes); free(l); @@ -89,12 +87,7 @@ __getCurrentRuneLocale(void) static void free_runes(_RuneLocale *rl) { - - /* FIXME: The "EUC" check here is a hideous abstraction violation. 
*/ if ((rl != &_DefaultRuneLocale) && (rl)) { - if (strcmp(rl->__encoding, "EUC") == 0) { - free(rl->__variable); - } free(rl); } } @@ -102,10 +95,9 @@ free_runes(_RuneLocale *rl) static int __setrunelocale(struct xlocale_ctype *l, const char *encoding) { - FILE *fp; - char name[PATH_MAX]; _RuneLocale *rl; - int saverr, ret; + int ret; + char path[PATH_MAX]; struct xlocale_ctype saved = *l; /* @@ -118,37 +110,34 @@ __setrunelocale(struct xlocale_ctype *l, const char *encoding) } /* Range checking not needed, encoding length already checked before */ - (void) strcpy(name, _PathLocale); - (void) strcat(name, "/"); - (void) strcat(name, encoding); - (void) strcat(name, "/LC_CTYPE"); - - if ((fp = fopen(name, "re")) == NULL) - return (errno == 0 ? ENOENT : errno); - - if ((rl = _Read_RuneMagi(fp)) == NULL) { - saverr = (errno == 0 ? EFTYPE : errno); - (void)fclose(fp); - return (saverr); + (void) snprintf(path, sizeof (path), "%s/%s/LC_CTYPE", + _PathLocale, encoding); + + if ((rl = _Read_RuneMagi(path)) == NULL) { + errno = EINVAL; + return (errno); } - (void)fclose(fp); l->__mbrtowc = NULL; l->__mbsinit = NULL; - l->__mbsnrtowcs = __mbsnrtowcs_std; + l->__mbsnrtowcs = NULL; l->__wcrtomb = NULL; - l->__wcsnrtombs = __wcsnrtombs_std; + l->__wcsnrtombs = NULL; rl->__sputrune = NULL; rl->__sgetrune = NULL; if (strcmp(rl->__encoding, "NONE") == 0) ret = _none_init(l, rl); - else if (strcmp(rl->__encoding, "ASCII") == 0) - ret = _ascii_init(l, rl); else if (strcmp(rl->__encoding, "UTF-8") == 0) ret = _UTF8_init(l, rl); - else if (strcmp(rl->__encoding, "EUC") == 0) - ret = _EUC_init(l, rl); + else if (strcmp(rl->__encoding, "EUC-CN") == 0) + ret = _EUC_CN_init(l, rl); + else if (strcmp(rl->__encoding, "EUC-JP") == 0) + ret = _EUC_JP_init(l, rl); + else if (strcmp(rl->__encoding, "EUC-KR") == 0) + ret = _EUC_KR_init(l, rl); + else if (strcmp(rl->__encoding, "EUC-TW") == 0) + ret = _EUC_TW_init(l, rl); else if (strcmp(rl->__encoding, "GB18030") == 0) ret = _GB18030_init(l, 
rl); else if (strcmp(rl->__encoding, "GB2312") == 0) diff --git a/lib/libc/locale/utf8.c b/lib/libc/locale/utf8.c index 629f72f9aa..88e0c62e98 100644 --- a/lib/libc/locale/utf8.c +++ b/lib/libc/locale/utf8.c @@ -1,4 +1,6 @@ -/*- +/* + * Copyright 2013 Garrett D'Amore + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins * All rights reserved. * @@ -27,8 +29,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD: head/lib/libc/locale/utf8.c 227753 2011-11-20 14:45:42Z theraven $ */ #include @@ -70,7 +70,7 @@ _UTF8_init(struct xlocale_ctype *l, _RuneLocale *rl) l->__mbsnrtowcs = _UTF8_mbsnrtowcs; l->__wcsnrtombs = _UTF8_wcsnrtombs; l->runes = rl; - l->__mb_cur_max = 6; + l->__mb_cur_max = 4; /* * UCS-4 encoding used as the internal representation, so * slots 0x0080-0x00FF are occuped and must be excluded @@ -113,13 +113,6 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, /* Incomplete multibyte sequence */ return ((size_t)-2); - if (us->want == 0 && ((ch = (unsigned char)*s) & ~0x7f) == 0) { - /* Fast path for plain ASCII characters. */ - if (pwc != NULL) - *pwc = ch; - return (ch != '\0' ? 1 : 0); - } - if (us->want == 0) { /* * Determine the number of octets that make up this character @@ -135,10 +128,12 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, */ ch = (unsigned char)*s; if ((ch & 0x80) == 0) { - mask = 0x7f; - want = 1; - lbound = 0; - } else if ((ch & 0xe0) == 0xc0) { + /* Fast path for plain ASCII characters. */ + if (pwc != NULL) + *pwc = ch; + return (ch != '\0' ? 
1 : 0); + } + if ((ch & 0xe0) == 0xc0) { mask = 0x1f; want = 2; lbound = 0x80; @@ -150,6 +145,9 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mask = 0x07; want = 4; lbound = 0x10000; +#if 0 + /* These would be illegal in the UTF-8 space */ + } else if ((ch & 0xfc) == 0xf8) { mask = 0x03; want = 5; @@ -158,6 +156,7 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mask = 0x01; want = 6; lbound = 0x4000000; +#endif } else { /* * Malformed input; input is not UTF-8. @@ -178,6 +177,7 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, wch = (unsigned char)*s++ & mask; else wch = us->ch; + for (i = (us->want == 0) ? 1 : 0; i < MIN(want, n); i++) { if ((*s & 0xc0) != 0x80) { /* @@ -310,12 +310,6 @@ _UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) /* Reset to initial shift state (no-op) */ return (1); - if ((wc & ~0x7f) == 0) { - /* Fast path for plain ASCII characters. */ - *s = (char)wc; - return (1); - } - /* * Determine the number of octets needed to represent this character. * We always output the shortest sequence possible. Also specify the @@ -323,8 +317,9 @@ _UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) * about the sequence length. */ if ((wc & ~0x7f) == 0) { - lead = 0; - len = 1; + /* Fast path for plain ASCII characters. 
*/ + *s = (char)wc; + return (1); } else if ((wc & ~0x7ff) == 0) { lead = 0xc0; len = 2; @@ -334,12 +329,15 @@ _UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) } else if ((wc & ~0x1fffff) == 0) { lead = 0xf0; len = 4; +#if 0 + /* Again, 5 and 6 byte encodings are simply not permitted */ } else if ((wc & ~0x3ffffff) == 0) { lead = 0xf8; len = 5; } else if ((wc & ~0x7fffffff) == 0) { lead = 0xfc; len = 6; +#endif } else { errno = EILSEQ; return ((size_t)-1); @@ -419,7 +417,7 @@ _UTF8_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, if (nb > (int)len) /* MB sequence for character won't fit. */ break; - memcpy(dst, buf, nb); + (void) memcpy(dst, buf, nb); } if (*s == L'\0') { *src = NULL; diff --git a/lib/libc/locale/wcsnrtombs.c b/lib/libc/locale/wcsnrtombs.c index a0116b1d56..da26f45179 100644 --- a/lib/libc/locale/wcsnrtombs.c +++ b/lib/libc/locale/wcsnrtombs.c @@ -1,4 +1,6 @@ -/*- +/* + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002-2004 Tim J. Robbins. * All rights reserved. * @@ -27,11 +29,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- * - * $FreeBSD: head/lib/libc/locale/wcsnrtombs.c 227753 2011-11-20 14:45:42Z theraven $ */ - #include #include #include @@ -57,21 +56,21 @@ wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, size_t nwc, size_t __wcsnrtombs_std(char * __restrict dst, const wchar_t ** __restrict src, - size_t nwc, size_t len, mbstate_t * __restrict ps) + size_t nwc, size_t len, mbstate_t * __restrict ps, + wcrtomb_pfn_t pwcrtomb) { mbstate_t mbsbak; char buf[MB_LEN_MAX]; const wchar_t *s; size_t nbytes; size_t nb; - struct xlocale_ctype *l = XLOCALE_CTYPE(__get_locale()); s = *src; nbytes = 0; if (dst == NULL) { while (nwc-- > 0) { - if ((nb = l->__wcrtomb(buf, *s, ps)) == (size_t)-1) + if ((nb = pwcrtomb(buf, *s, ps)) == (size_t)-1) /* Invalid character - wcrtomb() sets errno. */ return ((size_t)-1); else if (*s == L'\0') @@ -85,7 +84,7 @@ __wcsnrtombs_std(char * __restrict dst, const wchar_t ** __restrict src, while (len > 0 && nwc-- > 0) { if (len > (size_t)MB_CUR_MAX) { /* Enough space to translate in-place. */ - if ((nb = l->__wcrtomb(dst, *s, ps)) == (size_t)-1) { + if ((nb = pwcrtomb(dst, *s, ps)) == (size_t)-1) { *src = s; return ((size_t)-1); } @@ -98,7 +97,7 @@ __wcsnrtombs_std(char * __restrict dst, const wchar_t ** __restrict src, * character is too long for the buffer. */ mbsbak = *ps; - if ((nb = l->__wcrtomb(buf, *s, ps)) == (size_t)-1) { + if ((nb = pwcrtomb(buf, *s, ps)) == (size_t)-1) { *src = s; return ((size_t)-1); } @@ -107,7 +106,7 @@ __wcsnrtombs_std(char * __restrict dst, const wchar_t ** __restrict src, *ps = mbsbak; break; } - memcpy(dst, buf, nb); + (void) memcpy(dst, buf, nb); } if (*s == L'\0') { *src = NULL; -- 2.41.0