From: Sascha Wildner Date: Sun, 28 Aug 2016 09:25:45 +0000 (+0200) Subject: kernel/libiconv: Sync with FreeBSD. X-Git-Tag: v4.8.0rc~1158 X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/4eb357794dc2a9881179b541e6793672f216cdac kernel/libiconv: Sync with FreeBSD. --- diff --git a/sys/conf/files b/sys/conf/files index f29127ce59..2a7b672ea8 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1943,6 +1943,7 @@ libkern/strtouq.c standard #libkern/stack_protector.c standard libiconv/iconv.c optional libiconv libiconv/iconv_converter_if.m optional libiconv +libiconv/iconv_ucs.c optional libiconv libiconv/iconv_xlat.c optional libiconv libiconv/iconv_xlat16.c optional libiconv libprop/prop_array.c standard diff --git a/sys/libiconv/Makefile b/sys/libiconv/Makefile index 73b2fc247f..070aa1cabf 100644 --- a/sys/libiconv/Makefile +++ b/sys/libiconv/Makefile @@ -1,13 +1,14 @@ # $FreeBSD: src/sys/modules/libiconv/Makefile,v 1.4 2003/09/26 20:26:24 fjoe Exp $ -.PATH: ${.CURDIR}/../libkern ${.CURDIR}/../sys +.PATH: ${.CURDIR}/../sys KMOD= libiconv -SRCS= iconv.c iconv_xlat.c iconv_xlat16.c +SRCS= iconv.c iconv_ucs.c iconv_xlat.c iconv_xlat16.c SRCS+= iconv.h SRCS+= iconv_converter_if.c iconv_converter_if.h -EXPORT_SYMS= iconv_open \ +EXPORT_SYMS= iconv_add \ + iconv_open \ iconv_close \ iconv_conv \ iconv_conv_case \ diff --git a/sys/libiconv/iconv.c b/sys/libiconv/iconv.c index 2c172d1521..970d5beb6b 100644 --- a/sys/libiconv/iconv.c +++ b/sys/libiconv/iconv.c @@ -1,5 +1,5 @@ -/* - * Copyright (c) 2000-2001, Boris Popov +/*- + * Copyright (c) 2000-2001 Boris Popov * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -10,12 +10,6 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Boris Popov. - * 4. Neither the name of the author nor the names of any co-contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE @@ -29,17 +23,18 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/sys/libkern/iconv.c,v 1.12.2.1.2.1 2009/04/15 03:14:26 kensmith Exp $ - * $DragonFly: src/sys/libiconv/iconv.c,v 1.8 2008/01/05 14:02:38 swildner Exp $ + * $FreeBSD: head/sys/libkern/iconv.c 267291 2014-06-09 19:27:47Z jhb $ */ #include #include #include #include +#include #include #include #include + #include "iconv_converter_if.h" SYSCTL_DECL(_kern_iconv); @@ -47,10 +42,12 @@ SYSCTL_DECL(_kern_iconv); SYSCTL_NODE(_kern, OID_AUTO, iconv, CTLFLAG_RW, NULL, "kernel iconv interface"); MALLOC_DEFINE(M_ICONV, "iconv", "ICONV structures"); -MALLOC_DEFINE(M_ICONVDATA, "iconv_data", "ICONV data"); +static MALLOC_DEFINE(M_ICONVDATA, "iconv_data", "ICONV data"); MODULE_VERSION(libiconv, 2); +static struct lock iconv_lock; + #ifdef notnow /* * iconv converter instance @@ -85,13 +82,18 @@ iconv_mod_unload(void) { struct iconv_cspair *csp; - while ((csp = TAILQ_FIRST(&iconv_cslist)) != NULL) { - if (csp->cp_refcount) + lockmgr(&iconv_lock, LK_EXCLUSIVE); + TAILQ_FOREACH(csp, &iconv_cslist, cp_link) { + if (csp->cp_refcount) { + lockmgr(&iconv_lock, LK_RELEASE); return EBUSY; + } } while ((csp = TAILQ_FIRST(&iconv_cslist)) != NULL) iconv_unregister_cspair(csp); + lockmgr(&iconv_lock, LK_RELEASE); + lockuninit(&iconv_lock); return 0; } @@ -103,6 +105,7 @@ iconv_mod_handler(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: error = 0; + lockinit(&iconv_lock, "iconv", 0, LK_CANRECURSE); break; case MOD_UNLOAD: error = iconv_mod_unload(); @@ -162,8 +165,8 @@ iconv_lookupcs(const char *to, const char *from, struct iconv_cspair **cspp) struct iconv_cspair *csp; TAILQ_FOREACH(csp, &iconv_cslist, cp_link) { - if (strcmp(csp->cp_to, to) == 0 && - strcmp(csp->cp_from, from) == 0) { + if (strcasecmp(csp->cp_to, to) == 0 && + strcasecmp(csp->cp_from, from) == 0) { if (cspp) *cspp = csp; return 0; @@ -297,6 +300,18 @@ iconv_convchr_case(void *handle, const char **inbuf, return ICONV_CONVERTER_CONV(handle, inbuf, inbytesleft, outbuf, outbytesleft, 1, casetype); } +int +towlower(int c, void *handle) +{ + return ICONV_CONVERTER_TOLOWER(handle, c); +} + +int +towupper(int c, void *handle) +{ + return ICONV_CONVERTER_TOUPPER(handle, c); +} + /* * Give a list of loaded converters. Each name terminated with 0. * An empty string terminates the list. @@ -311,6 +326,7 @@ iconv_sysctl_drvlist(SYSCTL_HANDLER_ARGS) error = 0; + lockmgr(&iconv_lock, LK_SHARED); TAILQ_FOREACH(dcp, &iconv_converters, cc_link) { name = ICONV_CONVERTER_NAME(dcp); if (name == NULL) @@ -319,6 +335,7 @@ iconv_sysctl_drvlist(SYSCTL_HANDLER_ARGS) if (error) break; } + lockmgr(&iconv_lock, LK_RELEASE); if (error) return error; spc = 0; @@ -343,6 +360,7 @@ iconv_sysctl_cslist(SYSCTL_HANDLER_ARGS) bzero(&csi, sizeof(csi)); csi.cs_version = ICONV_CSPAIR_INFO_VER; + lockmgr(&iconv_lock, LK_SHARED); TAILQ_FOREACH(csp, &iconv_cslist, cp_link) { csi.cs_id = csp->cp_id; csi.cs_refcount = csp->cp_refcount; @@ -353,12 +371,25 @@ iconv_sysctl_cslist(SYSCTL_HANDLER_ARGS) if (error) break; } + lockmgr(&iconv_lock, LK_RELEASE); return error; } SYSCTL_PROC(_kern_iconv, OID_AUTO, cslist, CTLFLAG_RD | CTLTYPE_OPAQUE, NULL, 0, iconv_sysctl_cslist, "S,xlat", "registered charset pairs"); +int +iconv_add(const char *converter, const char *to, const char *from) +{ + struct iconv_converter_class *dcp; + struct iconv_cspair *csp; + + if (iconv_lookupconv(converter, &dcp) != 0) + return EINVAL; + + return iconv_register_cspair(to, from, dcp, NULL, &csp); +} + /* * Add new charset pair */ @@ -378,22 +409,20 @@ iconv_sysctl_add(SYSCTL_HANDLER_ARGS) return EINVAL; if (din.ia_datalen > ICONV_CSMAXDATALEN) return EINVAL; - - /* - * Make sure all user-supplied strings are terminated before - * proceeding. - * - * XXX return EINVAL if strings are not properly terminated - */ - din.ia_converter[ICONV_CNVNMAXLEN-1] = 0; - din.ia_to[ICONV_CSNMAXLEN-1] = 0; - din.ia_from[ICONV_CSNMAXLEN-1] = 0; - + if (strlen(din.ia_from) >= ICONV_CSNMAXLEN) + return EINVAL; + if (strlen(din.ia_to) >= ICONV_CSNMAXLEN) + return EINVAL; + if (strlen(din.ia_converter) >= ICONV_CNVNMAXLEN) + return EINVAL; if (iconv_lookupconv(din.ia_converter, &dcp) != 0) return EINVAL; + lockmgr(&iconv_lock, LK_EXCLUSIVE); error = iconv_register_cspair(din.ia_to, din.ia_from, dcp, NULL, &csp); - if (error) + if (error) { + lockmgr(&iconv_lock, LK_RELEASE); return error; + } if (din.ia_datalen) { csp->cp_data = kmalloc(din.ia_datalen, M_ICONVDATA, M_WAITOK); error = copyin(din.ia_data, csp->cp_data, din.ia_datalen); @@ -404,10 +433,12 @@ iconv_sysctl_add(SYSCTL_HANDLER_ARGS) error = SYSCTL_OUT(req, &dout, sizeof(dout)); if (error) goto bad; + lockmgr(&iconv_lock, LK_RELEASE); ICDEBUG("%s => %s, %d bytes\n",din.ia_from, din.ia_to, din.ia_datalen); return 0; bad: iconv_unregister_cspair(csp); + lockmgr(&iconv_lock, LK_RELEASE); return error; } @@ -429,6 +460,12 @@ iconv_converter_donestub(struct iconv_converter_class *dp) return 0; } +int +iconv_converter_tolowerstub(int c, void *handle) +{ + return (c); +} + int iconv_converter_handler(module_t mod, int type, void *data) { @@ -437,16 +474,22 @@ iconv_converter_handler(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: + lockmgr(&iconv_lock, LK_EXCLUSIVE); error = iconv_register_converter(dcp); - if (error) + if (error) { + lockmgr(&iconv_lock, LK_RELEASE); break; + } error = ICONV_CONVERTER_INIT(dcp); if (error) iconv_unregister_converter(dcp); + lockmgr(&iconv_lock, LK_RELEASE); break; case MOD_UNLOAD: + lockmgr(&iconv_lock, LK_EXCLUSIVE); ICONV_CONVERTER_DONE(dcp); error = iconv_unregister_converter(dcp); + lockmgr(&iconv_lock, LK_RELEASE); break; default: error = EINVAL; @@ -461,8 +504,8 @@ char * iconv_convstr(void *handle, char *dst, const char *src) { char *p = dst; - int error; size_t inlen, outlen; + int error; if (handle == NULL) { strcpy(dst, src); @@ -507,9 +550,7 @@ int iconv_lookupcp(char **cpp, const char *s) { if (cpp == NULL) { - ICDEBUG("warning a NULL list passed\n", ""); /* XXX ISO variadic macros cannot - leave out the - variadic args */ + ICDEBUG("warning a NULL list passed\n", ""); return ENOENT; } for (; *cpp; cpp++) diff --git a/sys/libiconv/iconv_converter_if.m b/sys/libiconv/iconv_converter_if.m index 8ae9bb3e6c..d785f8f2f7 100644 --- a/sys/libiconv/iconv_converter_if.m +++ b/sys/libiconv/iconv_converter_if.m @@ -1,5 +1,5 @@ -# -# Copyright (c) 2000-2001, Boris Popov +#- +# Copyright (c) 2000-2001 Boris Popov # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -10,12 +10,6 @@ # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. -# 3. All advertising materials mentioning features or use of this software -# must display the following acknowledgement: -# This product includes software developed by Boris Popov. -# 4. Neither the name of the author nor the names of any co-contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE @@ -29,8 +23,7 @@ # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # -# $FreeBSD: src/sys/libkern/iconv_converter_if.m,v 1.3.20.1 2009/04/15 03:14:26 kensmith Exp $ -# $DragonFly: src/sys/libiconv/iconv_converter_if.m,v 1.3 2004/03/18 18:27:47 dillon Exp $ +# $FreeBSD: head/sys/libkern/iconv_converter_if.m 206361 2010-04-07 16:50:38Z joel $ # #include @@ -62,10 +55,20 @@ STATICMETHOD int init { struct iconv_converter_class *dcp; } DEFAULT iconv_converter_initstub; -STATICMETHOD void done { +STATICMETHOD int done { struct iconv_converter_class *dcp; } DEFAULT iconv_converter_donestub; STATICMETHOD const char * name { struct iconv_converter_class *dcp; }; + +METHOD int tolower { + void *handle; + int c; +} DEFAULT iconv_converter_tolowerstub; + +METHOD int toupper { + void *handle; + int c; +} DEFAULT iconv_converter_tolowerstub; diff --git a/sys/libiconv/iconv_ucs.c b/sys/libiconv/iconv_ucs.c new file mode 100644 index 0000000000..a0294f768c --- /dev/null +++ b/sys/libiconv/iconv_ucs.c @@ -0,0 +1,537 @@ +/*- + * Copyright (c) 2003, 2005 Ryuichiro Imura + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: head/sys/libkern/iconv_ucs.c 267291 2014-06-09 19:27:47Z jhb $ + */ + +#include +#include +#include +#include +#include + +#include "iconv_converter_if.h" + +/* + * "UCS" converter + */ + +#define KICONV_UCS_COMBINE 0x1 +#define KICONV_UCS_FROM_UTF8 0x2 +#define KICONV_UCS_TO_UTF8 0x4 +#define KICONV_UCS_FROM_LE 0x8 +#define KICONV_UCS_TO_LE 0x10 +#define KICONV_UCS_FROM_UTF16 0x20 +#define KICONV_UCS_TO_UTF16 0x40 +#define KICONV_UCS_UCS4 0x80 + +#define ENCODING_UTF16 "UTF-16BE" +#define ENCODING_UTF8 "UTF-8" + +static struct { + const char *name; + int from_flag, to_flag; +} unicode_family[] = { + { "UTF-8", KICONV_UCS_FROM_UTF8, KICONV_UCS_TO_UTF8 }, + { "UCS-2LE", KICONV_UCS_FROM_LE, KICONV_UCS_TO_LE }, + { "UTF-16BE", KICONV_UCS_FROM_UTF16, KICONV_UCS_TO_UTF16 }, + { "UTF-16LE", KICONV_UCS_FROM_UTF16|KICONV_UCS_FROM_LE, + KICONV_UCS_TO_UTF16|KICONV_UCS_TO_LE }, + { NULL, 0, 0 } +}; + +static uint32_t utf8_to_ucs4(const char *src, size_t *utf8width, size_t srclen); +static u_char *ucs4_to_utf8(uint32_t ucs4, char * dst, size_t *utf8width, size_t dstlen); +static uint32_t encode_surrogate(uint32_t code); +static uint32_t decode_surrogate(const u_char *ucs); + +#ifdef MODULE_DEPEND +MODULE_DEPEND(iconv_ucs, libiconv, 2, 2, 2); +#endif + +/* + * UCS converter instance + */ +struct iconv_ucs { + KOBJ_FIELDS; + int convtype; + struct iconv_cspair * d_csp; + struct iconv_cspair * d_cspf; + void * f_ctp; + void * t_ctp; + void * ctype; +}; + +static int +iconv_ucs_open(struct iconv_converter_class *dcp, + struct iconv_cspair *csp, struct iconv_cspair *cspf, void **dpp) +{ + struct iconv_ucs *dp; + int i; + const char *from, *to; + + dp = (struct iconv_ucs *)kobj_create((struct kobj_class*)dcp, M_ICONV, M_WAITOK); + to = csp->cp_to; + from = cspf ? cspf->cp_from : csp->cp_from; + + dp->convtype = 0; + + if (cspf) + dp->convtype |= KICONV_UCS_COMBINE; + for (i = 0; unicode_family[i].name; i++) { + if (strcasecmp(from, unicode_family[i].name) == 0) + dp->convtype |= unicode_family[i].from_flag; + if (strcasecmp(to, unicode_family[i].name) == 0) + dp->convtype |= unicode_family[i].to_flag; + } + if (strcmp(ENCODING_UNICODE, ENCODING_UTF16) == 0) + dp->convtype |= KICONV_UCS_UCS4; + else + dp->convtype &= ~KICONV_UCS_UCS4; + + dp->f_ctp = dp->t_ctp = NULL; + if (dp->convtype & KICONV_UCS_COMBINE) { + if ((dp->convtype & KICONV_UCS_FROM_UTF8) == 0 && + (dp->convtype & KICONV_UCS_FROM_LE) == 0) { + iconv_open(ENCODING_UNICODE, from, &dp->f_ctp); + } + if ((dp->convtype & KICONV_UCS_TO_UTF8) == 0 && + (dp->convtype & KICONV_UCS_TO_LE) == 0) { + iconv_open(to, ENCODING_UNICODE, &dp->t_ctp); + } + } + + dp->ctype = NULL; + if (dp->convtype & (KICONV_UCS_FROM_UTF8 | KICONV_UCS_TO_UTF8)) + iconv_open(KICONV_WCTYPE_NAME, ENCODING_UTF8, &dp->ctype); + + dp->d_csp = csp; + if (dp->convtype & (KICONV_UCS_FROM_UTF8 | KICONV_UCS_FROM_LE)) { + if (cspf) { + dp->d_cspf = cspf; + cspf->cp_refcount++; + } else + csp->cp_refcount++; + } + if (dp->convtype & (KICONV_UCS_TO_UTF8 | KICONV_UCS_TO_LE)) + csp->cp_refcount++; + *dpp = (void*)dp; + return 0; +} + +static int +iconv_ucs_close(void *data) +{ + struct iconv_ucs *dp = data; + + if (dp->f_ctp) + iconv_close(dp->f_ctp); + if (dp->t_ctp) + iconv_close(dp->t_ctp); + if (dp->ctype) + iconv_close(dp->ctype); + if (dp->d_cspf) + dp->d_cspf->cp_refcount--; + else if (dp->convtype & (KICONV_UCS_FROM_UTF8 | KICONV_UCS_FROM_LE)) + dp->d_csp->cp_refcount--; + if (dp->convtype & (KICONV_UCS_TO_UTF8 | KICONV_UCS_TO_LE)) + dp->d_csp->cp_refcount--; + kobj_delete((struct kobj*)data, M_ICONV); + return 0; +} + +static int +iconv_ucs_conv(void *d2p, const char **inbuf, + size_t *inbytesleft, char **outbuf, size_t *outbytesleft, + int convchar, int casetype) +{ + struct iconv_ucs *dp = (struct iconv_ucs*)d2p; + int ret = 0, i; + size_t in, on, ir, or, inlen, outlen, ucslen; + const char *src, *p; + char *dst; + u_char ucs[4], *q; + uint32_t code; + + if (inbuf == NULL || *inbuf == NULL || outbuf == NULL || *outbuf == NULL) + return 0; + ir = in = *inbytesleft; + or = on = *outbytesleft; + src = *inbuf; + dst = *outbuf; + + while (ir > 0 && or > 0) { + + /* + * The first half of conversion. + * (convert any code into ENCODING_UNICODE) + */ + code = 0; + p = src; + if (dp->convtype & KICONV_UCS_FROM_UTF8) { + /* convert UTF-8 to ENCODING_UNICODE */ + inlen = 0; + code = utf8_to_ucs4(p, &inlen, ir); + if (code == 0) { + ret = -1; + break; + } + + if (casetype == KICONV_FROM_LOWER && dp->ctype) { + code = towlower(code, dp->ctype); + } else if (casetype == KICONV_FROM_UPPER && dp->ctype) { + code = towupper(code, dp->ctype); + } + + if ((code >= 0xd800 && code < 0xe000) || code >= 0x110000 ) { + /* reserved for utf-16 surrogate pair */ + /* invalid unicode */ + ret = -1; + break; + } + + if (inlen == 4) { + if (dp->convtype & KICONV_UCS_UCS4) { + ucslen = 4; + code = encode_surrogate(code); + } else { + /* can't handle with ucs-2 */ + ret = -1; + break; + } + } else { + ucslen = 2; + } + + /* save UCS-4 into ucs[] */ + for (q = ucs, i = ucslen - 1 ; i >= 0 ; i--) + *q++ = (code >> (i << 3)) & 0xff; + + } else if (dp->convtype & KICONV_UCS_COMBINE && dp->f_ctp) { + /* convert local code to ENCODING_UNICODE */ + ucslen = 4; + inlen = ir; + q = ucs; + ret = iconv_convchr_case(dp->f_ctp, &p, &inlen, (char **)&q, + &ucslen, casetype & (KICONV_FROM_LOWER | KICONV_FROM_UPPER)); + if (ret) + break; + inlen = ir - inlen; + ucslen = 4 - ucslen; + + } else { + /* src code is a proper subset of ENCODING_UNICODE */ + q = ucs; + if (dp->convtype & KICONV_UCS_FROM_LE) { + *q = *(p + 1); + *(q + 1) = *p; + p += 2; + } else { + *q = *p++; + *(q + 1) = *p++; + } + if ((*q & 0xfc) == 0xd8) { + if (dp->convtype & KICONV_UCS_UCS4 && + dp->convtype & KICONV_UCS_FROM_UTF16) { + inlen = ucslen = 4; + } else { + /* invalid unicode */ + ret = -1; + break; + } + } else { + inlen = ucslen = 2; + } + if (ir < inlen) { + ret = -1; + break; + } + if (ucslen == 4) { + q += 2; + if (dp->convtype & KICONV_UCS_FROM_LE) { + *q = *(p + 1); + *(q + 1) = *p; + } else { + *q = *p++; + *(q + 1) = *p; + } + if ((*q & 0xfc) != 0xdc) { + /* invalid unicode */ + ret = -1; + break; + } + } + } + + /* + * The second half of conversion. + * (convert ENCODING_UNICODE into any code) + */ + p = ucs; + if (dp->convtype & KICONV_UCS_TO_UTF8) { + q = (u_char *)dst; + if (ucslen == 4 && dp->convtype & KICONV_UCS_UCS4) { + /* decode surrogate pair */ + code = decode_surrogate(p); + } else { + code = (ucs[0] << 8) | ucs[1]; + } + + if (casetype == KICONV_LOWER && dp->ctype) { + code = towlower(code, dp->ctype); + } else if (casetype == KICONV_UPPER && dp->ctype) { + code = towupper(code, dp->ctype); + } + + outlen = 0; + if (ucs4_to_utf8(code, q, &outlen, or) == NULL) { + ret = -1; + break; + } + + src += inlen; + ir -= inlen; + dst += outlen; + or -= outlen; + + } else if (dp->convtype & KICONV_UCS_COMBINE && dp->t_ctp) { + ret = iconv_convchr_case(dp->t_ctp, &p, &ucslen, &dst, + &or, casetype & (KICONV_LOWER | KICONV_UPPER)); + if (ret) + break; + + src += inlen; + ir -= inlen; + + } else { + /* dst code is a proper subset of ENCODING_UNICODE */ + if (or < ucslen) { + ret = -1; + break; + } + src += inlen; + ir -= inlen; + or -= ucslen; + if (dp->convtype & KICONV_UCS_TO_LE) { + *dst++ = *(p + 1); + *dst++ = *p; + p += 2; + } else { + *dst++ = *p++; + *dst++ = *p++; + } + if (ucslen == 4) { + if ((dp->convtype & KICONV_UCS_UCS4) == 0 || + (dp->convtype & KICONV_UCS_TO_UTF16) == 0) { + ret = -1; + break; + } + if (dp->convtype & KICONV_UCS_TO_LE) { + *dst++ = *(p + 1); + *dst++ = *p; + } else { + *dst++ = *p++; + *dst++ = *p; + } + } + } + + if (convchar == 1) + break; + } + + *inbuf += in - ir; + *outbuf += on - or; + *inbytesleft -= in - ir; + *outbytesleft -= on - or; + return (ret); +} + +static int +iconv_ucs_init(struct iconv_converter_class *dcp) +{ + int error; + + error = iconv_add(ENCODING_UNICODE, ENCODING_UNICODE, ENCODING_UTF8); + if (error) + return (error); + error = iconv_add(ENCODING_UNICODE, ENCODING_UTF8, ENCODING_UNICODE); + if (error) + return (error); + return (0); +} + +static int +iconv_ucs_done(struct iconv_converter_class *dcp) +{ + return (0); +} + +static const char * +iconv_ucs_name(struct iconv_converter_class *dcp) +{ + return (ENCODING_UNICODE); +} + +static kobj_method_t iconv_ucs_methods[] = { + KOBJMETHOD(iconv_converter_open, iconv_ucs_open), + KOBJMETHOD(iconv_converter_close, iconv_ucs_close), + KOBJMETHOD(iconv_converter_conv, iconv_ucs_conv), + KOBJMETHOD(iconv_converter_init, iconv_ucs_init), + KOBJMETHOD(iconv_converter_done, iconv_ucs_done), + KOBJMETHOD(iconv_converter_name, iconv_ucs_name), + KOBJMETHOD_END +}; + +KICONV_CONVERTER(ucs, sizeof(struct iconv_ucs)); + +static uint32_t +utf8_to_ucs4(const char *src, size_t *utf8width, size_t srclen) +{ + size_t i, w = 0; + uint32_t ucs4 = 0; + + /* + * get leading 1 byte from utf-8 + */ + if ((*src & 0x80) == 0) { + /* + * leading 1 bit is "0" + * utf-8: 0xxxxxxx + * ucs-4: 00000000 00000000 00000000 0xxxxxxx + */ + w = 1; + /* get trailing 7 bits */ + ucs4 = *src & 0x7f; + } else if ((*src & 0xe0) == 0xc0) { + /* + * leading 3 bits are "110" + * utf-8: 110xxxxx 10yyyyyy + * ucs-4: 00000000 00000000 00000xxx xxyyyyyy + */ + w = 2; + /* get trailing 5 bits */ + ucs4 = *src & 0x1f; + } else if ((*src & 0xf0) == 0xe0) { + /* + * leading 4 bits are "1110" + * utf-8: 1110xxxx 10yyyyyy 10zzzzzz + * ucs-4: 00000000 00000000 xxxxyyyy yyzzzzzz + */ + w = 3; + /* get trailing 4 bits */ + ucs4 = *src & 0x0f; + } else if ((*src & 0xf8) == 0xf0) { + /* + * leading 5 bits are "11110" + * utf-8: 11110www 10xxxxxx 10yyyyyy 10zzzzzz + * ucs-4: 00000000 000wwwxx xxxxyyyy yyzzzzzz + */ + w = 4; + /* get trailing 3 bits */ + ucs4 = *src & 0x07; + } else { + /* out of utf-16 range or having illegal bits */ + return (0); + } + + if (srclen < w) + return (0); + + /* + * get left parts from utf-8 + */ + for (i = 1 ; i < w ; i++) { + if ((*(src + i) & 0xc0) != 0x80) { + /* invalid: leading 2 bits are not "10" */ + return (0); + } + /* concatenate trailing 6 bits into ucs4 */ + ucs4 <<= 6; + ucs4 |= *(src + i) & 0x3f; + } + + *utf8width = w; + return (ucs4); +} + +static u_char * +ucs4_to_utf8(uint32_t ucs4, char *dst, size_t *utf8width, size_t dstlen) +{ + u_char lead, *p; + size_t i, w; + + /* + * determine utf-8 width and leading bits + */ + if (ucs4 < 0x80) { + w = 1; + lead = 0; /* "0" */ + } else if (ucs4 < 0x800) { + w = 2; + lead = 0xc0; /* "11" */ + } else if (ucs4 < 0x10000) { + w = 3; + lead = 0xe0; /* "111" */ + } else if (ucs4 < 0x200000) { + w = 4; + lead = 0xf0; /* "1111" */ + } else { + return (NULL); + } + + if (dstlen < w) + return (NULL); + + /* + * construct utf-8 + */ + p = dst; + for (i = w - 1 ; i >= 1 ; i--) { + /* get trailing 6 bits and put it with leading bit as "1" */ + *(p + i) = (ucs4 & 0x3f) | 0x80; + ucs4 >>= 6; + } + *p = ucs4 | lead; + + *utf8width = w; + + return (p); +} + +static uint32_t +encode_surrogate(register uint32_t code) +{ + return ((((code - 0x10000) << 6) & 0x3ff0000) | + ((code - 0x10000) & 0x3ff) | 0xd800dc00); +} + +static uint32_t +decode_surrogate(register const u_char *ucs) +{ + return ((((ucs[0] & 0x3) << 18) | (ucs[1] << 10) | + ((ucs[2] & 0x3) << 8) | ucs[3]) + 0x10000); +} + diff --git a/sys/libiconv/iconv_xlat.c b/sys/libiconv/iconv_xlat.c index 94a0115f92..17469eb62a 100644 --- a/sys/libiconv/iconv_xlat.c +++ b/sys/libiconv/iconv_xlat.c @@ -1,5 +1,5 @@ -/* - * Copyright (c) 2000-2001, Boris Popov +/*- + * Copyright (c) 2000-2001 Boris Popov * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -10,12 +10,6 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Boris Popov. - * 4. Neither the name of the author nor the names of any co-contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE @@ -29,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/sys/libkern/iconv_xlat.c,v 1.5.30.1 2009/04/15 03:14:26 kensmith Exp $ + * $FreeBSD: head/sys/libkern/iconv_xlat.c 206361 2010-04-07 16:50:38Z joel $ */ #include diff --git a/sys/libiconv/iconv_xlat16.c b/sys/libiconv/iconv_xlat16.c index 0c9cd4df56..59ec415b3a 100644 --- a/sys/libiconv/iconv_xlat16.c +++ b/sys/libiconv/iconv_xlat16.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2003, Ryuichiro Imura + * Copyright (c) 2003, 2005 Ryuichiro Imura * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/sys/libkern/iconv_xlat16.c,v 1.3.20.1 2009/04/15 03:14:26 kensmith Exp $ + * $FreeBSD: head/sys/libkern/iconv_xlat16.c 194638 2009-06-22 17:09:46Z delphij $ */ #include @@ -42,12 +42,17 @@ MODULE_DEPEND(iconv_xlat16, libiconv, 2, 2, 2); #endif +#define C2I1(c) ((c) & 0x8000 ? ((c) & 0xff) | 0x100 : (c) & 0xff) +#define C2I2(c) ((c) & 0x8000 ? ((c) >> 8) & 0x7f : ((c) >> 8) & 0xff) + /* * XLAT16 converter instance */ struct iconv_xlat16 { KOBJ_FIELDS; uint32_t * d_table[0x200]; + void * f_ctp; + void * t_ctp; struct iconv_cspair * d_csp; }; @@ -71,6 +76,16 @@ iconv_xlat16_open(struct iconv_converter_class *dcp, } idxp++; } + + if (strcmp(csp->cp_to, KICONV_WCTYPE_NAME) != 0) { + if (iconv_open(KICONV_WCTYPE_NAME, csp->cp_from, &dp->f_ctp) != 0) + dp->f_ctp = NULL; + if (iconv_open(KICONV_WCTYPE_NAME, csp->cp_to, &dp->t_ctp) != 0) + dp->t_ctp = NULL; + } else { + dp->f_ctp = dp->t_ctp = dp; + } + dp->d_csp = csp; csp->cp_refcount++; *dpp = (void*)dp; @@ -82,6 +97,10 @@ iconv_xlat16_close(void *data) { struct iconv_xlat16 *dp = data; + if (dp->f_ctp && dp->f_ctp != data) + iconv_close(dp->f_ctp); + if (dp->t_ctp && dp->t_ctp != data) + iconv_close(dp->t_ctp); dp->d_csp->cp_refcount--; kobj_delete((struct kobj*)data, M_ICONV); return (0); @@ -99,7 +118,7 @@ iconv_xlat16_conv(void *d2p, const char **inbuf, size_t in, on, ir, or, inlen; uint32_t code; u_char u, l; - uint16_t c1, c2; + uint16_t c1, c2, ctmp; if (inbuf == NULL || *inbuf == NULL || outbuf == NULL || *outbuf == NULL) return (0); @@ -111,21 +130,32 @@ iconv_xlat16_conv(void *d2p, const char **inbuf, while(ir > 0 && or > 0) { inlen = 0; - code = '\0'; + code = 0; c1 = ir > 1 ? *(src+1) & 0xff : 0; c2 = *src & 0xff; + ctmp = 0; c1 = c2 & 0x80 ? c1 | 0x100 : c1; c2 = c2 & 0x80 ? c2 & 0x7f : c2; - if (ir > 1 && dp->d_table[c1]) { + if (ir > 1 && dp->d_table[c1] && dp->d_table[c1][c2]) { /* * inbuf char is a double byte char */ - code = dp->d_table[c1][c2]; - if (code) - inlen = 2; + inlen = 2; + + /* toupper,tolower */ + if (casetype == KICONV_FROM_LOWER && dp->f_ctp) + ctmp = towlower(((u_char)*src << 8) | (u_char)*(src + 1), + dp->f_ctp); + else if (casetype == KICONV_FROM_UPPER && dp->f_ctp) + ctmp = towupper(((u_char)*src << 8) | (u_char)*(src + 1), + dp->f_ctp); + if (ctmp) { + c1 = C2I1(ctmp); + c2 = C2I2(ctmp); + } } if (inlen == 0) { @@ -138,11 +168,31 @@ iconv_xlat16_conv(void *d2p, const char **inbuf, * inbuf char is a single byte char */ inlen = 1; - code = dp->d_table[c1][c2]; - if (!code) { - ret = -1; - break; + + if (casetype & (KICONV_FROM_LOWER|KICONV_FROM_UPPER)) + code = dp->d_table[c1][c2]; + + if (casetype == KICONV_FROM_LOWER) { + if (dp->f_ctp) + ctmp = towlower((u_char)*src, dp->f_ctp); + else if (code & XLAT16_HAS_FROM_LOWER_CASE) + ctmp = (u_char)(code >> 16); + } else if (casetype == KICONV_FROM_UPPER) { + if (dp->f_ctp) + ctmp = towupper((u_char)*src, dp->f_ctp); + else if (code & XLAT16_HAS_FROM_UPPER_CASE) + ctmp = (u_char)(code >> 16); } + if (ctmp) { + c1 = C2I1(ctmp << 8); + c2 = C2I2(ctmp << 8); + } + } + + code = dp->d_table[c1][c2]; + if (!code) { + ret = -1; + break; } nullin = (code & XLAT16_ACCEPT_NULL_IN) ? 1 : 0; @@ -157,14 +207,6 @@ iconv_xlat16_conv(void *d2p, const char **inbuf, /* * now start translation */ - if ((casetype == KICONV_FROM_LOWER && code & XLAT16_HAS_FROM_LOWER_CASE) || - (casetype == KICONV_FROM_UPPER && code & XLAT16_HAS_FROM_UPPER_CASE)) { - c2 = (u_char)(code >> 16); - c1 = c2 & 0x80 ? 0x100 : 0; - c2 = c2 & 0x80 ? c2 & 0x7f : c2; - code = dp->d_table[c1][c2]; - } - u = (u_char)(code >> 8); l = (u_char)code; @@ -185,15 +227,38 @@ iconv_xlat16_conv(void *d2p, const char **inbuf, ret = -1; break; } + + /* toupper,tolower */ + if (casetype == KICONV_LOWER && dp->t_ctp) { + code = towlower((uint16_t)code, dp->t_ctp); + u = (u_char)(code >> 8); + l = (u_char)code; + } + if (casetype == KICONV_UPPER && dp->t_ctp) { + code = towupper((uint16_t)code, dp->t_ctp); + u = (u_char)(code >> 8); + l = (u_char)code; + } + *dst++ = u; *dst++ = l; or -= 2; } else { - if ((casetype == KICONV_LOWER && code & XLAT16_HAS_LOWER_CASE) || - (casetype == KICONV_UPPER && code & XLAT16_HAS_UPPER_CASE)) - *dst++ = (u_char)(code >> 16); - else - *dst++ = l; + /* toupper,tolower */ + if (casetype == KICONV_LOWER) { + if (dp->t_ctp) + l = (u_char)towlower(l, dp->t_ctp); + else if (code & XLAT16_HAS_LOWER_CASE) + l = (u_char)(code >> 16); + } + if (casetype == KICONV_UPPER) { + if (dp->t_ctp) + l = (u_char)towupper(l, dp->t_ctp); + else if (code & XLAT16_HAS_UPPER_CASE) + l = (u_char)(code >> 16); + } + + *dst++ = l; or--; } @@ -231,6 +296,55 @@ iconv_xlat16_name(struct iconv_converter_class *dcp) return ("xlat16"); } +static int +iconv_xlat16_tolower(void *d2p, int c) +{ + struct iconv_xlat16 *dp = (struct iconv_xlat16*)d2p; + int c1, c2, out; + + if (c < 0x100) { + c1 = C2I1(c << 8); + c2 = C2I2(c << 8); + } else if (c < 0x10000) { + c1 = C2I1(c); + c2 = C2I2(c); + } else + return (c); + + if (dp->d_table[c1] && dp->d_table[c1][c2] & XLAT16_HAS_LOWER_CASE) { + /*return (int)(dp->d_table[c1][c2] & 0xffff);*/ + out = dp->d_table[c1][c2] & 0xffff; + if ((out & 0xff) == 0) + out = (out >> 8) & 0xff; + return (out); + } else + return (c); +} + +static int +iconv_xlat16_toupper(void *d2p, int c) +{ + struct iconv_xlat16 *dp = (struct iconv_xlat16*)d2p; + int c1, c2, out; + + if (c < 0x100) { + c1 = C2I1(c << 8); + c2 = C2I2(c << 8); + } else if (c < 0x10000) { + c1 = C2I1(c); + c2 = C2I2(c); + } else + return (c); + + if (dp->d_table[c1] && dp->d_table[c1][c2] & XLAT16_HAS_UPPER_CASE) { + out = dp->d_table[c1][c2] & 0xffff; + if ((out & 0xff) == 0) + out = (out >> 8) & 0xff; + return (out); + } else + return (c); +} + static kobj_method_t iconv_xlat16_methods[] = { KOBJMETHOD(iconv_converter_open, iconv_xlat16_open), KOBJMETHOD(iconv_converter_close, iconv_xlat16_close), @@ -240,6 +354,8 @@ static kobj_method_t iconv_xlat16_methods[] = { KOBJMETHOD(iconv_converter_done, iconv_xlat16_done), #endif KOBJMETHOD(iconv_converter_name, iconv_xlat16_name), + KOBJMETHOD(iconv_converter_tolower, iconv_xlat16_tolower), + KOBJMETHOD(iconv_converter_toupper, iconv_xlat16_toupper), KOBJMETHOD_END };