From efd03c68b8b393fcfa26da9cfb9c86af19b25f14 Mon Sep 17 00:00:00 2001 From: Alexander Polakov Date: Fri, 5 Jun 2009 11:59:03 +0400 Subject: [PATCH] Stage 1/5: Update libiconv to support multibyte encodings. Code from FreeBSD, ported by Alexander Polakov, fixed by Simon Schubert. --- sys/libiconv/Makefile | 2 +- sys/libiconv/iconv.c | 64 ++++++-- sys/libiconv/iconv_converter_if.m | 4 +- sys/libiconv/iconv_xlat.c | 17 ++- sys/libiconv/iconv_xlat16.c | 246 ++++++++++++++++++++++++++++++ sys/sys/iconv.h | 104 ++++++++++++- 6 files changed, 414 insertions(+), 23 deletions(-) create mode 100644 sys/libiconv/iconv_xlat16.c diff --git a/sys/libiconv/Makefile b/sys/libiconv/Makefile index 66ba1f0c49..52e5d9ac9f 100644 --- a/sys/libiconv/Makefile +++ b/sys/libiconv/Makefile @@ -4,7 +4,7 @@ .PATH: ${.CURDIR}/../libkern ${.CURDIR}/../sys KMOD= libiconv -SRCS= iconv.c iconv_xlat.c +SRCS= iconv.c iconv_xlat.c iconv_xlat16.c SRCS+= iconv.h SRCS+= iconv_converter_if.c iconv_converter_if.h MFILES= libiconv/iconv_converter_if.m diff --git a/sys/libiconv/iconv.c b/sys/libiconv/iconv.c index d733590166..b3b1e1e6f0 100644 --- a/sys/libiconv/iconv.c +++ b/sys/libiconv/iconv.c @@ -29,25 +29,27 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/sys/libkern/iconv.c,v 1.1.2.1 2001/05/21 08:28:07 bp Exp $ + * $FreeBSD: src/sys/libkern/iconv.c,v 1.12.2.1.2.1 2009/04/15 03:14:26 kensmith Exp $ * $DragonFly: src/sys/libiconv/iconv.c,v 1.8 2008/01/05 14:02:38 swildner Exp $ */ + #include #include #include #include #include - +#include +#include #include "iconv_converter_if.h" SYSCTL_DECL(_kern_iconv); SYSCTL_NODE(_kern, OID_AUTO, iconv, CTLFLAG_RW, NULL, "kernel iconv interface"); -MALLOC_DEFINE(M_ICONV, "ICONV", "ICONV structures"); -MALLOC_DEFINE(M_ICONVDATA, "ICONV data", "ICONV data"); +MALLOC_DEFINE(M_ICONV, "iconv", "ICONV structures"); +MALLOC_DEFINE(M_ICONVDATA, "iconv_data", "ICONV data"); -MODULE_VERSION(libiconv, 1); +MODULE_VERSION(libiconv, 2); #ifdef notnow /* @@ -86,8 +88,10 @@ iconv_mod_unload(void) while ((csp = TAILQ_FIRST(&iconv_cslist)) != NULL) { if (csp->cp_refcount) return EBUSY; - iconv_unregister_cspair(csp); } + + while ((csp = TAILQ_FIRST(&iconv_cslist)) != NULL) + iconv_unregister_cspair(csp); return 0; } @@ -269,7 +273,28 @@ int iconv_conv(void *handle, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { - return ICONV_CONVERTER_CONV(handle, inbuf, inbytesleft, outbuf, outbytesleft); + return ICONV_CONVERTER_CONV(handle, inbuf, inbytesleft, outbuf, outbytesleft, 0, 0); +} + +int +iconv_conv_case(void *handle, const char **inbuf, + size_t *inbytesleft, char **outbuf, size_t *outbytesleft, int casetype) +{ + return ICONV_CONVERTER_CONV(handle, inbuf, inbytesleft, outbuf, outbytesleft, 0, casetype); +} + +int +iconv_convchr(void *handle, const char **inbuf, + size_t *inbytesleft, char **outbuf, size_t *outbytesleft) +{ + return ICONV_CONVERTER_CONV(handle, inbuf, inbytesleft, outbuf, outbytesleft, 1, 0); +} + +int +iconv_convchr_case(void *handle, const char **inbuf, + size_t *inbytesleft, char **outbuf, size_t *outbytesleft, int casetype) +{ + return ICONV_CONVERTER_CONV(handle, inbuf, inbytesleft, outbuf, outbytesleft, 1, casetype); } /* @@ -357,6 +382,8 @@ iconv_sysctl_add(SYSCTL_HANDLER_ARGS) /* * Make sure all user-supplied strings are terminated before * proceeding. + * + * XXX return EINVAL if strings are not properly terminated */ din.ia_converter[ICONV_CNVNMAXLEN-1] = 0; din.ia_to[ICONV_CSNMAXLEN-1] = 0; @@ -377,6 +404,7 @@ iconv_sysctl_add(SYSCTL_HANDLER_ARGS) error = SYSCTL_OUT(req, &dout, sizeof(dout)); if (error) goto bad; + ICDEBUG("%s => %s, %d bytes\n",din.ia_from, din.ia_to, din.ia_datalen); return 0; bad: iconv_unregister_cspair(csp); @@ -427,7 +455,7 @@ iconv_converter_handler(module_t mod, int type, void *data) } /* - * Common used functions + * Common used functions (don't use with unicode) */ char * iconv_convstr(void *handle, char *dst, const char *src) @@ -479,7 +507,9 @@ int iconv_lookupcp(char **cpp, const char *s) { if (cpp == NULL) { - ICDEBUG("warning a NULL list passed\n"); + ICDEBUG("warning a NULL list passed\n", ""); /* XXX ISO variadic macros cannot + leave out the + variadic args */ return ENOENT; } for (; *cpp; cpp++) @@ -487,3 +517,19 @@ iconv_lookupcp(char **cpp, const char *s) return 0; return ENOENT; } + +#if 0 +/* + * Return if fsname is in use of not + */ +int +iconv_vfs_refcount(const char *fsname) +{ + struct vfsconf *vfsp; + + getvfsbyname(fsname, vfsp); + if (vfsp != NULL && vfsp->vfc_refcount > 0) + return (EBUSY); + return (0); +} +#endif diff --git a/sys/libiconv/iconv_converter_if.m b/sys/libiconv/iconv_converter_if.m index 0fc94f1cd9..8ae9bb3e6c 100644 --- a/sys/libiconv/iconv_converter_if.m +++ b/sys/libiconv/iconv_converter_if.m @@ -29,7 +29,7 @@ # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # -# $FreeBSD: src/sys/libkern/iconv_converter_if.m,v 1.1.2.1 2001/05/21 08:28:07 bp Exp $ +# $FreeBSD: src/sys/libkern/iconv_converter_if.m,v 1.3.20.1 2009/04/15 03:14:26 kensmith Exp $ # $DragonFly: src/sys/libiconv/iconv_converter_if.m,v 1.3 2004/03/18 18:27:47 dillon Exp $ # @@ -54,6 +54,8 @@ METHOD int conv { size_t *inbytesleft; char **outbuf; size_t *outbytesleft; + int convchar; + int casetype; }; STATICMETHOD int init { diff --git a/sys/libiconv/iconv_xlat.c b/sys/libiconv/iconv_xlat.c index ffa8d0ece3..9d4dd994b5 100644 --- a/sys/libiconv/iconv_xlat.c +++ b/sys/libiconv/iconv_xlat.c @@ -29,9 +29,10 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/sys/libkern/iconv_xlat.c,v 1.1.2.1 2001/05/21 08:28:07 bp Exp $ + * $FreeBSD: src/sys/libkern/iconv_xlat.c,v 1.5.30.1 2009/04/15 03:14:26 kensmith Exp $ * $DragonFly: src/sys/libiconv/iconv_xlat.c,v 1.3 2004/03/18 18:27:47 dillon Exp $ */ + #include #include #include @@ -45,7 +46,7 @@ */ #ifdef MODULE_DEPEND -MODULE_DEPEND(iconv_xlat, libiconv, 1, 1, 1); +MODULE_DEPEND(iconv_xlat, libiconv, 2, 2, 2); #endif /* @@ -83,7 +84,8 @@ iconv_xlat_close(void *data) static int iconv_xlat_conv(void *d2p, const char **inbuf, - size_t *inbytesleft, char **outbuf, size_t *outbytesleft) + size_t *inbytesleft, char **outbuf, size_t *outbytesleft, + int convchar, int casetype) { struct iconv_xlat *dp = (struct iconv_xlat*)d2p; const char *src; @@ -92,14 +94,19 @@ iconv_xlat_conv(void *d2p, const char **inbuf, if (inbuf == NULL || *inbuf == NULL || outbuf == NULL || *outbuf == NULL) return 0; - r = n = min(*inbytesleft, *outbytesleft); + if (casetype != 0) + return -1; + if (convchar == 1) + r = n = 1; + else + r = n = min(*inbytesleft, *outbytesleft); src = *inbuf; dst = *outbuf; while(r--) *dst++ = dp->d_table[(u_char)*src++]; *inbuf += n; *outbuf += n; - *inbytesleft += n; + *inbytesleft -= n; *outbytesleft -= n; return 0; } diff --git a/sys/libiconv/iconv_xlat16.c b/sys/libiconv/iconv_xlat16.c new file mode 100644 index 0000000000..aec7fd96bd --- /dev/null +++ b/sys/libiconv/iconv_xlat16.c @@ -0,0 +1,246 @@ +/*- + * Copyright (c) 2003, Ryuichiro Imura + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/libkern/iconv_xlat16.c,v 1.3.20.1 2009/04/15 03:14:26 kensmith Exp $ + */ + +#include +#include +#include +#include +#include + +#include "iconv_converter_if.h" + +/* + * "XLAT16" converter + */ + +#ifdef MODULE_DEPEND +MODULE_DEPEND(iconv_xlat16, libiconv, 2, 2, 2); +#endif + +/* + * XLAT16 converter instance + */ +struct iconv_xlat16 { + KOBJ_FIELDS; + uint32_t * d_table[0x200]; + struct iconv_cspair * d_csp; +}; + +static int +iconv_xlat16_open(struct iconv_converter_class *dcp, + struct iconv_cspair *csp, struct iconv_cspair *cspf, void **dpp) +{ + struct iconv_xlat16 *dp; + uint32_t *headp, **idxp; + int i; + + dp = (struct iconv_xlat16 *)kobj_create((struct kobj_class*)dcp, M_ICONV, M_WAITOK); + headp = (uint32_t *)((caddr_t)csp->cp_data + sizeof(dp->d_table)); + idxp = (uint32_t **)csp->cp_data; + for (i = 0 ; i < 0x200 ; i++) { + if (*idxp) { + dp->d_table[i] = headp; + headp += 0x80; + } else { + dp->d_table[i] = NULL; + } + idxp++; + } + dp->d_csp = csp; + csp->cp_refcount++; + *dpp = (void*)dp; + return (0); +} + +static int +iconv_xlat16_close(void *data) +{ + struct iconv_xlat16 *dp = data; + + dp->d_csp->cp_refcount--; + kobj_delete((struct kobj*)data, M_ICONV); + return (0); +} + +static int +iconv_xlat16_conv(void *d2p, const char **inbuf, + size_t *inbytesleft, char **outbuf, size_t *outbytesleft, + int convchar, int casetype) +{ + struct iconv_xlat16 *dp = (struct iconv_xlat16*)d2p; + const char *src; + char *dst; + int nullin, ret = 0; + size_t in, on, ir, or, inlen; + uint32_t code; + u_char u, l; + uint16_t c1, c2; + + if (inbuf == NULL || *inbuf == NULL || outbuf == NULL || *outbuf == NULL) + return (0); + ir = in = *inbytesleft; + or = on = *outbytesleft; + src = *inbuf; + dst = *outbuf; + + while(ir > 0 && or > 0) { + + inlen = 0; + code = '\0'; + + c1 = ir > 1 ? *(src+1) & 0xff : 0; + c2 = *src & 0xff; + + c1 = c2 & 0x80 ? c1 | 0x100 : c1; + c2 = c2 & 0x80 ? c2 & 0x7f : c2; + + if (ir > 1 && dp->d_table[c1]) { + /* + * inbuf char is a double byte char + */ + code = dp->d_table[c1][c2]; + if (code) + inlen = 2; + } + + if (inlen == 0) { + c1 &= 0xff00; + if (!dp->d_table[c1]) { + ret = -1; + break; + } + /* + * inbuf char is a single byte char + */ + inlen = 1; + code = dp->d_table[c1][c2]; + if (!code) { + ret = -1; + break; + } + } + + nullin = (code & XLAT16_ACCEPT_NULL_IN) ? 1 : 0; + if (inlen == 1 && nullin) { + /* + * XLAT16_ACCEPT_NULL_IN requires inbuf has 2byte + */ + ret = -1; + break; + } + + /* + * now start translation + */ + if ((casetype == KICONV_FROM_LOWER && code & XLAT16_HAS_FROM_LOWER_CASE) || + (casetype == KICONV_FROM_UPPER && code & XLAT16_HAS_FROM_UPPER_CASE)) { + c2 = (u_char)(code >> 16); + c1 = c2 & 0x80 ? 0x100 : 0; + c2 = c2 & 0x80 ? c2 & 0x7f : c2; + code = dp->d_table[c1][c2]; + } + + u = (u_char)(code >> 8); + l = (u_char)code; + +#ifdef XLAT16_ACCEPT_3BYTE_CHR + if (code & XLAT16_IS_3BYTE_CHR) { + if (or < 3) { + ret = -1; + break; + } + *dst++ = u; + *dst++ = l; + *dst++ = (u_char)(code >> 16); + or -= 3; + } else +#endif + if (u || code & XLAT16_ACCEPT_NULL_OUT) { + if (or < 2) { + ret = -1; + break; + } + *dst++ = u; + *dst++ = l; + or -= 2; + } else { + if ((casetype == KICONV_LOWER && code & XLAT16_HAS_LOWER_CASE) || + (casetype == KICONV_UPPER && code & XLAT16_HAS_UPPER_CASE)) + *dst++ = (u_char)(code >> 16); + else + *dst++ = l; + or--; + } + + if (inlen == 2) { + /* + * there is a case that inbuf char is a single + * byte char while inlen == 2 + */ + if ((u_char)*(src+1) == 0 && !nullin ) { + src++; + ir--; + } else { + src += 2; + ir -= 2; + } + } else { + src++; + ir--; + } + + if (convchar == 1) + break; + } + + *inbuf += in - ir; + *outbuf += on - or; + *inbytesleft -= in - ir; + *outbytesleft -= on - or; + return (ret); +} + +static const char * +iconv_xlat16_name(struct iconv_converter_class *dcp) +{ + return ("xlat16"); +} + +static kobj_method_t iconv_xlat16_methods[] = { + KOBJMETHOD(iconv_converter_open, iconv_xlat16_open), + KOBJMETHOD(iconv_converter_close, iconv_xlat16_close), + KOBJMETHOD(iconv_converter_conv, iconv_xlat16_conv), +#if 0 + KOBJMETHOD(iconv_converter_init, iconv_xlat16_init), + KOBJMETHOD(iconv_converter_done, iconv_xlat16_done), +#endif + KOBJMETHOD(iconv_converter_name, iconv_xlat16_name), + {0, 0} +}; + +KICONV_CONVERTER(xlat16, sizeof(struct iconv_xlat16)); diff --git a/sys/sys/iconv.h b/sys/sys/iconv.h index 322479c724..dcbb5dfefe 100644 --- a/sys/sys/iconv.h +++ b/sys/sys/iconv.h @@ -29,7 +29,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/sys/sys/iconv.h,v 1.1.2.1 2001/05/21 08:28:07 bp Exp $ + * $FreeBSD: src/sys/sys/iconv.h,v 1.12.8.1 2009/04/15 03:14:26 kensmith Exp $ * $DragonFly: src/sys/sys/iconv.h,v 1.6 2007/10/03 18:58:20 dillon Exp $ */ #ifndef _SYS_ICONV_H_ @@ -41,7 +41,21 @@ #define ICONV_CSNMAXLEN 31 /* maximum length of charset name */ #define ICONV_CNVNMAXLEN 31 /* maximum length of converter name */ -#define ICONV_CSMAXDATALEN 1024 /* maximum size of data associated with cs pair */ +/* maximum size of data associated with cs pair */ +#define ICONV_CSMAXDATALEN (sizeof(caddr_t) * 0x200 + sizeof(uint32_t) * 0x200 * 0x80) + +#define XLAT16_ACCEPT_NULL_OUT 0x01000000 +#define XLAT16_ACCEPT_NULL_IN 0x02000000 +#define XLAT16_HAS_LOWER_CASE 0x04000000 +#define XLAT16_HAS_UPPER_CASE 0x08000000 +#define XLAT16_HAS_FROM_LOWER_CASE 0x10000000 +#define XLAT16_HAS_FROM_UPPER_CASE 0x20000000 +#define XLAT16_IS_3BYTE_CHR 0x40000000 + +#define KICONV_LOWER 1 /* tolower converted character */ +#define KICONV_UPPER 2 /* toupper converted character */ +#define KICONV_FROM_LOWER 4 /* tolower source character, then convert */ +#define KICONV_FROM_UPPER 8 /* toupper source character, then convert */ /* * Entry for cslist sysctl @@ -79,7 +93,14 @@ struct iconv_add_out { __BEGIN_DECLS +#define ENCODING_UNICODE "UTF-16BE" +#define KICONV_VENDOR_MICSFT 1 /* Microsoft Vendor Code for quirk */ + int kiconv_add_xlat_table(const char *, const char *, const u_char *); +int kiconv_add_xlat16_cspair(const char *, const char *, int); +int kiconv_add_xlat16_cspairs(const char *, const char *); +int kiconv_add_xlat16_table(const char *, const char *, const void *, int); +const char *kiconv_quirkcs(const char *, int); __END_DECLS @@ -112,9 +133,9 @@ struct iconv_cspair { }; #define KICONV_CONVERTER(name,size) \ - static DEFINE_CLASS_EXT(iconv_ ## name, iconv_ ## name ## _class, \ - iconv_ ## name ## _methods, \ - (size), iconv_converter_class); \ + static struct iconv_converter_class iconv_ ## name ## _class = { \ + "iconv_"#name, iconv_ ## name ## _methods, size, NULL \ + }; \ static moduledata_t iconv_ ## name ## _mod = { \ "iconv_"#name, iconv_converter_handler, \ (void*)&iconv_ ## name ## _class \ @@ -140,8 +161,77 @@ int iconv_open(const char *to, const char *from, void **handle); int iconv_close(void *handle); int iconv_conv(void *handle, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); +int iconv_conv_case(void *handle, const char **inbuf, + size_t *inbytesleft, char **outbuf, size_t *outbytesleft, int casetype); +int iconv_convchr(void *handle, const char **inbuf, + size_t *inbytesleft, char **outbuf, size_t *outbytesleft); +int iconv_convchr_case(void *handle, const char **inbuf, + size_t *inbytesleft, char **outbuf, size_t *outbytesleft, int casetype); char* iconv_convstr(void *handle, char *dst, const char *src); void* iconv_convmem(void *handle, void *dst, const void *src, int size); +#if 0 +int iconv_vfs_refcount(const char *fsname); +#endif + +/* + * Bridge struct of iconv functions + */ +struct iconv_functions { + int (*open)(const char *to, const char *from, void **handle); + int (*close)(void *handle); + int (*conv)(void *handle, const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft); + int (*conv_case)(void *handle, const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft, int casetype); + int (*convchr)(void *handle, const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft); + int (*convchr_case)(void *handle, const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft, int casetype); +}; + +#define VFS_DECLARE_ICONV(fsname) \ + static struct iconv_functions fsname ## _iconv_core = { \ + iconv_open, \ + iconv_close, \ + iconv_conv, \ + iconv_conv_case, \ + iconv_convchr, \ + iconv_convchr_case \ + }; \ + extern struct iconv_functions *fsname ## _iconv; \ + static int fsname ## _iconv_mod_handler(module_t mod, \ + int type, void *d); \ + static int \ + fsname ## _iconv_mod_handler(module_t mod, int type, void *d) \ + { \ + int error = 0; \ + switch(type) { \ + case MOD_LOAD: \ + fsname ## _iconv = & fsname ## _iconv_core; \ + break; \ + case MOD_UNLOAD: \ + /* error = iconv_vfs_refcount(#fsname); */ \ + error = module_lookupbyname(#fsname); \ + if (error) \ + return (EBUSY); \ + fsname ## _iconv = NULL; \ + break; \ + default: \ + error = EINVAL; \ + break; \ + } \ + return (error); \ + } \ + static moduledata_t fsname ## _iconv_mod = { \ + #fsname"_iconv", \ + fsname ## _iconv_mod_handler, \ + NULL \ + }; \ + DECLARE_MODULE(fsname ## _iconv, fsname ## _iconv_mod, \ + SI_SUB_DRIVERS, SI_ORDER_ANY); \ + MODULE_DEPEND(fsname ## _iconv, fsname, 1, 1, 1); \ + MODULE_DEPEND(fsname ## _iconv, libiconv, 2, 2, 2); \ + MODULE_VERSION(fsname ## _iconv, 1) /* * Internal functions @@ -153,9 +243,9 @@ int iconv_converter_donestub(struct iconv_converter_class *dp); int iconv_converter_handler(module_t mod, int type, void *data); #ifdef ICONV_DEBUG -#define ICDEBUG(format, args...) kprintf("%s: "format, __func__ ,## args) +#define ICDEBUG(format, ...) kprintf("%s: "format, __func__ , __VA_ARGS__) #else -#define ICDEBUG(format, args...) +#define ICDEBUG(format, ...) #endif #endif /* !_KERNEL */ -- 2.41.0