kernel/libiconv: Sync with FreeBSD.
author Sascha Wildner <saw@online.de>
Sun, 28 Aug 2016 09:25:45 +0000 (11:25 +0200)
committer Sascha Wildner <saw@online.de>
Sun, 28 Aug 2016 09:31:28 +0000 (11:31 +0200)
sys/conf/files
sys/libiconv/Makefile
sys/libiconv/iconv.c
sys/libiconv/iconv_converter_if.m
sys/libiconv/iconv_ucs.c [new file with mode: 0644]
sys/libiconv/iconv_xlat.c
sys/libiconv/iconv_xlat16.c

index f29127c..2a7b672 100644 (file)
@@ -1943,6 +1943,7 @@ libkern/strtouq.c                 standard
 #libkern/stack_protector.c             standard
 libiconv/iconv.c                       optional libiconv
 libiconv/iconv_converter_if.m          optional libiconv
+libiconv/iconv_ucs.c                   optional libiconv
 libiconv/iconv_xlat.c                  optional libiconv
 libiconv/iconv_xlat16.c                        optional libiconv
 libprop/prop_array.c                   standard
index 73b2fc2..070aa1c 100644 (file)
@@ -1,13 +1,14 @@
 # $FreeBSD: src/sys/modules/libiconv/Makefile,v 1.4 2003/09/26 20:26:24 fjoe Exp $
 
-.PATH: ${.CURDIR}/../libkern ${.CURDIR}/../sys
+.PATH: ${.CURDIR}/../sys
 
 KMOD=  libiconv
-SRCS=  iconv.c iconv_xlat.c iconv_xlat16.c
+SRCS=  iconv.c iconv_ucs.c iconv_xlat.c iconv_xlat16.c
 SRCS+= iconv.h 
 SRCS+= iconv_converter_if.c iconv_converter_if.h
 
-EXPORT_SYMS=   iconv_open      \
+EXPORT_SYMS=   iconv_add       \
+               iconv_open      \
                iconv_close     \
                iconv_conv      \
                iconv_conv_case \
index 2c172d1..970d5be 100644 (file)
@@ -1,5 +1,5 @@
-/*
- * Copyright (c) 2000-2001, Boris Popov
+/*-
+ * Copyright (c) 2000-2001 Boris Popov
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *    This product includes software developed by Boris Popov.
- * 4. Neither the name of the author nor the names of any co-contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/libkern/iconv.c,v 1.12.2.1.2.1 2009/04/15 03:14:26 kensmith Exp $
- * $DragonFly: src/sys/libiconv/iconv.c,v 1.8 2008/01/05 14:02:38 swildner Exp $
+ * $FreeBSD: head/sys/libkern/iconv.c 267291 2014-06-09 19:27:47Z jhb $
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/iconv.h>
+#include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/syslog.h>
+
 #include "iconv_converter_if.h"
 
 SYSCTL_DECL(_kern_iconv);
@@ -47,10 +42,12 @@ SYSCTL_DECL(_kern_iconv);
 SYSCTL_NODE(_kern, OID_AUTO, iconv, CTLFLAG_RW, NULL, "kernel iconv interface");
 
 MALLOC_DEFINE(M_ICONV, "iconv", "ICONV structures");
-MALLOC_DEFINE(M_ICONVDATA, "iconv_data", "ICONV data");
+static MALLOC_DEFINE(M_ICONVDATA, "iconv_data", "ICONV data");
 
 MODULE_VERSION(libiconv, 2);
 
+static struct lock iconv_lock;
+
 #ifdef notnow
 /*
  * iconv converter instance
@@ -85,13 +82,18 @@ iconv_mod_unload(void)
 {
        struct iconv_cspair *csp;
 
-       while ((csp = TAILQ_FIRST(&iconv_cslist)) != NULL) {
-               if (csp->cp_refcount)
+       lockmgr(&iconv_lock, LK_EXCLUSIVE);
+       TAILQ_FOREACH(csp, &iconv_cslist, cp_link) {
+               if (csp->cp_refcount) {
+                       lockmgr(&iconv_lock, LK_RELEASE);
                        return EBUSY;
+               }
        }
 
        while ((csp = TAILQ_FIRST(&iconv_cslist)) != NULL)
                iconv_unregister_cspair(csp);
+       lockmgr(&iconv_lock, LK_RELEASE);
+       lockuninit(&iconv_lock);
        return 0;
 }
 
@@ -103,6 +105,7 @@ iconv_mod_handler(module_t mod, int type, void *data)
        switch (type) {
            case MOD_LOAD:
                error = 0;
+               lockinit(&iconv_lock, "iconv", 0, LK_CANRECURSE);
                break;
            case MOD_UNLOAD:
                error = iconv_mod_unload();
@@ -162,8 +165,8 @@ iconv_lookupcs(const char *to, const char *from, struct iconv_cspair **cspp)
        struct iconv_cspair *csp;
 
        TAILQ_FOREACH(csp, &iconv_cslist, cp_link) {
-               if (strcmp(csp->cp_to, to) == 0 &&
-                   strcmp(csp->cp_from, from) == 0) {
+               if (strcasecmp(csp->cp_to, to) == 0 &&
+                   strcasecmp(csp->cp_from, from) == 0) {
                        if (cspp)
                                *cspp = csp;
                        return 0;
@@ -297,6 +300,18 @@ iconv_convchr_case(void *handle, const char **inbuf,
        return ICONV_CONVERTER_CONV(handle, inbuf, inbytesleft, outbuf, outbytesleft, 1, casetype);
 }
 
+/*
+ * Lower-case the character code c by dispatching to the tolower method
+ * of the converter instance referenced by handle.
+ */
+int
+towlower(int c, void *handle)
+{
+       int lowered;
+
+       lowered = ICONV_CONVERTER_TOLOWER(handle, c);
+       return (lowered);
+}
+
+/*
+ * Upper-case the character code c by dispatching to the toupper method
+ * of the converter instance referenced by handle.
+ */
+int
+towupper(int c, void *handle)
+{
+       int raised;
+
+       raised = ICONV_CONVERTER_TOUPPER(handle, c);
+       return (raised);
+}
+
 /*
  * Give a list of loaded converters. Each name terminated with 0.
  * An empty string terminates the list.
@@ -311,6 +326,7 @@ iconv_sysctl_drvlist(SYSCTL_HANDLER_ARGS)
 
        error = 0;
 
+       lockmgr(&iconv_lock, LK_SHARED);
        TAILQ_FOREACH(dcp, &iconv_converters, cc_link) {
                name = ICONV_CONVERTER_NAME(dcp);
                if (name == NULL)
@@ -319,6 +335,7 @@ iconv_sysctl_drvlist(SYSCTL_HANDLER_ARGS)
                if (error)
                        break;
        }
+       lockmgr(&iconv_lock, LK_RELEASE);
        if (error)
                return error;
        spc = 0;
@@ -343,6 +360,7 @@ iconv_sysctl_cslist(SYSCTL_HANDLER_ARGS)
        bzero(&csi, sizeof(csi));
        csi.cs_version = ICONV_CSPAIR_INFO_VER;
 
+       lockmgr(&iconv_lock, LK_SHARED);
        TAILQ_FOREACH(csp, &iconv_cslist, cp_link) {
                csi.cs_id = csp->cp_id;
                csi.cs_refcount = csp->cp_refcount;
@@ -353,12 +371,25 @@ iconv_sysctl_cslist(SYSCTL_HANDLER_ARGS)
                if (error)
                        break;
        }
+       lockmgr(&iconv_lock, LK_RELEASE);
        return error;
 }
 
 SYSCTL_PROC(_kern_iconv, OID_AUTO, cslist, CTLFLAG_RD | CTLTYPE_OPAQUE,
            NULL, 0, iconv_sysctl_cslist, "S,xlat", "registered charset pairs");
 
+/*
+ * Register the charset pair from -> to with the converter class named
+ * by converter.  No per-pair data is attached (NULL is passed for it).
+ *
+ * Returns EINVAL when no converter with that name is found, otherwise
+ * whatever iconv_register_cspair() returns.
+ *
+ * NOTE(review): this function does not take iconv_lock itself; confirm
+ * that every caller already holds it.
+ */
+int
+iconv_add(const char *converter, const char *to, const char *from)
+{
+       struct iconv_converter_class *cls;
+       struct iconv_cspair *pair;
+       int error;
+
+       error = iconv_lookupconv(converter, &cls);
+       if (error != 0)
+               return EINVAL;
+
+       error = iconv_register_cspair(to, from, cls, NULL, &pair);
+       return error;
+}
+
 /*
  * Add new charset pair
  */
@@ -378,22 +409,20 @@ iconv_sysctl_add(SYSCTL_HANDLER_ARGS)
                return EINVAL;
        if (din.ia_datalen > ICONV_CSMAXDATALEN)
                return EINVAL;
-
-       /*
-        * Make sure all user-supplied strings are terminated before
-        * proceeding.
-        *
-        * XXX return EINVAL if strings are not properly terminated
-        */
-       din.ia_converter[ICONV_CNVNMAXLEN-1] = 0;
-       din.ia_to[ICONV_CSNMAXLEN-1] = 0;
-       din.ia_from[ICONV_CSNMAXLEN-1] = 0;
-
+       if (strlen(din.ia_from) >= ICONV_CSNMAXLEN)
+               return EINVAL;
+       if (strlen(din.ia_to) >= ICONV_CSNMAXLEN)
+               return EINVAL;
+       if (strlen(din.ia_converter) >= ICONV_CNVNMAXLEN)
+               return EINVAL;
        if (iconv_lookupconv(din.ia_converter, &dcp) != 0)
                return EINVAL;
+       lockmgr(&iconv_lock, LK_EXCLUSIVE);
        error = iconv_register_cspair(din.ia_to, din.ia_from, dcp, NULL, &csp);
-       if (error)
+       if (error) {
+               lockmgr(&iconv_lock, LK_RELEASE);
                return error;
+       }
        if (din.ia_datalen) {
                csp->cp_data = kmalloc(din.ia_datalen, M_ICONVDATA, M_WAITOK);
                error = copyin(din.ia_data, csp->cp_data, din.ia_datalen);
@@ -404,10 +433,12 @@ iconv_sysctl_add(SYSCTL_HANDLER_ARGS)
        error = SYSCTL_OUT(req, &dout, sizeof(dout));
        if (error)
                goto bad;
+       lockmgr(&iconv_lock, LK_RELEASE);
        ICDEBUG("%s => %s, %d bytes\n",din.ia_from, din.ia_to, din.ia_datalen);
        return 0;
 bad:
        iconv_unregister_cspair(csp);
+       lockmgr(&iconv_lock, LK_RELEASE);
        return error;
 }
 
@@ -429,6 +460,12 @@ iconv_converter_donestub(struct iconv_converter_class *dp)
        return 0;
 }
 
+/*
+ * Default case-mapping method: hands the character code back unchanged.
+ * handle is ignored.
+ *
+ * NOTE(review): the interface file declares the tolower/toupper methods
+ * with their arguments in (handle, c) order while this stub takes
+ * (c, handle); confirm the stub is reached with the order it expects.
+ */
+int
+iconv_converter_tolowerstub(int c, void *handle)
+{
+       return c;
+}
+
 int
 iconv_converter_handler(module_t mod, int type, void *data)
 {
@@ -437,16 +474,22 @@ iconv_converter_handler(module_t mod, int type, void *data)
 
        switch (type) {
            case MOD_LOAD:
+               lockmgr(&iconv_lock, LK_EXCLUSIVE);
                error = iconv_register_converter(dcp);
-               if (error)
+               if (error) {
+                       lockmgr(&iconv_lock, LK_RELEASE);
                        break;
+               }
                error = ICONV_CONVERTER_INIT(dcp);
                if (error)
                        iconv_unregister_converter(dcp);
+               lockmgr(&iconv_lock, LK_RELEASE);
                break;
            case MOD_UNLOAD:
+               lockmgr(&iconv_lock, LK_EXCLUSIVE);
                ICONV_CONVERTER_DONE(dcp);
                error = iconv_unregister_converter(dcp);
+               lockmgr(&iconv_lock, LK_RELEASE);
                break;
            default:
                error = EINVAL;
@@ -461,8 +504,8 @@ char *
 iconv_convstr(void *handle, char *dst, const char *src)
 {
        char *p = dst;
-       int error;
        size_t inlen, outlen;
+       int error;
 
        if (handle == NULL) {
                strcpy(dst, src);
@@ -507,9 +550,7 @@ int
 iconv_lookupcp(char **cpp, const char *s)
 {
        if (cpp == NULL) {
-               ICDEBUG("warning a NULL list passed\n", ""); /* XXX ISO variadic                                                                macros cannot
-                                                               leave out the
-                                                               variadic args */
+               ICDEBUG("warning a NULL list passed\n", "");
                return ENOENT;
        }
        for (; *cpp; cpp++)
index 8ae9bb3..d785f8f 100644 (file)
@@ -1,5 +1,5 @@
-#
-# Copyright (c) 2000-2001, Boris Popov
+#-
+# Copyright (c) 2000-2001 Boris Popov
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # 2. Redistributions in binary form must reproduce the above copyright
 #    notice, this list of conditions and the following disclaimer in the
 #    documentation and/or other materials provided with the distribution.
-# 3. All advertising materials mentioning features or use of this software
-#    must display the following acknowledgement:
-#    This product includes software developed by Boris Popov.
-# 4. Neither the name of the author nor the names of any co-contributors
-#    may be used to endorse or promote products derived from this software
-#    without specific prior written permission.
 #
 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@@ -29,8 +23,7 @@
 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.
 #
-# $FreeBSD: src/sys/libkern/iconv_converter_if.m,v 1.3.20.1 2009/04/15 03:14:26 kensmith Exp $
-# $DragonFly: src/sys/libiconv/iconv_converter_if.m,v 1.3 2004/03/18 18:27:47 dillon Exp $
+# $FreeBSD: head/sys/libkern/iconv_converter_if.m 206361 2010-04-07 16:50:38Z joel $
 #
 
 #include <sys/iconv.h>
@@ -62,10 +55,20 @@ STATICMETHOD int init {
        struct iconv_converter_class *dcp;
 } DEFAULT iconv_converter_initstub;
 
-STATICMETHOD void done {
+STATICMETHOD int done {
        struct iconv_converter_class *dcp;
 } DEFAULT iconv_converter_donestub;
 
 STATICMETHOD const char * name {
        struct iconv_converter_class *dcp;
 };
+
+# Map the character code c to lower case using the converter instance
+# handle.  Converters that do not provide this method fall back to
+# iconv_converter_tolowerstub.
+METHOD int tolower {
+       void *handle;
+       int c;
+} DEFAULT iconv_converter_tolowerstub;
+
+# Map the character code c to upper case using the converter instance
+# handle.  The default is the same stub as for tolower
+# (iconv_converter_tolowerstub), i.e. there is no separate toupper stub.
+METHOD int toupper {
+       void *handle;
+       int c;
+} DEFAULT iconv_converter_tolowerstub;
diff --git a/sys/libiconv/iconv_ucs.c b/sys/libiconv/iconv_ucs.c
new file mode 100644 (file)
index 0000000..a0294f7
--- /dev/null
@@ -0,0 +1,537 @@
+/*-
+ * Copyright (c) 2003, 2005 Ryuichiro Imura
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/libkern/iconv_ucs.c 267291 2014-06-09 19:27:47Z jhb $
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/iconv.h>
+
+#include "iconv_converter_if.h"
+
+/*
+ * "UCS" converter
+ */
+
+/*
+ * Bits stored in iconv_ucs.convtype (set up by iconv_ucs_open()):
+ *
+ *  COMBINE     a second charset pair (cspf) was supplied; the conversion
+ *              may be chained through helper converters
+ *  FROM_UTF8   source charset name matched "UTF-8"
+ *  TO_UTF8     destination charset name matched "UTF-8"
+ *  FROM_LE     source 16-bit units are read byte-swapped
+ *  TO_LE       destination 16-bit units are written byte-swapped
+ *  FROM_UTF16  source is UTF-16 (4-byte surrogate pairs accepted)
+ *  TO_UTF16    destination is UTF-16 (4-byte surrogate pairs accepted)
+ *  UCS4        set when ENCODING_UNICODE equals ENCODING_UTF16; enables
+ *              the 4-byte surrogate-pair paths
+ */
+#define        KICONV_UCS_COMBINE      0x1
+#define        KICONV_UCS_FROM_UTF8    0x2
+#define        KICONV_UCS_TO_UTF8      0x4
+#define        KICONV_UCS_FROM_LE      0x8
+#define        KICONV_UCS_TO_LE        0x10
+#define        KICONV_UCS_FROM_UTF16   0x20
+#define        KICONV_UCS_TO_UTF16     0x40
+#define        KICONV_UCS_UCS4         0x80
+
+/* Charset names this file compares against and passes to iconv_open(). */
+#define        ENCODING_UTF16  "UTF-16BE"
+#define        ENCODING_UTF8   "UTF-8"
+
+/*
+ * Charset names recognised directly by the UCS converter, with the
+ * convtype bits to set when a name appears as the source (from_flag)
+ * or as the destination (to_flag).  A NULL name ends the table.
+ */
+static struct {
+       const char *name;
+       int from_flag, to_flag;
+} unicode_family[] = {
+       {
+               .name = "UTF-8",
+               .from_flag = KICONV_UCS_FROM_UTF8,
+               .to_flag = KICONV_UCS_TO_UTF8
+       },
+       {
+               .name = "UCS-2LE",
+               .from_flag = KICONV_UCS_FROM_LE,
+               .to_flag = KICONV_UCS_TO_LE
+       },
+       {
+               .name = "UTF-16BE",
+               .from_flag = KICONV_UCS_FROM_UTF16,
+               .to_flag = KICONV_UCS_TO_UTF16
+       },
+       {
+               .name = "UTF-16LE",
+               .from_flag = KICONV_UCS_FROM_UTF16 | KICONV_UCS_FROM_LE,
+               .to_flag = KICONV_UCS_TO_UTF16 | KICONV_UCS_TO_LE
+       },
+       { .name = NULL, .from_flag = 0, .to_flag = 0 }
+};
+
+static uint32_t utf8_to_ucs4(const char *src, size_t *utf8width, size_t srclen);
+static u_char *ucs4_to_utf8(uint32_t ucs4, char * dst, size_t *utf8width, size_t dstlen);
+static uint32_t encode_surrogate(uint32_t code);
+static uint32_t decode_surrogate(const u_char *ucs);
+
+#ifdef MODULE_DEPEND
+MODULE_DEPEND(iconv_ucs, libiconv, 2, 2, 2);
+#endif
+
+/*
+ * UCS converter instance
+ */
+struct iconv_ucs {
+       KOBJ_FIELDS;
+       /* bitmask of KICONV_UCS_* flags chosen in iconv_ucs_open() */
+       int                     convtype;
+       /* the csp argument given to iconv_ucs_open() */
+       struct iconv_cspair *   d_csp;
+       /* the cspf argument, recorded only when its refcount was taken */
+       struct iconv_cspair *   d_cspf;
+       /* helper converter: source charset -> ENCODING_UNICODE, or NULL */
+       void *                  f_ctp;
+       /* helper converter: ENCODING_UNICODE -> destination, or NULL */
+       void *                  t_ctp;
+       /* handle passed to towlower()/towupper() for case mapping, or NULL */
+       void *                  ctype;
+};
+
+/*
+ * Create a UCS converter instance.
+ *
+ * dcp   converter class the instance is created from
+ * csp   charset pair supplying the destination name (and the source name
+ *       when cspf is NULL)
+ * cspf  optional second pair supplying the source name; when non-NULL the
+ *       instance is marked KICONV_UCS_COMBINE
+ * dpp   receives the new instance
+ *
+ * Helper converters are opened on a best-effort basis: the results of the
+ * iconv_open() calls are not checked, and a handle that stays NULL simply
+ * disables the corresponding path in iconv_ucs_conv().
+ *
+ * Always returns 0.
+ */
+static int
+iconv_ucs_open(struct iconv_converter_class *dcp,
+       struct iconv_cspair *csp, struct iconv_cspair *cspf, void **dpp)
+{
+       struct iconv_ucs *dp;
+       int i;
+       const char *from, *to;
+
+       dp = (struct iconv_ucs *)kobj_create((struct kobj_class*)dcp, M_ICONV, M_WAITOK);
+       to = csp->cp_to;
+       from = cspf ? cspf->cp_from : csp->cp_from;
+
+       dp->convtype = 0;
+
+       if (cspf)
+               dp->convtype |= KICONV_UCS_COMBINE;
+       /* pick up the direction flags for every family name that matches */
+       for (i = 0; unicode_family[i].name; i++) {
+               if (strcasecmp(from, unicode_family[i].name) == 0)
+                       dp->convtype |= unicode_family[i].from_flag;
+               if (strcasecmp(to, unicode_family[i].name) == 0)
+                       dp->convtype |= unicode_family[i].to_flag;
+       }
+       /* surrogate pairs are only handled when the pivot encoding is UTF-16 */
+       if (strcmp(ENCODING_UNICODE, ENCODING_UTF16) == 0)
+               dp->convtype |= KICONV_UCS_UCS4;
+       else
+               dp->convtype &= ~KICONV_UCS_UCS4;
+
+       dp->f_ctp = dp->t_ctp = NULL;
+       if (dp->convtype & KICONV_UCS_COMBINE) {
+               if ((dp->convtype & KICONV_UCS_FROM_UTF8) == 0 &&
+                   (dp->convtype & KICONV_UCS_FROM_LE) == 0) {
+                       iconv_open(ENCODING_UNICODE, from, &dp->f_ctp);
+               }
+               if ((dp->convtype & KICONV_UCS_TO_UTF8) == 0 &&
+                   (dp->convtype & KICONV_UCS_TO_LE) == 0) {
+                       iconv_open(to, ENCODING_UNICODE, &dp->t_ctp);
+               }
+       }
+
+       dp->ctype = NULL;
+       if (dp->convtype & (KICONV_UCS_FROM_UTF8 | KICONV_UCS_TO_UTF8))
+               iconv_open(KICONV_WCTYPE_NAME, ENCODING_UTF8, &dp->ctype);
+
+       dp->d_csp = csp;
+       /*
+        * iconv_ucs_close() tests d_cspf unconditionally, so give it a
+        * defined value even on the paths below that never assign it.
+        */
+       dp->d_cspf = NULL;
+       if (dp->convtype & (KICONV_UCS_FROM_UTF8 | KICONV_UCS_FROM_LE)) {
+               if (cspf) {
+                       dp->d_cspf = cspf;
+                       cspf->cp_refcount++;
+               } else
+                       csp->cp_refcount++;
+       }
+       if (dp->convtype & (KICONV_UCS_TO_UTF8 | KICONV_UCS_TO_LE))
+               csp->cp_refcount++;
+       *dpp = (void*)dp;
+       return 0;
+}
+
+/*
+ * Tear down an instance created by iconv_ucs_open(): close whichever
+ * helper converters were opened, drop the charset-pair references that
+ * open took, and free the object.  Always returns 0.
+ */
+static int
+iconv_ucs_close(void *data)
+{
+       struct iconv_ucs *inst = data;
+       const int from_mask = KICONV_UCS_FROM_UTF8 | KICONV_UCS_FROM_LE;
+       const int to_mask = KICONV_UCS_TO_UTF8 | KICONV_UCS_TO_LE;
+
+       if (inst->f_ctp != NULL)
+               iconv_close(inst->f_ctp);
+       if (inst->t_ctp != NULL)
+               iconv_close(inst->t_ctp);
+       if (inst->ctype != NULL)
+               iconv_close(inst->ctype);
+
+       /* source-side reference: held on cspf if recorded, else on csp */
+       if (inst->d_cspf != NULL) {
+               inst->d_cspf->cp_refcount--;
+       } else if ((inst->convtype & from_mask) != 0) {
+               inst->d_csp->cp_refcount--;
+       }
+
+       /* destination-side reference is always held on csp */
+       if ((inst->convtype & to_mask) != 0) {
+               inst->d_csp->cp_refcount--;
+       }
+
+       kobj_delete((struct kobj*)data, M_ICONV);
+       return 0;
+}
+
+/*
+ * Convert characters from *inbuf to *outbuf, pivoting through
+ * ENCODING_UNICODE.
+ *
+ * d2p           instance created by iconv_ucs_open()
+ * inbuf         in/out: current source position
+ * inbytesleft   in/out: bytes remaining at *inbuf
+ * outbuf        in/out: current destination position
+ * outbytesleft  in/out: space remaining at *outbuf
+ * convchar      when 1, stop after a single character
+ * casetype      KICONV_* case-folding request applied on the source
+ *               and/or destination side
+ *
+ * Each loop iteration decodes one character into ucs[] (ucslen is 2, or
+ * 4 for a surrogate pair) and then encodes ucs[] in the destination
+ * form.  On return the four in/out arguments are advanced by the amounts
+ * actually consumed and produced.
+ *
+ * Returns 0 on success (also when any buffer pointer is NULL), -1 on
+ * malformed or truncated input or insufficient output space, or the
+ * non-zero result of a helper converter.
+ */
+static int
+iconv_ucs_conv(void *d2p, const char **inbuf,
+       size_t *inbytesleft, char **outbuf, size_t *outbytesleft,
+       int convchar, int casetype)
+{
+       struct iconv_ucs *dp = (struct iconv_ucs*)d2p;
+       int ret = 0, i;
+       size_t in, on, ir, or, inlen, outlen, ucslen;
+       const char *src, *p;
+       char *dst;
+       u_char ucs[4], *q;
+       uint32_t code;
+
+       if (inbuf == NULL || *inbuf == NULL || outbuf == NULL || *outbuf == NULL)
+               return 0;
+       ir = in = *inbytesleft;
+       or = on = *outbytesleft;
+       src = *inbuf;
+       dst = *outbuf;
+
+       while (ir > 0 && or > 0) {
+
+               /*
+                * The first half of conversion.
+                * (convert any code into ENCODING_UNICODE)
+                */
+               code = 0;
+               p = src;
+               if (dp->convtype & KICONV_UCS_FROM_UTF8) {
+                       /* convert UTF-8 to ENCODING_UNICODE */
+                       inlen = 0;
+                       code = utf8_to_ucs4(p, &inlen, ir);
+                       if (code == 0) {
+                               ret = -1;
+                               break;
+                       }
+
+                       if (casetype == KICONV_FROM_LOWER && dp->ctype) {
+                               code = towlower(code, dp->ctype);
+                       } else if (casetype == KICONV_FROM_UPPER && dp->ctype) {
+                               code = towupper(code, dp->ctype);
+                       }
+
+                       if ((code >= 0xd800 && code < 0xe000) || code >= 0x110000 ) {
+                               /* reserved for utf-16 surrogate pair */
+                               /* invalid unicode */
+                               ret = -1;
+                               break;
+                       }
+
+                       if (inlen == 4) {
+                               if (dp->convtype & KICONV_UCS_UCS4) {
+                                       ucslen = 4;
+                                       code = encode_surrogate(code);
+                               } else {
+                                       /* can't handle with ucs-2 */
+                                       ret = -1;
+                                       break;
+                               }
+                       } else {
+                               ucslen = 2;
+                       }
+
+                       /* save UCS-4 into ucs[] */
+                       for (q = ucs, i = ucslen - 1 ; i >= 0 ; i--)
+                               *q++ = (code >> (i << 3)) & 0xff;
+
+               } else if (dp->convtype & KICONV_UCS_COMBINE && dp->f_ctp) {
+                       /* convert local code to ENCODING_UNICODE */
+                       ucslen = 4;
+                       inlen = ir;
+                       q = ucs;
+                       ret = iconv_convchr_case(dp->f_ctp, &p, &inlen, (char **)&q,
+                           &ucslen, casetype & (KICONV_FROM_LOWER | KICONV_FROM_UPPER));
+                       if (ret)
+                               break;
+                       /* turn "bytes left" into "bytes used" */
+                       inlen = ir - inlen;
+                       ucslen = 4 - ucslen;
+
+               } else {
+                       /* src code is a proper subset of ENCODING_UNICODE */
+                       if (ir < 2) {
+                               /*
+                                * Truncated 16-bit unit: fail before
+                                * reading past the end of the input.
+                                */
+                               ret = -1;
+                               break;
+                       }
+                       q = ucs;
+                       if (dp->convtype & KICONV_UCS_FROM_LE) {
+                               *q = *(p + 1);
+                               *(q + 1) = *p;
+                               p += 2;
+                       } else {
+                               *q = *p++;
+                               *(q + 1) = *p++;
+                       }
+                       if ((*q & 0xfc) == 0xd8) {
+                               /* high surrogate: needs a second 16-bit unit */
+                               if (dp->convtype & KICONV_UCS_UCS4 &&
+                                   dp->convtype & KICONV_UCS_FROM_UTF16) {
+                                       inlen = ucslen = 4;
+                               } else {
+                                       /* invalid unicode */
+                                       ret = -1;
+                                       break;
+                               }
+                       } else {
+                               inlen = ucslen = 2;
+                       }
+                       if (ir < inlen) {
+                               ret = -1;
+                               break;
+                       }
+                       if (ucslen == 4) {
+                               q += 2;
+                               if (dp->convtype & KICONV_UCS_FROM_LE) {
+                                       *q = *(p + 1);
+                                       *(q + 1) = *p;
+                               } else {
+                                       *q = *p++;
+                                       *(q + 1) = *p;
+                               }
+                               if ((*q & 0xfc) != 0xdc) {
+                                       /* invalid unicode */
+                                       ret = -1;
+                                       break;
+                               }
+                       }
+               }
+
+               /*
+                * The second half of conversion.
+                * (convert ENCODING_UNICODE into any code)
+                */
+               p = ucs;
+               if (dp->convtype & KICONV_UCS_TO_UTF8) {
+                       q = (u_char *)dst;
+                       if (ucslen == 4 && dp->convtype & KICONV_UCS_UCS4) {
+                               /* decode surrogate pair */
+                               code = decode_surrogate(p);
+                       } else {
+                               code = (ucs[0] << 8) | ucs[1];
+                       }
+
+                       if (casetype == KICONV_LOWER && dp->ctype) {
+                               code = towlower(code, dp->ctype);
+                       } else if (casetype == KICONV_UPPER && dp->ctype) {
+                               code = towupper(code, dp->ctype);
+                       }
+
+                       outlen = 0;
+                       if (ucs4_to_utf8(code, q, &outlen, or) == NULL) {
+                               ret = -1;
+                               break;
+                       }
+
+                       src += inlen;
+                       ir -= inlen;
+                       dst += outlen;
+                       or -= outlen;
+
+               } else if (dp->convtype & KICONV_UCS_COMBINE && dp->t_ctp) {
+                       /* the helper advances dst and or itself */
+                       ret = iconv_convchr_case(dp->t_ctp, &p, &ucslen, &dst,
+                           &or, casetype & (KICONV_LOWER | KICONV_UPPER));
+                       if (ret)
+                               break;
+
+                       src += inlen;
+                       ir -= inlen;
+
+               } else {
+                       /* dst code is a proper subset of ENCODING_UNICODE */
+                       if (or < ucslen) {
+                               ret = -1;
+                               break;
+                       }
+                       src += inlen;
+                       ir -= inlen;
+                       or -= ucslen;
+                       if (dp->convtype & KICONV_UCS_TO_LE) {
+                               *dst++ = *(p + 1);
+                               *dst++ = *p;
+                               p += 2;
+                       } else {
+                               *dst++ = *p++;
+                               *dst++ = *p++;
+                       }
+                       if (ucslen == 4) {
+                               if ((dp->convtype & KICONV_UCS_UCS4) == 0 ||
+                                   (dp->convtype & KICONV_UCS_TO_UTF16) == 0) {
+                                       ret = -1;
+                                       break;
+                               }
+                               if (dp->convtype & KICONV_UCS_TO_LE) {
+                                       *dst++ = *(p + 1);
+                                       *dst++ = *p;
+                               } else {
+                                       *dst++ = *p++;
+                                       *dst++ = *p;
+                               }
+                       }
+               }
+
+               if (convchar == 1)
+                       break;
+       }
+
+       *inbuf += in - ir;
+       *outbuf += on - or;
+       *inbytesleft -= in - ir;
+       *outbytesleft -= on - or;
+       return (ret);
+}
+
+/*
+ * Converter-class init hook: registers the two charset pairs served by
+ * the converter named ENCODING_UNICODE, UTF-8 -> ENCODING_UNICODE and
+ * ENCODING_UNICODE -> UTF-8.  Stops at, and returns, the first error
+ * from iconv_add(); returns 0 when both registrations succeed.
+ */
+static int
+iconv_ucs_init(struct iconv_converter_class *dcp)
+{
+       int rc;
+
+       rc = iconv_add(ENCODING_UNICODE, ENCODING_UNICODE, ENCODING_UTF8);
+       if (rc == 0)
+               rc = iconv_add(ENCODING_UNICODE, ENCODING_UTF8, ENCODING_UNICODE);
+       return (rc);
+}
+
+/*
+ * Converter-class done hook.  Nothing to release here; reports success.
+ */
+static int
+iconv_ucs_done(struct iconv_converter_class *dcp)
+{
+       const int status = 0;
+
+       return status;
+}
+
+/*
+ * Name under which this converter class is known: ENCODING_UNICODE.
+ */
+static const char *
+iconv_ucs_name(struct iconv_converter_class *dcp)
+{
+       const char *name = ENCODING_UNICODE;
+
+       return name;
+}
+
+/*
+ * Method table binding the iconv_converter interface to the UCS
+ * implementation above.  tolower/toupper are not listed, so those
+ * methods use the interface defaults.
+ */
+static kobj_method_t iconv_ucs_methods[] = {
+       KOBJMETHOD(iconv_converter_open,        iconv_ucs_open),
+       KOBJMETHOD(iconv_converter_close,       iconv_ucs_close),
+       KOBJMETHOD(iconv_converter_conv,        iconv_ucs_conv),
+       KOBJMETHOD(iconv_converter_init,        iconv_ucs_init),
+       KOBJMETHOD(iconv_converter_done,        iconv_ucs_done),
+       KOBJMETHOD(iconv_converter_name,        iconv_ucs_name),
+       KOBJMETHOD_END
+};
+
+/* Declare the "ucs" converter class; instances are struct iconv_ucs. */
+KICONV_CONVERTER(ucs, sizeof(struct iconv_ucs));
+
+/*
+ * Decode one UTF-8 sequence of 1 to 4 bytes.
+ *
+ * src        start of the sequence
+ * utf8width  receives the number of bytes consumed; written only on
+ *            success
+ * srclen     number of bytes available at src
+ *
+ * Returns the decoded code point, or 0 on failure: empty or truncated
+ * input, an illegal lead byte, a bad continuation byte, or an overlong
+ * (non-shortest) encoding.  An encoded NUL also yields 0 and so cannot
+ * be told apart from an error by the return value alone.
+ *
+ * Surrogate and > 0x10ffff range checks are left to the caller.
+ */
+static uint32_t
+utf8_to_ucs4(const char *src, size_t *utf8width, size_t srclen)
+{
+       /* smallest code point that legitimately needs w bytes */
+       static const uint32_t min_code[5] = { 0, 0, 0x80, 0x800, 0x10000 };
+       size_t i, w = 0;
+       uint32_t ucs4 = 0;
+
+       if (srclen == 0)
+               return (0);
+
+       /*
+        * get leading 1 byte from utf-8
+        */
+       if ((*src & 0x80) == 0) {
+               /*
+                * leading 1 bit is "0"
+                *  utf-8: 0xxxxxxx
+                *  ucs-4: 00000000 00000000 00000000 0xxxxxxx
+                */
+               w = 1;
+               /* get trailing 7 bits */
+               ucs4 = *src & 0x7f;
+       } else if ((*src & 0xe0) == 0xc0) {
+               /*
+                * leading 3 bits are "110"
+                *  utf-8: 110xxxxx 10yyyyyy
+                *  ucs-4: 00000000 00000000 00000xxx xxyyyyyy
+                */
+               w = 2;
+               /* get trailing 5 bits */
+               ucs4 = *src & 0x1f;
+       } else if ((*src & 0xf0) == 0xe0) {
+               /*
+                * leading 4 bits are "1110"
+                *  utf-8: 1110xxxx 10yyyyyy 10zzzzzz
+                *  ucs-4: 00000000 00000000 xxxxyyyy yyzzzzzz
+                */
+               w = 3;
+               /* get trailing 4 bits */
+               ucs4 = *src & 0x0f;
+       } else if ((*src & 0xf8) == 0xf0) {
+               /*
+                * leading 5 bits are "11110"
+                *  utf-8: 11110www 10xxxxxx 10yyyyyy 10zzzzzz
+                *  ucs-4: 00000000 000wwwxx xxxxyyyy yyzzzzzz
+                */
+               w = 4;
+               /* get trailing 3 bits */
+               ucs4 = *src & 0x07;
+       } else {
+               /* out of utf-16 range or having illegal bits */
+               return (0);
+       }
+
+       if (srclen < w)
+               return (0);
+
+       /*
+        * get left parts from utf-8
+        */
+       for (i = 1 ; i < w ; i++) {
+               if ((*(src + i) & 0xc0) != 0x80) {
+                       /* invalid: leading 2 bits are not "10" */
+                       return (0);
+               }
+               /* concatenate trailing 6 bits into ucs4 */
+               ucs4 <<= 6;
+               ucs4 |= *(src + i) & 0x3f;
+       }
+
+       /*
+        * Reject overlong forms: a value that fits in fewer bytes must
+        * not be accepted in a longer encoding.
+        */
+       if (ucs4 < min_code[w])
+               return (0);
+
+       *utf8width = w;
+       return (ucs4);
+}
+
+/*
+ * Encode one code point as UTF-8.
+ *
+ * ucs4       value to encode; must be below 0x200000
+ * dst        output buffer
+ * utf8width  receives the number of bytes written; written only on
+ *            success
+ * dstlen     space available at dst
+ *
+ * Returns dst (as u_char *) on success, or NULL when the value is out of
+ * range or the encoding does not fit in dstlen bytes.
+ */
+static u_char *
+ucs4_to_utf8(uint32_t ucs4, char *dst, size_t *utf8width, size_t dstlen)
+{
+       u_char *out, prefix;
+       size_t width, pos;
+
+       /* values needing more than four bytes are not supported */
+       if (ucs4 >= 0x200000)
+               return (NULL);
+
+       /* choose the sequence length and the marker bits of its lead byte */
+       if (ucs4 < 0x80) {
+               width = 1;
+               prefix = 0;
+       } else if (ucs4 < 0x800) {
+               width = 2;
+               prefix = 0xc0;
+       } else if (ucs4 < 0x10000) {
+               width = 3;
+               prefix = 0xe0;
+       } else {
+               width = 4;
+               prefix = 0xf0;
+       }
+
+       if (dstlen < width)
+               return (NULL);
+
+       /* fill continuation bytes from the last one backwards */
+       out = (u_char *)dst;
+       for (pos = width ; pos > 1 ; pos--) {
+               out[pos - 1] = (ucs4 & 0x3f) | 0x80;
+               ucs4 >>= 6;
+       }
+       /* whatever is left belongs in the lead byte */
+       out[0] = ucs4 | prefix;
+
+       *utf8width = width;
+
+       return (out);
+}
+
+/*
+ * Pack a supplementary-plane code point into a UTF-16 surrogate pair
+ * held in one 32-bit value: high surrogate in the upper 16 bits, low
+ * surrogate in the lower 16 bits.
+ */
+static uint32_t
+encode_surrogate(register uint32_t code)
+{
+       uint32_t offset, high_bits, low_bits;
+
+       offset = code - 0x10000;
+       /* upper ten bits of the offset land in bits 16..25 */
+       high_bits = (offset << 6) & 0x3ff0000;
+       /* lower ten bits stay in bits 0..9 */
+       low_bits = offset & 0x3ff;
+
+       return (high_bits | low_bits | 0xd800dc00);
+}
+
+/*
+ * Rebuild a code point from a surrogate pair stored as four bytes:
+ * ucs[0..1] hold the high surrogate, ucs[2..3] the low surrogate, each
+ * most significant byte first.  The surrogate marker bits are masked
+ * off rather than validated.
+ */
+static uint32_t
+decode_surrogate(register const u_char *ucs)
+{
+       uint32_t high_ten, low_ten;
+
+       high_ten = ((ucs[0] & 0x3) << 8) | ucs[1];
+       low_ten = ((ucs[2] & 0x3) << 8) | ucs[3];
+
+       return (((high_ten << 10) | low_ten) + 0x10000);
+}
+
index 94a0115..17469eb 100644 (file)
@@ -1,5 +1,5 @@
-/*
- * Copyright (c) 2000-2001, Boris Popov
+/*-
+ * Copyright (c) 2000-2001 Boris Popov
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *    This product includes software developed by Boris Popov.
- * 4. Neither the name of the author nor the names of any co-contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@@ -29,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/libkern/iconv_xlat.c,v 1.5.30.1 2009/04/15 03:14:26 kensmith Exp $
+ * $FreeBSD: head/sys/libkern/iconv_xlat.c 206361 2010-04-07 16:50:38Z joel $
  */
 
 #include <sys/param.h>
index 0c9cd4d..59ec415 100644 (file)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2003, Ryuichiro Imura
+ * Copyright (c) 2003, 2005 Ryuichiro Imura
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: src/sys/libkern/iconv_xlat16.c,v 1.3.20.1 2009/04/15 03:14:26 kensmith Exp $
+ * $FreeBSD: head/sys/libkern/iconv_xlat16.c 194638 2009-06-22 17:09:46Z delphij $
  */
 
 #include <sys/param.h>
 MODULE_DEPEND(iconv_xlat16, libiconv, 2, 2, 2);
 #endif
 
+/*
+ * Split a 16-bit character code into the two indices used as
+ * d_table[C2I1(c)][C2I2(c)]:
+ *   C2I1 - low byte of c, with 0x100 added when bit 15 of c is set
+ *   C2I2 - high byte of c, with bit 15 masked off when it is set
+ * Both macros evaluate their argument more than once.
+ */
+#define C2I1(c)        ((c) & 0x8000 ? ((c) & 0xff) | 0x100 : (c) & 0xff)
+#define C2I2(c)        ((c) & 0x8000 ? ((c) >> 8) & 0x7f : ((c) >> 8) & 0xff)
+
 /*
  * XLAT16 converter instance
  */
 struct iconv_xlat16 {
        KOBJ_FIELDS;
        uint32_t *              d_table[0x200];
+       void *                  f_ctp;  /* towlower()/towupper() handle for cp_from; may be NULL */
+       void *                  t_ctp;  /* towlower()/towupper() handle for cp_to; may be NULL */
        struct iconv_cspair *   d_csp;
 };
 
@@ -71,6 +76,16 @@ iconv_xlat16_open(struct iconv_converter_class *dcp,
                }
                idxp++;
        }
+
+       if (strcmp(csp->cp_to, KICONV_WCTYPE_NAME) != 0) {
+               if (iconv_open(KICONV_WCTYPE_NAME, csp->cp_from, &dp->f_ctp) != 0)
+                       dp->f_ctp = NULL;
+               if (iconv_open(KICONV_WCTYPE_NAME, csp->cp_to, &dp->t_ctp) != 0)
+                       dp->t_ctp = NULL;
+       } else {
+               dp->f_ctp = dp->t_ctp = dp;
+       }
+
        dp->d_csp = csp;
        csp->cp_refcount++;
        *dpp = (void*)dp;
@@ -82,6 +97,10 @@ iconv_xlat16_close(void *data)
 {
        struct iconv_xlat16 *dp = data;
 
+       if (dp->f_ctp && dp->f_ctp != data)
+               iconv_close(dp->f_ctp);
+       if (dp->t_ctp && dp->t_ctp != data)
+               iconv_close(dp->t_ctp);
        dp->d_csp->cp_refcount--;
        kobj_delete((struct kobj*)data, M_ICONV);
        return (0);
@@ -99,7 +118,7 @@ iconv_xlat16_conv(void *d2p, const char **inbuf,
        size_t in, on, ir, or, inlen;
        uint32_t code;
        u_char u, l;
-       uint16_t c1, c2;
+       uint16_t c1, c2, ctmp;
 
        if (inbuf == NULL || *inbuf == NULL || outbuf == NULL || *outbuf == NULL)
                return (0);
@@ -111,21 +130,32 @@ iconv_xlat16_conv(void *d2p, const char **inbuf,
        while(ir > 0 && or > 0) {
 
                inlen = 0;
-               code = '\0';
+               code = 0;
 
                c1 = ir > 1 ? *(src+1) & 0xff : 0;
                c2 = *src & 0xff;
+               ctmp = 0;
 
                c1 = c2 & 0x80 ? c1 | 0x100 : c1;
                c2 = c2 & 0x80 ? c2 & 0x7f : c2;
 
-               if (ir > 1 && dp->d_table[c1]) {
+               if (ir > 1 && dp->d_table[c1] && dp->d_table[c1][c2]) {
                        /*
                         * inbuf char is a double byte char
                         */
-                       code = dp->d_table[c1][c2];
-                       if (code)
-                               inlen = 2;
+                       inlen = 2;
+
+                       /* toupper,tolower */
+                       if (casetype == KICONV_FROM_LOWER && dp->f_ctp)
+                               ctmp = towlower(((u_char)*src << 8) | (u_char)*(src + 1),
+                                   dp->f_ctp);
+                       else if (casetype == KICONV_FROM_UPPER && dp->f_ctp)
+                               ctmp = towupper(((u_char)*src << 8) | (u_char)*(src + 1),
+                                   dp->f_ctp);
+                       if (ctmp) {
+                               c1 = C2I1(ctmp);
+                               c2 = C2I2(ctmp);
+                       }
                }
 
                if (inlen == 0) {
@@ -138,11 +168,31 @@ iconv_xlat16_conv(void *d2p, const char **inbuf,
                         * inbuf char is a single byte char
                         */
                        inlen = 1;
-                       code = dp->d_table[c1][c2];
-                       if (!code) {
-                               ret = -1;
-                               break;
+
+                       if (casetype & (KICONV_FROM_LOWER|KICONV_FROM_UPPER))
+                               code = dp->d_table[c1][c2];
+
+                       if (casetype == KICONV_FROM_LOWER) {
+                               if (dp->f_ctp)
+                                       ctmp = towlower((u_char)*src, dp->f_ctp);
+                               else if (code & XLAT16_HAS_FROM_LOWER_CASE)
+                                       ctmp = (u_char)(code >> 16);
+                       } else if (casetype == KICONV_FROM_UPPER) {
+                               if (dp->f_ctp)
+                                       ctmp = towupper((u_char)*src, dp->f_ctp);
+                               else if (code & XLAT16_HAS_FROM_UPPER_CASE)
+                                       ctmp = (u_char)(code >> 16);
                        }
+                       if (ctmp) {
+                               c1 = C2I1(ctmp << 8);
+                               c2 = C2I2(ctmp << 8);
+                       }
+               }
+
+               code = dp->d_table[c1][c2];
+               if (!code) {
+                       ret = -1;
+                       break;
                }
 
                nullin = (code & XLAT16_ACCEPT_NULL_IN) ? 1 : 0;
@@ -157,14 +207,6 @@ iconv_xlat16_conv(void *d2p, const char **inbuf,
                /*
                 * now start translation
                 */
-               if ((casetype == KICONV_FROM_LOWER && code & XLAT16_HAS_FROM_LOWER_CASE) ||
-                   (casetype == KICONV_FROM_UPPER && code & XLAT16_HAS_FROM_UPPER_CASE)) {
-                       c2 = (u_char)(code >> 16);
-                       c1 = c2 & 0x80 ? 0x100 : 0;
-                       c2 = c2 & 0x80 ? c2 & 0x7f : c2;
-                       code = dp->d_table[c1][c2];
-               }
-
                u = (u_char)(code >> 8);
                l = (u_char)code;
 
@@ -185,15 +227,38 @@ iconv_xlat16_conv(void *d2p, const char **inbuf,
                                ret = -1;
                                break;
                        }
+
+                       /* toupper,tolower */
+                       if (casetype == KICONV_LOWER && dp->t_ctp) {
+                               code = towlower((uint16_t)code, dp->t_ctp);
+                               u = (u_char)(code >> 8);
+                               l = (u_char)code;
+                       }
+                       if (casetype == KICONV_UPPER && dp->t_ctp) {
+                               code = towupper((uint16_t)code, dp->t_ctp);
+                               u = (u_char)(code >> 8);
+                               l = (u_char)code;
+                       }
+
                        *dst++ = u;
                        *dst++ = l;
                        or -= 2;
                } else {
-                       if ((casetype == KICONV_LOWER && code & XLAT16_HAS_LOWER_CASE) ||
-                           (casetype == KICONV_UPPER && code & XLAT16_HAS_UPPER_CASE))
-                               *dst++ = (u_char)(code >> 16);
-                       else
-                               *dst++ = l;
+                       /* toupper,tolower */
+                       if (casetype == KICONV_LOWER) {
+                               if (dp->t_ctp)
+                                       l = (u_char)towlower(l, dp->t_ctp);
+                               else if (code & XLAT16_HAS_LOWER_CASE)
+                                       l = (u_char)(code >> 16);
+                       }
+                       if (casetype == KICONV_UPPER) {
+                               if (dp->t_ctp)
+                                       l = (u_char)towupper(l, dp->t_ctp);
+                               else if (code & XLAT16_HAS_UPPER_CASE)
+                                       l = (u_char)(code >> 16);
+                       }
+
+                       *dst++ = l;
                        or--;
                }
 
@@ -231,6 +296,55 @@ iconv_xlat16_name(struct iconv_converter_class *dcp)
        return ("xlat16");
 }
 
+/*
+ * Common worker for the tolower/toupper converter methods.
+ *
+ *   dp        converter instance whose d_table is consulted
+ *   c         character code; 0..0xff is treated as a single byte char,
+ *             0x100..0xffff as a double byte char
+ *   caseflag  XLAT16_HAS_LOWER_CASE or XLAT16_HAS_UPPER_CASE
+ *
+ * Returns the case-mapped code when the table entry for `c' carries
+ * `caseflag', otherwise `c' unchanged.  Codes outside 0..0xffff are
+ * returned unchanged; negative values in particular must not reach the
+ * shift below, since left-shifting a negative int is undefined.
+ */
+static int
+iconv_xlat16_casemap(struct iconv_xlat16 *dp, int c, uint32_t caseflag)
+{
+       uint32_t entry;
+       int c1, c2, key, out;
+
+       if (c < 0 || c >= 0x10000)
+               return (c);
+
+       /* a single byte char is looked up as if it were the high byte */
+       key = (c < 0x100) ? c << 8 : c;
+       c1 = C2I1(key);
+       c2 = C2I2(key);
+
+       if (dp->d_table[c1] == NULL)
+               return (c);
+       entry = dp->d_table[c1][c2];
+       if ((entry & caseflag) == 0)
+               return (c);
+
+       /* the mapped code lives in the low 16 bits of the entry */
+       out = entry & 0xffff;
+       /* a zero low byte means the result is the single byte held in the high byte */
+       if ((out & 0xff) == 0)
+               out = (out >> 8) & 0xff;
+       return (out);
+}
+
+/*
+ * iconv_converter_tolower method: map `c' to lower case using the
+ * table of the converter instance `d2p'.
+ */
+static int
+iconv_xlat16_tolower(void *d2p, int c)
+{
+       return (iconv_xlat16_casemap(d2p, c, XLAT16_HAS_LOWER_CASE));
+}
+
+/*
+ * iconv_converter_toupper method: map `c' to upper case using the
+ * table of the converter instance `d2p'.
+ */
+static int
+iconv_xlat16_toupper(void *d2p, int c)
+{
+       return (iconv_xlat16_casemap(d2p, c, XLAT16_HAS_UPPER_CASE));
+}
+
 static kobj_method_t iconv_xlat16_methods[] = {
        KOBJMETHOD(iconv_converter_open,        iconv_xlat16_open),
        KOBJMETHOD(iconv_converter_close,       iconv_xlat16_close),
@@ -240,6 +354,8 @@ static kobj_method_t iconv_xlat16_methods[] = {
        KOBJMETHOD(iconv_converter_done,        iconv_xlat16_done),
 #endif
        KOBJMETHOD(iconv_converter_name,        iconv_xlat16_name),
+       KOBJMETHOD(iconv_converter_tolower,     iconv_xlat16_tolower),
+       KOBJMETHOD(iconv_converter_toupper,     iconv_xlat16_toupper),
        KOBJMETHOD_END
 };