collate 1/4: Add support for LC_COLLATE format "DragonFly 4.4" in libc
author John Marino <draco@marino.st>
Tue, 28 Jul 2015 19:25:23 +0000 (21:25 +0200)
committer John Marino <draco@marino.st>
Tue, 28 Jul 2015 23:38:43 +0000 (01:38 +0200)
The collate functions within libc have been using version 1 and 1.2 of the
packed LC_COLLATE binary formats.  These were generated with the colldef
tool, but the new LC_COLLATE files are going to be generated by the new
localedef tool using CLDR POSIX files as input.  The DragonFly-flavored
version of localedef identifies the format as "DragonFly 4.4".  Any
LC_COLLATE file with a different version will simply not be loaded, and
all LC* categories will get set to "C" (aka "POSIX") locale.

This work is based off of Nexenta's contribution to Illumos (successor
to OpenSolaris).  The integration with xlocale is my work though.

The following commits will enable the localedef tool, disable the colldef
tool, add the generated colldef directory, and finally remove colldef from
base.

include/limits.h
lib/libc/locale/Symbol.map
lib/libc/locale/collate.c
lib/libc/locale/collate.h
lib/libc/locale/collcmp.c
lib/libc/locale/setrunelocale.c
lib/libc/string/strcoll.c
lib/libc/string/strxfrm.c
lib/libc/string/wcscoll.c
lib/libc/string/wcsxfrm.c

index ad3bfac..6fbfb2b 100644 (file)
@@ -59,7 +59,7 @@
 #define        BC_DIM_MAX               2048   /* max array elements in bc(1) */
 #define        BC_SCALE_MAX               99   /* max scale value in bc(1) */
 #define        BC_STRING_MAX            1000   /* max const string length in bc(1) */
-#define        COLL_WEIGHTS_MAX            0   /* max weights for order keyword */
+#define        COLL_WEIGHTS_MAX           10   /* max weights for order keyword */
 #define        EXPR_NEST_MAX              32   /* max expressions nested in expr(1) */
 #define        LINE_MAX                 2048   /* max bytes in an input line */
 #define        RE_DUP_MAX                255   /* max RE's in interval notation */
index a069666..75e2224 100644 (file)
@@ -205,14 +205,8 @@ DF402.0 {
 
 DFprivate_1.0 {
     __detect_path_locale;
-    __collate_err;
     __collate_load;
-    __collate_load_error;
     __collate_load_tables;
-    __collate_load_tables_l;
-    __collate_lookup;
-    __collate_strdup;
-    __collate_substitute;
     __collate_range_cmp;
     __ctype_load;
     __fix_locale_grouping_str;
@@ -262,4 +256,6 @@ DFprivate_1.0 {
     _PathLocale;
     _ascii_init;
     _none_init;
+    _collate_load_tables_l;
+    _collate_lookup;
 };
index f132008..4df1ce4 100644 (file)
@@ -1,4 +1,5 @@
-/*-
+/*
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
  *             at Electronni Visti IA, Kiev, Ukraine.
  *                     All rights reserved.
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: head/lib/libc/locale/collate.c 244126 2012-12-11 22:52:56Z jilles $
+ * Adapted to xlocale by John Marino <draco@marino.st>
  */
 
-
 #include "namespace.h"
-#include <arpa/inet.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <wchar.h>
 #include <errno.h>
 #include <unistd.h>
-#include <sysexits.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
 #include "un-namespace.h"
 
 #include "collate.h"
 #include "setlocale.h"
 #include "ldpart.h"
 
-#include "libc_private.h"
-
-/*
- * To avoid modifying the original (single-threaded) code too much, we'll just
- * define the old globals as fields inside the table.
- *
- * We also modify the collation table test functions to search the thread-local
- * table first and the global table second.  
- */
-#define __collate_substitute_nontrivial (table->__collate_substitute_nontrivial)
-#define __collate_substitute_table_ptr (table->__collate_substitute_table_ptr)
-#define __collate_char_pri_table_ptr (table->__collate_char_pri_table_ptr)
-#define __collate_chain_pri_table (table->__collate_chain_pri_table)
-int __collate_load_error;
-
-
 struct xlocale_collate __xlocale_global_collate = {
-       {{0}, "C"}, 1, 0
+       {{0}, "C"}, 1, 0, 0, 0
 };
 
- struct xlocale_collate __xlocale_C_collate = {
-       {{0}, "C"}, 1, 0
+struct xlocale_collate __xlocale_C_collate = {
+       {{0}, "C"}, 1, 0, 0, 0
 };
 
-void __collate_err(int ex, const char *f) __dead2;
+#include "libc_private.h"
 
 int
 __collate_load_tables_l(const char *encoding, struct xlocale_collate *table);
@@ -80,8 +67,8 @@ static void
 destruct_collate(void *t)
 {
        struct xlocale_collate *table = t;
-       if (__collate_chain_pri_table) {
-               free(__collate_chain_pri_table);
+       if (table->map && (table->maplen > 0)) {
+               (void) munmap(table->map, table->maplen);
        }
        free(t);
 }
@@ -110,18 +97,19 @@ int
 __collate_load_tables(const char *encoding)
 {
        int ret = __collate_load_tables_l(encoding, &__xlocale_global_collate);
-       __collate_load_error = __xlocale_global_collate.__collate_load_error;
        return ret;
 }
 
 int
 __collate_load_tables_l(const char *encoding, struct xlocale_collate *table)
 {
-       FILE *fp;
-       int i, saverr, chains;
-       uint32_t u32;
-       char strbuf[STR_LEN], buf[PATH_MAX];
-       void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table;
+       int i, chains, z;
+       char buf[PATH_MAX];
+       char *TMP;
+       char *map;
+       collate_info_t *info;
+       struct stat sbuf;
+       int fd;
 
        /* 'encoding' must be already checked. */
        if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
@@ -129,217 +117,529 @@ __collate_load_tables_l(const char *encoding, struct xlocale_collate *table)
                return (_LDP_CACHE);
        }
 
-       /* 'PathLocale' must be already set & checked. */
-       /* Range checking not needed, encoding has fixed size */
-       (void)strcpy(buf, _PathLocale);
-       (void)strcat(buf, "/");
-       (void)strcat(buf, encoding);
-       (void)strcat(buf, "/LC_COLLATE");
-       if ((fp = fopen(buf, "re")) == NULL)
-               return (_LDP_ERROR);
+       (void) snprintf(buf, sizeof (buf), "%s/%s/LC_COLLATE",
+           _PathLocale, encoding);
 
-       if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) {
-               saverr = errno;
-               (void)fclose(fp);
-               errno = saverr;
+       if ((fd = _open(buf, O_RDONLY)) < 0)
                return (_LDP_ERROR);
-       }
-       chains = -1;
-       if (strcmp(strbuf, COLLATE_VERSION) == 0)
-               chains = 0;
-       else if (strcmp(strbuf, COLLATE_VERSION1_2) == 0)
-               chains = 1;
-       if (chains < 0) {
-               (void)fclose(fp);
-               errno = EFTYPE;
+       if (_fstat(fd, &sbuf) < 0) {
+               (void) _close(fd);
                return (_LDP_ERROR);
        }
-       if (chains) {
-               if (fread(&u32, sizeof(u32), 1, fp) != 1) {
-                       saverr = errno;
-                       (void)fclose(fp);
-                       errno = saverr;
-                       return (_LDP_ERROR);
-               }
-               if ((chains = (int)ntohl(u32)) < 1) {
-                       (void)fclose(fp);
-                       errno = EFTYPE;
-                       return (_LDP_ERROR);
-               }
-       } else
-               chains = TABLE_SIZE;
-
-       if ((TMP_substitute_table =
-            malloc(sizeof(__collate_substitute_table))) == NULL) {
-               saverr = errno;
-               (void)fclose(fp);
-               errno = saverr;
+       if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) {
+               (void) _close(fd);
+               errno = EINVAL;
                return (_LDP_ERROR);
        }
-       if ((TMP_char_pri_table =
-            malloc(sizeof(__collate_char_pri_table))) == NULL) {
-               saverr = errno;
-               free(TMP_substitute_table);
-               (void)fclose(fp);
-               errno = saverr;
+       map = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+       (void) _close(fd);
+       if ((TMP = map) == NULL) {
                return (_LDP_ERROR);
        }
-       if ((TMP_chain_pri_table =
-            malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) {
-               saverr = errno;
-               free(TMP_substitute_table);
-               free(TMP_char_pri_table);
-               (void)fclose(fp);
-               errno = saverr;
+
+       if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) {
+               (void) munmap(map, sbuf.st_size);
+               errno = EINVAL;
                return (_LDP_ERROR);
        }
+       TMP += COLLATE_STR_LEN;
 
-#define FREAD(a, b, c, d) \
-{ \
-       if (fread(a, b, c, d) != c) { \
-               saverr = errno; \
-               free(TMP_substitute_table); \
-               free(TMP_char_pri_table); \
-               free(TMP_chain_pri_table); \
-               (void)fclose(d); \
-               errno = saverr; \
-               return (_LDP_ERROR); \
-       } \
-}
+       info = (void *)TMP;
+       TMP += sizeof (*info);
 
-       FREAD(TMP_substitute_table, sizeof(__collate_substitute_table), 1, fp);
-       FREAD(TMP_char_pri_table, sizeof(__collate_char_pri_table), 1, fp);
-       FREAD(TMP_chain_pri_table,
-             sizeof(*__collate_chain_pri_table), chains, fp);
-       (void)fclose(fp);
-
-       if (__collate_substitute_table_ptr != NULL)
-               free(__collate_substitute_table_ptr);
-       __collate_substitute_table_ptr = TMP_substitute_table;
-       if (__collate_char_pri_table_ptr != NULL)
-               free(__collate_char_pri_table_ptr);
-       __collate_char_pri_table_ptr = TMP_char_pri_table;
-       for (i = 0; i < UCHAR_MAX + 1; i++) {
-               __collate_char_pri_table[i].prim =
-                   ntohl(__collate_char_pri_table[i].prim);
-               __collate_char_pri_table[i].sec =
-                   ntohl(__collate_char_pri_table[i].sec);
+       if ((info->directive_count < 1) ||
+           (info->directive_count >= COLL_WEIGHTS_MAX) ||
+           ((chains = info->chain_count) < 0)) {
+               (void) munmap(map, sbuf.st_size);
+               errno = EINVAL;
+               return (_LDP_ERROR);
        }
-       if (__collate_chain_pri_table != NULL)
-               free(__collate_chain_pri_table);
-       __collate_chain_pri_table = TMP_chain_pri_table;
-       for (i = 0; i < chains; i++) {
-               __collate_chain_pri_table[i].prim =
-                   ntohl(__collate_chain_pri_table[i].prim);
-               __collate_chain_pri_table[i].sec =
-                   ntohl(__collate_chain_pri_table[i].sec);
+
+       i = (sizeof (collate_char_t) * (UCHAR_MAX + 1)) +
+           (sizeof (collate_chain_t) * chains) +
+           (sizeof (collate_large_t) * info->large_count);
+       for (z = 0; z < (info->directive_count); z++) {
+               i += sizeof (collate_subst_t) * info->subst_count[z];
+       }
+       if (i != (sbuf.st_size - (TMP - map))) {
+               (void) munmap(map, sbuf.st_size);
+               errno = EINVAL;
+               return (_LDP_ERROR);
        }
-       __collate_substitute_nontrivial = 0;
-       for (i = 0; i < UCHAR_MAX + 1; i++) {
-               if (__collate_substitute_table[i][0] != i ||
-                   __collate_substitute_table[i][1] != 0) {
-                       __collate_substitute_nontrivial = 1;
-                       break;
+
+       table->char_pri_table = (void *)TMP;
+       TMP += sizeof (collate_char_t) * (UCHAR_MAX + 1);
+
+       for (z = 0; z < info->directive_count; z++) {
+               if (info->subst_count[z] > 0) {
+                       table->subst_table[z] = (void *)TMP;
+                       TMP += info->subst_count[z] * sizeof (collate_subst_t);
+               } else {
+                       table->subst_table[z] = NULL;
                }
        }
+
+       if (chains > 0) {
+               table->chain_pri_table = (void *)TMP;
+               TMP += chains * sizeof (collate_chain_t);
+       } else
+               table->chain_pri_table = NULL;
+       if (info->large_count > 0)
+               table->large_pri_table = (void *)TMP;
+       else
+               table->large_pri_table = NULL;
+
+       table->info = info;
        table->__collate_load_error = 0;
 
        return (_LDP_LOADED);
 }
 
-u_char *
-__collate_substitute(struct xlocale_collate *table, const u_char *s)
+/*
+ * Note: for performance reasons, we have expanded bsearch here.  This avoids
+ * function call overhead with each comparison.
+ */
+
+static int32_t *
+substsearch(struct xlocale_collate *table, const wchar_t key, int pass)
+{
+       collate_subst_t *p;
+       int n = table->info->subst_count[pass];
+
+       if (n == 0)
+               return (NULL);
+
+       if (pass >= table->info->directive_count)
+               return (NULL);
+
+       if (!(key & COLLATE_SUBST_PRIORITY))
+               return (NULL);
+
+       p = table->subst_table[pass] + (key & ~COLLATE_SUBST_PRIORITY);
+       return (p->pri);
+}
+
+static collate_chain_t *
+chainsearch(struct xlocale_collate *table, const wchar_t *key, int *len)
 {
-       int dest_len, len, nlen;
-       int delta = strlen(s);
-       u_char *dest_str = NULL;
-
-       if (s == NULL || *s == '\0')
-               return (__collate_strdup(""));
-       delta += delta / 8;
-       dest_str = malloc(dest_len = delta);
-       if (dest_str == NULL)
-               __collate_err(EX_OSERR, __func__);
-       len = 0;
-       while (*s) {
-               nlen = len + strlen(__collate_substitute_table[*s]);
-               if (dest_len <= nlen) {
-                       dest_str = reallocf(dest_str, dest_len = nlen + delta);
-                       if (dest_str == NULL)
-                               __collate_err(EX_OSERR, __func__);
+       int low;
+       int high;
+       int next, compar, l;
+       collate_chain_t *p;
+       collate_chain_t *tab;
+
+       if (table->info->chain_count == 0)
+               return (NULL);
+
+       low = 0;
+       high = table->info->chain_count - 1;
+       tab = table->chain_pri_table;
+
+       while (low <= high) {
+               next = (low + high) / 2;
+               p = tab + next;
+               compar = *key - *p->str;
+               if (compar == 0) {
+                       l = wcsnlen(p->str, COLLATE_STR_LEN);
+                       compar = wcsncmp(key, p->str, l);
+                       if (compar == 0) {
+                               *len = l;
+                               return (p);
+                       }
                }
-               (void)strcpy(dest_str + len, __collate_substitute_table[*s++]);
-               len = nlen;
+               if (compar > 0)
+                       low = next + 1;
+               else
+                       high = next - 1;
        }
-       return (dest_str);
+       return (NULL);
+}
+
+static collate_large_t *
+largesearch(struct xlocale_collate *table, const wchar_t key)
+{
+       int low = 0;
+       int high = table->info->large_count - 1;
+       int next, compar;
+       collate_large_t *p;
+       collate_large_t *tab = table->large_pri_table;
+
+       if (table->info->large_count == 0)
+               return (NULL);
+
+       while (low <= high) {
+               next = (low + high) / 2;
+               p = tab + next;
+               compar = key - p->val;
+               if (compar == 0)
+                       return (p);
+               if (compar > 0)
+                       low = next + 1;
+               else
+                       high = next - 1;
+       }
+       return (NULL);
 }
 
 void
-__collate_lookup(struct xlocale_collate *table, const u_char *t, int *len, int *prim, int *sec)
+_collate_lookup(struct xlocale_collate *table, const wchar_t *t, int *len,
+    int *pri, int which, const int **state)
 {
-       struct __collate_st_chain_pri *p2;
+       collate_chain_t *p2;
+       collate_large_t *match;
+       int p, l;
+       const int *sptr;
+
+       /*
+        * If this is the "last" pass for the UNDEFINED, then
+        * we just return the priority itself.
+        */
+       if (which >= table->info->directive_count) {
+               *pri = *t;
+               *len = 1;
+               *state = NULL;
+               return;
+       }
 
+       /*
+        * If we have remaining substitution data from a previous
+        * call, consume it first.
+        */
+       if ((sptr = *state) != NULL) {
+               *pri = *sptr;
+               sptr++;
+               *state = *sptr ? sptr : NULL;
+               *len = 0;
+               return;
+       }
+
+       /* No active substitutions */
        *len = 1;
-       *prim = *sec = 0;
-       for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) {
-               if (*t == p2->str[0] &&
-                   strncmp(t, p2->str, strlen(p2->str)) == 0) {
-                       *len = strlen(p2->str);
-                       *prim = p2->prim;
-                       *sec = p2->sec;
-                       return;
+
+       /*
+        * Check for composites such as diphthongs that collate as a
+        * single element (aka chains or collating-elements).
+        */
+       if (((p2 = chainsearch(table, t, &l)) != NULL) &&
+           ((p = p2->pri[which]) >= 0)) {
+
+               *len = l;
+               *pri = p;
+
+       } else if (*t <= UCHAR_MAX) {
+
+               /*
+                * Character is a small (8-bit) character.
+                * We just look these up directly for speed.
+                */
+               *pri = table->char_pri_table[*t].pri[which];
+
+       } else if ((table->info->large_count > 0) &&
+           ((match = largesearch(table, *t)) != NULL)) {
+
+               /*
+                * Character was found in the extended table.
+                */
+               *pri = match->pri.pri[which];
+
+       } else {
+               /*
+                * Character lacks a specific definition.
+                */
+               if (table->info->directive[which] & DIRECTIVE_UNDEFINED) {
+                       /* Mask off sign bit to prevent ordering confusion. */
+                       *pri = (*t & COLLATE_MAX_PRIORITY);
+               } else {
+                       *pri = table->info->undef_pri[which];
+               }
+               /* No substitutions for undefined characters! */
+               return;
+       }
+
+       /*
+        * Try substituting (expanding) the character.  We are
+        * currently doing this *after* the chain compression.  I
+        * think it should not matter, but this way might be slightly
+        * faster.
+        *
+        * We do this after the priority search, as this will help us
+        * to identify a single key value.  In order for this to work,
+        * it's important that the priority assigned to a given element
+        * to be substituted be unique for that level.  The localedef
+        * code ensures this for us.
+        */
+       if ((sptr = substsearch(table, *pri, which)) != NULL) {
+               if ((*pri = *sptr) != 0) {
+                       sptr++;
+                       *state = *sptr ? sptr : NULL;
                }
        }
-       *prim = __collate_char_pri_table[*t].prim;
-       *sec = __collate_char_pri_table[*t].sec;
+
 }
 
-u_char *
-__collate_strdup(u_char *s)
+/*
+ * This is the meaty part of wcsxfrm & strxfrm.  Note that it does
+ * NOT NULL terminate.  That is left to the caller.
+ */
+size_t
+_collate_wxfrm(struct xlocale_collate *table, const wchar_t *src, wchar_t *xf,
+    size_t room)
 {
-       u_char *t = strdup(s);
+       int             pri;
+       int             len;
+       const wchar_t   *t;
+       wchar_t         *tr = NULL;
+       int             direc;
+       int             pass;
+       const int32_t   *state;
+       size_t          want = 0;
+       size_t          need = 0;
+
+       for (pass = 0; pass <= table->info->directive_count; pass++) {
+
+               state = NULL;
+
+               if (pass != 0) {
+                       /* insert level separator from the previous pass */
+                       if (room) {
+                               *xf++ = 1;
+                               room--;
+                       }
+                       want++;
+               }
+
+               /* special pass for undefined */
+               if (pass == table->info->directive_count) {
+                       direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
+               } else {
+                       direc = table->info->directive[pass];
+               }
+
+               t = src;
+
+               if (direc & DIRECTIVE_BACKWARD) {
+                       wchar_t *bp, *fp, c;
+                       if (tr)
+                               free(tr);
+                       if ((tr = wcsdup(t)) == NULL) {
+                               errno = ENOMEM;
+                               goto fail;
+                       }
+                       bp = tr;
+                       fp = tr + wcslen(tr) - 1;
+                       while (bp < fp) {
+                               c = *bp;
+                               *bp++ = *fp;
+                               *fp-- = c;
+                       }
+                       t = (const wchar_t *)tr;
+               }
 
-       if (t == NULL)
-               __collate_err(EX_OSERR, __func__);
-       return (t);
+               if (direc & DIRECTIVE_POSITION) {
+                       while (*t || state) {
+                               _collate_lookup(table, t, &len, &pri, pass, &state);
+                               t += len;
+                               if (pri <= 0) {
+                                       if (pri < 0) {
+                                               errno = EINVAL;
+                                               goto fail;
+                                       }
+                                       pri = COLLATE_MAX_PRIORITY;
+                               }
+                               if (room) {
+                                       *xf++ = pri;
+                                       room--;
+                               }
+                               want++;
+                               need = want;
+                       }
+               } else {
+                       while (*t || state) {
+                               _collate_lookup(table, t, &len, &pri, pass, &state);
+                               t += len;
+                               if (pri <= 0) {
+                                       if (pri < 0) {
+                                               errno = EINVAL;
+                                               goto fail;
+                                       }
+                                       continue;
+                               }
+                               if (room) {
+                                       *xf++ = pri;
+                                       room--;
+                               }
+                               want++;
+                               need = want;
+                       }
+               }
+       }
+       if (tr)
+               free(tr);
+       return (need);
+
+fail:
+       if (tr)
+               free(tr);
+       return ((size_t)(-1));
 }
 
-void
-__collate_err(int ex, const char *f)
+/*
+ * In the non-POSIX case, we transform each character into a string of
+ * characters representing the character's priority.  Since char is usually
+ * signed, we are limited by 7 bits per byte.  To avoid zero, we need to add
+ * XFRM_OFFSET, so we can't use a full 7 bits.  For simplicity, we choose 6
+ * bits per byte.
+ *
+ * It turns out that we sometimes have real priorities that are
+ * 31-bits wide.  (But: be careful using priorities where the high
+ * order bit is set -- i.e. the priority is negative.  The sort order
+ * may be surprising!)
+ *
+ * TODO: This would be a good area to optimize somewhat.  It turns out
+ * that real priorities *except for the last UNDEFINED pass* are generally
+ * very small.  We need the localedef code to precalculate the max
+ * priority for us, and ideally also give us a mask, and then we could
+ * severely limit what we expand to.
+ */
+#define        XFRM_BYTES      6
+#define        XFRM_OFFSET     ('0')   /* make all printable characters */
+#define        XFRM_SHIFT      6
+#define        XFRM_MASK       ((1 << XFRM_SHIFT) - 1)
+#define        XFRM_SEP        ('.')   /* chosen to be less than XFRM_OFFSET */
+
+static int
+xfrm(struct xlocale_collate *table, unsigned char *p, int pri, int pass)
 {
-       const char *s;
-       int serrno = errno;
-
-       s = _getprogname();
-       _write(STDERR_FILENO, s, strlen(s));
-       _write(STDERR_FILENO, ": ", 2);
-       s = f;
-       _write(STDERR_FILENO, s, strlen(s));
-       _write(STDERR_FILENO, ": ", 2);
-       s = strerror(serrno);
-       _write(STDERR_FILENO, s, strlen(s));
-       _write(STDERR_FILENO, "\n", 1);
-       exit(ex);
+       /* we use unsigned to ensure zero fill on right shift */
+       uint32_t val = (uint32_t)table->info->pri_count[pass];
+       int nc = 0;
+
+       while (val) {
+               *p = (pri & XFRM_MASK) + XFRM_OFFSET;
+               pri >>= XFRM_SHIFT;
+               val >>= XFRM_SHIFT;
+               p++;
+               nc++;
+       }
+       return (nc);
 }
 
-#ifdef COLLATE_DEBUG
-void
-__collate_print_tables()
+size_t
+_collate_sxfrm(struct xlocale_collate *table, const wchar_t *src, char *xf,
+    size_t room)
 {
-       int i;
-       struct __collate_st_chain_pri *p2;
-
-       printf("Substitute table:\n");
-       for (i = 0; i < UCHAR_MAX + 1; i++)
-           if (i != *__collate_substitute_table[i])
-               printf("\t'%c' --> \"%s\"\n", i,
-                      __collate_substitute_table[i]);
-       printf("Chain priority table:\n");
-       for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++)
-               printf("\t\"%s\" : %d %d\n", p2->str, p2->prim, p2->sec);
-       printf("Char priority table:\n");
-       for (i = 0; i < UCHAR_MAX + 1; i++)
-               printf("\t'%c' : %d %d\n", i, __collate_char_pri_table[i].prim,
-                      __collate_char_pri_table[i].sec);
+       int             pri;
+       int             len;
+       const wchar_t   *t;
+       wchar_t         *tr = NULL;
+       int             direc;
+       int             pass;
+       const int32_t   *state;
+       size_t          want = 0;
+       size_t          need = 0;
+       int             b;
+       uint8_t         buf[XFRM_BYTES];
+
+       for (pass = 0; pass <= table->info->directive_count; pass++) {
+
+               state = NULL;
+
+               if (pass != 0) {
+                       /* insert level separator from the previous pass */
+                       if (room) {
+                               *xf++ = XFRM_SEP;
+                               room--;
+                       }
+                       want++;
+               }
+
+               /* special pass for undefined */
+               if (pass == table->info->directive_count) {
+                       direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
+               } else {
+                       direc = table->info->directive[pass];
+               }
+
+               t = src;
+
+               if (direc & DIRECTIVE_BACKWARD) {
+                       wchar_t *bp, *fp, c;
+                       if (tr)
+                               free(tr);
+                       if ((tr = wcsdup(t)) == NULL) {
+                               errno = ENOMEM;
+                               goto fail;
+                       }
+                       bp = tr;
+                       fp = tr + wcslen(tr) - 1;
+                       while (bp < fp) {
+                               c = *bp;
+                               *bp++ = *fp;
+                               *fp-- = c;
+                       }
+                       t = (const wchar_t *)tr;
+               }
+
+               if (direc & DIRECTIVE_POSITION) {
+                       while (*t || state) {
+
+                               _collate_lookup(table, t, &len, &pri, pass, &state);
+                               t += len;
+                               if (pri <= 0) {
+                                       if (pri < 0) {
+                                               errno = EINVAL;
+                                               goto fail;
+                                       }
+                                       pri = COLLATE_MAX_PRIORITY;
+                               }
+
+                               b = xfrm(table, buf, pri, pass);
+                               want += b;
+                               if (room) {
+                                       while (b) {
+                                               b--;
+                                               if (room) {
+                                                       *xf++ = buf[b];
+                                                       room--;
+                                               }
+                                       }
+                               }
+                               need = want;
+                       }
+               } else {
+                       while (*t || state) {
+                               _collate_lookup(table, t, &len, &pri, pass, &state);
+                               t += len;
+                               if (pri <= 0) {
+                                       if (pri < 0) {
+                                               errno = EINVAL;
+                                               goto fail;
+                                       }
+                                       continue;
+                               }
+
+                               b = xfrm(table, buf, pri, pass);
+                               want += b;
+                               if (room) {
+
+                                       while (b) {
+                                               b--;
+                                               if (room) {
+                                                       *xf++ = buf[b];
+                                                       room--;
+                                               }
+                                       }
+                               }
+                               need = want;
+                       }
+               }
+       }
+       if (tr)
+               free(tr);
+       return (need);
+
+fail:
+       if (tr)
+               free(tr);
+       return ((size_t)(-1));
 }
-#endif
index 97c2755..d676d8b 100644 (file)
@@ -1,4 +1,5 @@
-/*-
+/*
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
  *             at Electronni Visti IA, Kiev, Ukraine.
  *                     All rights reserved.
@@ -29,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD: head/lib/libc/locale/collate.h 227753 2011-11-20 14:45:42Z theraven $
+ * Adapted to xlocale by John Marino <draco@marino.st>
  */
 
 #ifndef _COLLATE_H_
 #include <limits.h>
 #include "xlocale_private.h"
 
-#define STR_LEN 10
-#define TABLE_SIZE 100
-#define COLLATE_VERSION    "1.0\n"
-#define COLLATE_VERSION1_2 "1.2\n"
+#define        COLLATE_STR_LEN         24              /* should be 64-bit multiple */
+#define        COLLATE_VERSION         "DragonFly-4.4\n"
 
-struct __collate_st_char_pri {
-       int prim, sec;
-};
-struct __collate_st_chain_pri {
-       u_char str[STR_LEN];
-       int prim, sec;
-};
+#define        COLLATE_MAX_PRIORITY    (0x7fffffff)    /* max signed value */
+#define        COLLATE_SUBST_PRIORITY  (0x40000000)    /* bit indicates subst table */
+
+#define        DIRECTIVE_UNDEF         0x00
+#define        DIRECTIVE_FORWARD       0x01
+#define        DIRECTIVE_BACKWARD      0x02
+#define        DIRECTIVE_POSITION      0x04
+#define        DIRECTIVE_UNDEFINED     0x08    /* special last weight for UNDEFINED */
+
+#define        DIRECTIVE_DIRECTION_MASK (DIRECTIVE_FORWARD | DIRECTIVE_BACKWARD)
 
-#define __collate_substitute_table (*__collate_substitute_table_ptr)
-#define __collate_char_pri_table (*__collate_char_pri_table_ptr)
+/*
+ * The collate file format is as follows:
+ *
+ * char                version[COLLATE_STR_LEN];       // must be COLLATE_VERSION
+ * collate_info_t      info;                   // see below, includes padding
+ * collate_char_t      char_data[256];         // 8 bit char values
+ * collate_subst_t     subst[*];               // 0 or more substitutions
+ * collate_chain_t     chains[*];              // 0 or more chains
+ * collate_large_t     large[*];               // extended char priorities
+ *
+ * Note that all structures must be 32-bit aligned, as each structure
+ * contains 32-bit member fields.  The entire file is mmap'd, so it's
+ * critical that alignment be observed.  It is not generally safe to
+ * use any 64-bit values in the structures.
+ */
+
+typedef struct collate_info {  /* "info" record placed right after the version string */
+       uint8_t directive_count;        /* number of valid directive[] entries; wcscoll_l() runs one pass per entry plus a final UNDEFINED pass */
+       uint8_t directive[COLL_WEIGHTS_MAX];    /* per-pass DIRECTIVE_* flag bits */
+       int32_t pri_count[COLL_WEIGHTS_MAX];    /* presumably a per-level priority count -- TODO confirm in collate.c */
+       int32_t flags;
+       int32_t chain_count;    /* presumably the number of chains[] records in the file -- TODO confirm */
+       int32_t large_count;    /* presumably the number of large[] records in the file -- TODO confirm */
+       int32_t subst_count[COLL_WEIGHTS_MAX];  /* presumably per-level substitution counts -- TODO confirm */
+       int32_t undef_pri[COLL_WEIGHTS_MAX];    /* presumably per-level priority for UNDEFINED -- TODO confirm */
+} collate_info_t;
+
+typedef struct collate_char {
+       int32_t pri[COLL_WEIGHTS_MAX];
+} collate_char_t;
+
+typedef struct collate_chain {
+       wchar_t str[COLLATE_STR_LEN];
+       int32_t pri[COLL_WEIGHTS_MAX];
+} collate_chain_t;
+
+typedef struct collate_large {
+       int32_t val;
+       collate_char_t pri;
+} collate_large_t;
+
+typedef struct collate_subst {
+       int32_t key;
+       int32_t pri[COLLATE_STR_LEN];
+} collate_subst_t;
 
 struct xlocale_collate {
        struct xlocale_component header;
        int __collate_load_error;
-       int __collate_substitute_nontrivial;
+       char * map;     /* presumably the mmap'd LC_COLLATE file described above -- TODO confirm in collate.c */
+       size_t maplen;  /* presumably the length of that mapping -- TODO confirm */
 
-       u_char (*__collate_substitute_table_ptr)[UCHAR_MAX + 1][STR_LEN];
-       struct __collate_st_char_pri (*__collate_char_pri_table_ptr)[UCHAR_MAX + 1];
-       struct __collate_st_chain_pri *__collate_chain_pri_table;
+       collate_info_t  *info;  /* header record; wcscoll_l() reads directive_count and directive[] through it */
+       collate_char_t  *char_pri_table;        /* presumably the 256-entry 8-bit character table -- TODO confirm */
+       collate_large_t *large_pri_table;       /* presumably the extended-character priorities -- TODO confirm */
+       collate_chain_t *chain_pri_table;       /* presumably the chain records -- TODO confirm */
+       collate_subst_t *subst_table[COLL_WEIGHTS_MAX]; /* presumably one substitution table per level -- TODO confirm */
 };
 
-
 __BEGIN_DECLS
-u_char *__collate_strdup(u_char *);
-u_char *__collate_substitute(struct xlocale_collate *, const u_char *);
 int    __collate_load_tables(const char *);
-void   __collate_lookup(struct xlocale_collate *, const u_char *, int *, int *, int *);
-int    __collate_range_cmp(struct xlocale_collate *, int, int);
-#ifdef COLLATE_DEBUG
-void   __collate_print_tables(void);
-#endif
+void   _collate_lookup(struct xlocale_collate *,const wchar_t *, int *, int *,
+       int, const int **);
+int    __collate_range_cmp(struct xlocale_collate *, wchar_t, wchar_t);
+size_t _collate_wxfrm(struct xlocale_collate *, const wchar_t *, wchar_t *,
+       size_t);
+size_t _collate_sxfrm(struct xlocale_collate *, const wchar_t *, char *,
+       size_t);
 __END_DECLS
 
 #endif /* !_COLLATE_H_ */
index b1209ce..a73d782 100644 (file)
@@ -33,6 +33,7 @@
 
 
 #include <string.h>
+#include <wchar.h>
 #include <xlocale.h>
 #include "collate.h"
 
  * Compare two characters using collate
  */
 
-int __collate_range_cmp(struct xlocale_collate *table, int c1, int c2)
+int __collate_range_cmp(struct xlocale_collate *table, wchar_t c1, wchar_t c2)
 {
-       static char s1[2], s2[2];
+       wchar_t s1[2], s2[2];
+       /*
+        * s1 and s2 are one-character wide strings handed to wcscoll_l().
+        * They are automatic (no longer static), so element 1 of each
+        * must be set to the terminator explicitly.
+        */
 
        s1[0] = c1;
+       s1[1] = 0;
        s2[0] = c2;
+       s2[1] = 0;
+       /* Scratch locale whose only populated component is the caller's table. */
        struct _xlocale l = {{0}};
        l.components[XLC_COLLATE] = (struct xlocale_component *)table;
-       return (strcoll_l(s1, s2, &l));
+       return (wcscoll_l(s1, s2, &l));
 }
index 4e0f8ca..b2b6452 100644 (file)
@@ -67,12 +67,6 @@ extern _RuneLocale   *_Read_RuneMagi(FILE *);
 
 static int             __setrunelocale(struct xlocale_ctype *l, const char *);
 
-#define __collate_substitute_nontrivial (table->__collate_substitute_nontrivial)
-#define __collate_substitute_table_ptr (table->__collate_substitute_table_ptr)
-#define __collate_char_pri_table_ptr (table->__collate_char_pri_table_ptr)
-#define __collate_chain_pri_table (table->__collate_chain_pri_table)
-
-
 static void
 destruct_ctype(void *v)
 {
index 9749763..0e2f6fa 100644 (file)
@@ -1,4 +1,5 @@
-/*-
+/*
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
  *             at Electronni Visti IA, Kiev, Ukraine.
  *                     All rights reserved.
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
- *
- * $FreeBSD: head/lib/libc/string/strcoll.c 228202 2011-12-02 15:41:09Z eadler $
  */
 
 #include <stdlib.h>
 #include <string.h>
+#include <errno.h>
+#include <wchar.h>
 #include "collate.h"
 
-#include <stdio.h>
 
+/*
+ * In order to properly handle multibyte locales, it's easiest to just
+ * convert to wide characters and then use wcscoll.  However if an
+ * error occurs, we gracefully fall back to simple strcmp.  Caller
+ * should check errno.
+ */
 int
 strcoll_l(const char *s, const char *s2, locale_t locale)
 {
-       int len, len2, prim, prim2, sec, sec2, ret, ret2;
-       const char *t, *t2;
-       char *tt, *tt2;
+       int ret;
+       wchar_t *t1 = NULL, *t2 = NULL;
+       wchar_t *w1 = NULL, *w2 = NULL;
+       mbstate_t mbs1 = { 0 };
+       mbstate_t mbs2 = { 0 };
+       size_t sz1, sz2;
        FIX_LOCALE(locale);
        struct xlocale_collate *table =
                (struct xlocale_collate*)locale->components[XLC_COLLATE];
 
        if (table->__collate_load_error)
-               return strcmp(s, s2);
+               goto error;
+
+       sz1 = strlen(s) + 1;
+       sz2 = strlen(s2) + 1;
+
+       /*
+        * Simple assumption: conversion to wide format is strictly
+        * reducing, i.e. a single byte (or multibyte character)
+        * cannot result in multiple wide characters.
+        */
+       if ((t1 = malloc(sz1 * sizeof (wchar_t))) == NULL)
+               goto error;
+       w1 = t1;
+       if ((t2 = malloc(sz2 * sizeof (wchar_t))) == NULL)
+               goto error;
+       w2 = t2;
+
+       if ((mbsrtowcs_l(w1, &s, sz1, &mbs1, locale)) == (size_t)-1)
+               goto error;
+
+       if ((mbsrtowcs_l(w2, &s2, sz2, &mbs2, locale)) == (size_t)-1)
+               goto error;
+
+       ret = wcscoll_l(w1, w2, locale);
+       if (t1)
+               free(t1);
+       if (t2)
+               free(t2);
 
-       len = len2 = 1;
-       ret = ret2 = 0;
-       if (table->__collate_substitute_nontrivial) {
-               t = tt = __collate_substitute(table, s);
-               t2 = tt2 = __collate_substitute(table, s2);
-       } else {
-               tt = tt2 = NULL;
-               t = s;
-               t2 = s2;
-       }
-       while(*t && *t2) {
-               prim = prim2 = 0;
-               while(*t && !prim) {
-                       __collate_lookup(table, t, &len, &prim, &sec);
-                       t += len;
-               }
-               while(*t2 && !prim2) {
-                       __collate_lookup(table, t2, &len2, &prim2, &sec2);
-                       t2 += len2;
-               }
-               if(!prim || !prim2)
-                       break;
-               if(prim != prim2) {
-                       ret = prim - prim2;
-                       goto end;
-               }
-               if(!ret2)
-                       ret2 = sec - sec2;
-       }
-       if(!*t && *t2)
-               ret = -(int)((u_char)*t2);
-       else if(*t && !*t2)
-               ret = (u_char)*t;
-       else if(!*t && !*t2)
-               ret = ret2;
-  end:
-       free(tt);
-       free(tt2);
+       return (ret);
 
-       return ret;
+error:
+       if (t1)
+               free(t1);
+       if (t2)
+               free(t2);
+       return (strcmp(s, s2));
 }
 
 int
index 41fa27e..14c9074 100644 (file)
@@ -1,4 +1,5 @@
-/*-
+/*
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
  *             at Electronni Visti IA, Kiev, Ukraine.
  *                     All rights reserved.
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
- *
- * $FreeBSD: head/lib/libc/string/strxfrm.c 227753 2011-11-20 14:45:42Z theraven $
  */
 
 #include <stdlib.h>
 #include <string.h>
+#include <errno.h>
+#include <wchar.h>
 #include "collate.h"
 
 size_t
@@ -47,9 +48,10 @@ strxfrm(char * __restrict dest, const char * __restrict src, size_t len)
 size_t
 strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, locale_t locale)
 {
-       int prim, sec, l;
        size_t slen;
-       char *s, *ss;
+       size_t xlen;    /* length reported by _collate_sxfrm() */
+       wchar_t *wcs = NULL;    /* wide-character copy of src; freed on every exit path */
+
        FIX_LOCALE(locale);
        struct xlocale_collate *table =
                (struct xlocale_collate*)locale->components[XLC_COLLATE];
@@ -57,32 +59,44 @@ strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, local
        if (!*src) {
                if (len > 0)
                        *dest = '\0';
-               return 0;
+               return (0);
        }
 
+       /*
+        * The conversion from multibyte to wide character strings is
+        * strictly reducing (one byte of an mbs cannot expand to more
+        * than one wide character.)
+        *
+        * That is why slen + 1 wide characters is enough room for the
+        * converted copy of src that is handed to _collate_sxfrm() below.
+        * slen is also the value returned by the fallback path.
+        */
+       slen = strlen(src);
+
        if (table->__collate_load_error)
-               return strlcpy(dest, src, len);
+               goto error;
+
+       if ((wcs = malloc((slen + 1) * sizeof (wchar_t))) == NULL)
+               goto error;
+
+       if (mbstowcs_l(wcs, src, slen + 1, locale) == (size_t)-1)
+               goto error;
 
-       slen = 0;
-       prim = sec = 0;
-       ss = s = __collate_substitute(table, src);
-       while (*s) {
-               while (*s && !prim) {
-                       __collate_lookup(table, s, &l, &prim, &sec);
-                       s += l;
-               }
-               if (prim) {
-                       if (len > 1) {
-                               *dest++ = (char)prim;
-                               len--;
-                       }
-                       slen++;
-                       prim = 0;
-               }
+       if ((xlen = _collate_sxfrm(table, wcs, dest, len)) == (size_t)-1)
+               goto error;
+
+       if (wcs)
+               free(wcs);
+
+       if (len > xlen) {       /* whole result fits: terminate right after it */
+               dest[xlen] = 0;
+       } else if (len) {       /* truncated: terminate in the last slot of dest */
+               dest[len-1] = 0;
        }
-       free(ss);
-       if (len > 0)
-               *dest = '\0';
 
-       return slen;
+       return (xlen);
+
+error:
+       /*
+        * errno should be set to ENOMEM if malloc failed.  Every failure
+        * (table not loaded, allocation, conversion, transform) ends up
+        * here: src is copied unchanged, bounded by len, and strlen(src)
+        * is returned.
+        */
+       if (wcs)
+               free(wcs);
+       (void) strlcpy(dest, src, len);
+
+       return (slen);
 }
index 9907e28..87a91c2 100644 (file)
@@ -1,4 +1,5 @@
-/*-
+/*
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2002 Tim J. Robbins
  * All rights reserved.
  *
@@ -27,8 +28,6 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
- *
- * $FreeBSD: head/lib/libc/string/wcscoll.c 227753 2011-11-20 14:45:42Z theraven $
  */
 
 #include <errno.h>
 #include <wchar.h>
 #include "collate.h"
 
-static char *__mbsdup(const wchar_t *);
-
-/*
- * Placeholder implementation of wcscoll(). Attempts to use the single-byte
- * collation ordering where possible, and falls back on wcscmp() in locales
- * with extended character sets.
- */
 int
 wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
 {
-       char *mbs1, *mbs2;
-       int diff, sverrno;
+       int len1, len2, pri1, pri2, ret;
+       wchar_t *tr1 = NULL, *tr2 = NULL;
+       int direc, pass;
+
        FIX_LOCALE(locale);
        struct xlocale_collate *table =
                (struct xlocale_collate*)locale->components[XLC_COLLATE];
 
-       if (table->__collate_load_error || MB_CUR_MAX > 1)
+       if (table->__collate_load_error)
                /*
-                * Locale has no special collating order, could not be
-                * loaded, or has an extended character set; do a fast binary
-                * comparison.
+                * Locale has no special collating order or could not be
+                * loaded, do a fast binary comparison.
                 */
                return (wcscmp(ws1, ws2));
 
-       if ((mbs1 = __mbsdup(ws1)) == NULL || (mbs2 = __mbsdup(ws2)) == NULL) {
-               /*
-                * Out of memory or illegal wide chars; fall back to wcscmp()
-                * but leave errno indicating the error. Callers that don't
-                * check for error will get a reasonable but often slightly
-                * incorrect result.
-                */
-               sverrno = errno;
-               free(mbs1);
-               errno = sverrno;
-               return (wcscmp(ws1, ws2));
+       ret = 0;
+
+       /*
+        * Once upon a time we had code to try to optimize this, but
+        * it turns out that you really can't make many assumptions
+        * safely.  You absolutely have to run this pass by pass,
+        * because some passes will be ignored for a given character,
+        * while others will not.  Simpler locales will benefit from
+        * having fewer passes, and most comparisions should resolve
+        * during the primary pass anyway.
+        *
+        * Note that we do one final extra pass at the end to pick
+        * up UNDEFINED elements.  There is special handling for them.
+        */
+       for (pass = 0; pass <= table->info->directive_count; pass++) {
+
+               const int32_t *st1 = NULL;
+               const int32_t *st2 = NULL;
+               const wchar_t   *w1 = ws1;
+               const wchar_t   *w2 = ws2;
+
+               /* special pass for UNDEFINED */
+               if (pass == table->info->directive_count) {
+                       direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
+               } else {
+                       direc = table->info->directive[pass];
+               }
+
+               if (direc & DIRECTIVE_BACKWARD) {
+                       wchar_t *bp, *fp, c;
+                       if ((tr1 = wcsdup(w1)) == NULL)
+                               goto fail;
+                       bp = tr1;
+                       fp = tr1 + wcslen(tr1) - 1;
+                       while (bp < fp) {
+                               c = *bp;
+                               *bp++ = *fp;
+                               *fp-- = c;
+                       }
+                       if ((tr2 = wcsdup(w2)) == NULL)
+                               goto fail;
+                       bp = tr2;
+                       fp = tr2 + wcslen(tr2) - 1;
+                       while (bp < fp) {
+                               c = *bp;
+                               *bp++ = *fp;
+                               *fp-- = c;
+                       }
+                       w1 = tr1;
+                       w2 = tr2;
+               }
+
+               if (direc & DIRECTIVE_POSITION) {
+                       while ((*w1 || st1) && (*w2 || st2)) {
+                               pri1 = pri2 = 0;
+                               _collate_lookup(table, w1, &len1, &pri1, pass,
+                                   &st1);
+                               if (pri1 <= 0) {
+                                       if (pri1 < 0) {
+                                               errno = EINVAL;
+                                               goto fail;
+                                       }
+                                       pri1 = COLLATE_MAX_PRIORITY;
+                               }
+                               _collate_lookup(table, w2, &len2, &pri2, pass,
+                                   &st2);
+                               if (pri2 <= 0) {
+                                       if (pri2 < 0) {
+                                               errno = EINVAL;
+                                               goto fail;
+                                       }
+                                       pri2 = COLLATE_MAX_PRIORITY;
+                               }
+                               if (pri1 != pri2) {
+                                       ret = pri1 - pri2;
+                                       goto end;
+                               }
+                               w1 += len1;
+                               w2 += len2;
+                       }
+               } else {
+                       while ((*w1 || st1) && (*w2 || st2)) {
+                               pri1 = pri2 = 0;
+                               while (*w1) {
+                                       _collate_lookup(table, w1, &len1,
+                                           &pri1, pass, &st1);
+                                       if (pri1 > 0)
+                                               break;
+                                       if (pri1 < 0) {
+                                               errno = EINVAL;
+                                               goto fail;
+                                       }
+                                       w1 += len1;
+                               }
+                               while (*w2) {
+                                       _collate_lookup(table, w2, &len2,
+                                           &pri2, pass, &st2);
+                                       if (pri2 > 0)
+                                               break;
+                                       if (pri2 < 0) {
+                                               errno = EINVAL;
+                                               goto fail;
+                                       }
+                                       w2 += len2;
+                               }
+                               if (!pri1 || !pri2)
+                                       break;
+                               if (pri1 != pri2) {
+                                       ret = pri1 - pri2;
+                                       goto end;
+                               }
+                               w1 += len1;
+                               w2 += len2;
+                       }
+               }
+               if (!*w1) {
+                       if (*w2) {
+                               ret = -(int)*w2;
+                               goto end;
+                       }
+               } else {
+                       ret = *w1;
+                       goto end;
+               }
        }
+       ret = 0;
+
+end:
+       if (tr1)
+               free(tr1);
+       if (tr2)
+               free(tr2);
 
-       diff = strcoll_l(mbs1, mbs2, locale);
-       sverrno = errno;
-       free(mbs1);
-       free(mbs2);
-       errno = sverrno;
+       return (ret);
 
-       return (diff);
+fail:
+       ret = wcscmp(ws1, ws2);
+       goto end;
 }
 
 int
@@ -88,24 +199,3 @@ wcscoll(const wchar_t *ws1, const wchar_t *ws2)
 {
        return wcscoll_l(ws1, ws2, __get_locale());
 }
-
-static char *
-__mbsdup(const wchar_t *ws)
-{
-       static const mbstate_t initial;
-       mbstate_t st;
-       const wchar_t *wcp;
-       size_t len;
-       char *mbs;
-
-       wcp = ws;
-       st = initial;
-       if ((len = wcsrtombs(NULL, &wcp, 0, &st)) == (size_t)-1)
-               return (NULL);
-       if ((mbs = malloc(len + 1)) == NULL)
-               return (NULL);
-       st = initial;
-       wcsrtombs(mbs, &ws, len + 1, &st);
-
-       return (mbs);
-}
index 61ba3f8..a8348ce 100644 (file)
@@ -1,4 +1,5 @@
-/*-
+/*
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
  *             at Electronni Visti IA, Kiev, Ukraine.
  *                     All rights reserved.
@@ -28,8 +29,6 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
- *
- * $FreeBSD: head/lib/libc/string/wcsxfrm.c 227753 2011-11-20 14:45:42Z theraven $
  */
 
 #include <stdlib.h>
 #include <wchar.h>
 #include "collate.h"
 
-static char *__mbsdup(const wchar_t *);
-
-/*
- * Placeholder wcsxfrm() implementation. See wcscoll.c for a description of
- * the logic used.
- */
 size_t
 wcsxfrm_l(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len, locale_t locale)
 {
-       int prim, sec, l;
        size_t slen;
-       char *mbsrc, *s, *ss;
        FIX_LOCALE(locale);
        struct xlocale_collate *table =
                (struct xlocale_collate*)locale->components[XLC_COLLATE];
@@ -59,67 +50,33 @@ wcsxfrm_l(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len,
                return (0);
        }
 
-       if (table->__collate_load_error || MB_CUR_MAX > 1) {
-               slen = wcslen(src);
-               if (len > 0) {
-                       if (slen < len)
-                               wcscpy(dest, src);
-                       else {
-                               wcsncpy(dest, src, len - 1);
-                               dest[len - 1] = L'\0';
-                       }
-               }
-               return (slen);
+       if ((table->__collate_load_error) ||
+           ((slen = _collate_wxfrm(table, src, dest, len)) == (size_t)-1)) {
+               goto error;
        }
 
-       mbsrc = __mbsdup(src);
-       slen = 0;
-       prim = sec = 0;
-       ss = s = __collate_substitute(table, mbsrc);
-       while (*s != '\0') {
-               while (*s != '\0' && prim == 0) {
-                       __collate_lookup(table, s, &l, &prim, &sec);
-                       s += l;
-               }
-               if (prim != 0) {
-                       if (len > 1) {
-                               *dest++ = (wchar_t)prim;
-                               len--;
-                       }
-                       slen++;
-                       prim = 0;
-               }
+       /* Add null termination at the correct location. */
+       if (len > slen) {
+               dest[slen] = 0;
+       } else if (len) {
+               dest[len-1] = 0;
        }
-       free(ss);
-       free(mbsrc);
-       if (len != 0)
-               *dest = L'\0';
 
        return (slen);
+
+error:
+       slen = wcslen(src);
+       if (slen < len)
+               (void) wcscpy(dest, src);
+       else {
+               (void) wcsncpy(dest, src, len - 1);
+               dest[len - 1] = L'\0';
+       }
+       return (slen);
 }
+
 size_t
 wcsxfrm(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len)
 {
        return wcsxfrm_l(dest, src, len, __get_locale());
 }
-
-static char *
-__mbsdup(const wchar_t *ws)
-{
-       static const mbstate_t initial;
-       mbstate_t st;
-       const wchar_t *wcp;
-       size_t len;
-       char *mbs;
-
-       wcp = ws;
-       st = initial;
-       if ((len = wcsrtombs(NULL, &wcp, 0, &st)) == (size_t)-1)
-               return (NULL);
-       if ((mbs = malloc(len + 1)) == NULL)
-               return (NULL);
-       st = initial;
-       wcsrtombs(mbs, &ws, len + 1, &st);
-
-       return (mbs);
-}