Add citrus backend code and iconv front end. This is intentionally
[dragonfly.git] / lib / libc / citrus / modules / citrus_iconv_std.c
1 /*      $NetBSD: src/lib/libc/citrus/modules/citrus_iconv_std.c,v 1.10 2005/02/11 06:21:21 simonb Exp $ */
2 /*      $DragonFly: src/lib/libc/citrus/modules/citrus_iconv_std.c,v 1.1 2005/03/11 23:33:53 joerg Exp $ */
3
4 /*-
5  * Copyright (c)2003 Citrus Project,
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/types.h>
31 #include <sys/endian.h>
32 #include <sys/queue.h>
33 #include <assert.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39
40 #include "citrus_namespace.h"
41 #include "citrus_types.h"
42 #include "citrus_module.h"
43 #include "citrus_region.h"
44 #include "citrus_mmap.h"
45 #include "citrus_hash.h"
46 #include "citrus_iconv.h"
47 #include "citrus_stdenc.h"
48 #include "citrus_mapper.h"
49 #include "citrus_csmapper.h"
50 #include "citrus_memstream.h"
51 #include "citrus_iconv_std.h"
52 #include "citrus_esdb.h"
53
54 /* ---------------------------------------------------------------------- */
55
/*
 * Module boilerplate for the "iconv_std" backend.
 * NOTE(review): the macro bodies are not visible in this file; presumably
 * they declare the module's operation prototypes and define the
 * _citrus_iconv_std_iconv_ops table that _citrus_iconv_std_iconv_getops()
 * copies out -- confirm against citrus_iconv.h.
 */
56 _CITRUS_ICONV_DECLS(iconv_std);
57 _CITRUS_ICONV_DEF_OPS(iconv_std);
58
59
60 /* ---------------------------------------------------------------------- */
61
62 int
63 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops,
64                                u_int32_t expected_version)
65 {
66         if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops))
67                 return (EINVAL);
68
69         memcpy(ops, &_citrus_iconv_std_iconv_ops,
70                sizeof(_citrus_iconv_std_iconv_ops));
71
72         return (0);
73 }
74
75 /* ---------------------------------------------------------------------- */
76
77 /*
78  * convenience routines for stdenc.
79  */
80 static __inline void
81 save_encoding_state(struct _citrus_iconv_std_encoding *se)
82 {
83         if (se->se_ps)
84                 memcpy(se->se_pssaved, se->se_ps,
85                        _stdenc_get_state_size(se->se_handle));
86 }
87
88 static __inline void
89 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
90 {
91         if (se->se_ps)
92                 memcpy(se->se_ps, se->se_pssaved,
93                        _stdenc_get_state_size(se->se_handle));
94 }
95
96 static __inline void
97 init_encoding_state(struct _citrus_iconv_std_encoding *se)
98 {
99         if (se->se_ps)
100                 _stdenc_init_state(se->se_handle, se->se_ps);
101 }
102
103 static __inline int
104 mbtocsx(struct _citrus_iconv_std_encoding *se,
105         _csid_t *csid, _index_t *idx, const char **s, size_t n,
106         size_t *nresult)
107 {
108         return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
109                               nresult);
110 }
111
112 static __inline int
113 cstombx(struct _citrus_iconv_std_encoding *se,
114         char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult)
115 {
116         return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
117                               nresult);
118 }
119
120 static __inline int
121 wctombx(struct _citrus_iconv_std_encoding *se,
122         char *s, size_t n, _wc_t wc, size_t *nresult)
123 {
124         return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult);
125 }
126
127 static __inline int
128 put_state_resetx(struct _citrus_iconv_std_encoding *se,
129                  char *s, size_t n, size_t *nresult)
130 {
131         return _stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult);
132 }
133
134 /*
135  * init encoding context
136  */
137 static int
138 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
139               void *ps1, void *ps2)
140 {
141         int ret;
142
143         se->se_handle = cs;
144         se->se_ps = ps1;
145         se->se_pssaved = ps2;
146
147         if (se->se_ps)
148                 ret = _stdenc_init_state(cs, se->se_ps);
149         if (!ret && se->se_pssaved)
150                 ret = _stdenc_init_state(cs, se->se_pssaved);
151
152         return ret;
153 }
154
/*
 * Open a charset mapper from `src` to `dst` and accept it only if it is a
 * stateless one-to-one mapper.
 *
 * rcm   - receives the mapper on success; left untouched on failure.
 * rnorm - passed through to _csmapper_open().
 *
 * Returns 0 on success, the error from _csmapper_open() if it fails, or
 * EINVAL (after closing the mapper) when its source maximum or destination
 * maximum is not 1 or its state size is not 0.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
              unsigned long *rnorm)
{
        struct _csmapper *mapper;
        int error;

        error = _csmapper_open(&mapper, src, dst, 0, rnorm);
        if (error != 0)
                return error;

        if (_csmapper_get_src_max(mapper) == 1 &&
            _csmapper_get_dst_max(mapper) == 1 &&
            _csmapper_get_state_size(mapper) == 0) {
                *rcm = mapper;
                return 0;
        }

        _csmapper_close(mapper);
        return EINVAL;
}
175
176 static void
177 close_dsts(struct _citrus_iconv_std_dst_list *dl)
178 {
179         struct _citrus_iconv_std_dst *sd;
180
181         while ((sd=TAILQ_FIRST(dl)) != NULL) {
182                 TAILQ_REMOVE(dl, sd, sd_entry);
183                 _csmapper_close(sd->sd_mapper);
184                 free(sd);
185         }
186 }
187
188 static int
189 open_dsts(struct _citrus_iconv_std_dst_list *dl,
190           const struct _esdb_charset *ec, const struct _esdb *dbdst)
191 {
192         int i, ret;
193         struct _citrus_iconv_std_dst *sd, *sdtmp;
194         unsigned long norm;
195
196         sd = malloc(sizeof(*sd));
197         if (sd == NULL)
198                 return errno;
199
200         for (i=0; i<dbdst->db_num_charsets; i++) {
201                 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
202                                     dbdst->db_charsets[i].ec_csname, &norm);
203                 if (ret == 0) {
204                         sd->sd_csid = dbdst->db_charsets[i].ec_csid;
205                         sd->sd_norm = norm;
206                         /* insert this mapper by sorted order. */
207                         TAILQ_FOREACH(sdtmp, dl, sd_entry) {
208                                 if (sdtmp->sd_norm > norm) {
209                                         TAILQ_INSERT_BEFORE(sdtmp, sd,
210                                                             sd_entry);
211                                         sd = NULL;
212                                         break;
213                                 }
214                         }
215                         if (sd)
216                                 TAILQ_INSERT_TAIL(dl, sd, sd_entry);
217                         sd = malloc(sizeof(*sd));
218                         if (sd == NULL) {
219                                 ret = errno;
220                                 close_dsts(dl);
221                                 return ret;
222                         }
223                 } else if (ret != ENOENT) {
224                         close_dsts(dl);
225                         free(sd);
226                         return ret;
227                 }
228         }
229         free(sd);
230         return 0;
231 }
232
233 static void
234 close_srcs(struct _citrus_iconv_std_src_list *sl)
235 {
236         struct _citrus_iconv_std_src *ss;
237
238         while ((ss=TAILQ_FIRST(sl)) != NULL) {
239                 TAILQ_REMOVE(sl, ss, ss_entry);
240                 close_dsts(&ss->ss_dsts);
241                 free(ss);
242         }
243 }
244
245 static int
246 open_srcs(struct _citrus_iconv_std_src_list *sl,
247           const struct _esdb *dbsrc, const struct _esdb *dbdst)
248 {
249         int i, ret, count = 0;
250         struct _citrus_iconv_std_src *ss;
251
252         ss = malloc(sizeof(*ss));
253         if (ss == NULL)
254                 return errno;
255
256         TAILQ_INIT(&ss->ss_dsts);
257
258         for (i=0; i<dbsrc->db_num_charsets; i++) {
259                 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
260                 if (ret)
261                         goto err;
262                 if (!TAILQ_EMPTY(&ss->ss_dsts)) {
263                         ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
264                         TAILQ_INSERT_TAIL(sl, ss, ss_entry);
265                         ss = malloc(sizeof(*ss));
266                         if (ss == NULL) {
267                                 ret = errno;
268                                 goto err;
269                         }
270                         count++;
271                         TAILQ_INIT(&ss->ss_dsts);
272                 }
273         }
274         free(ss);
275
276         return count ? 0 : ENOENT;
277
278 err:
279         free(ss);
280         close_srcs(sl);
281         return ret;
282 }
283
284 /* convert a single character */
285 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
286 static int
287 /*ARGSUSED*/
288 do_conv(const struct _citrus_iconv_std_shared *is,
289         struct _citrus_iconv_std_context *sc, _csid_t *csid, _index_t *idx)
290 {
291         _index_t tmpidx;
292         int ret;
293         struct _citrus_iconv_std_src *ss;
294         struct _citrus_iconv_std_dst *sd;
295
296         TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
297                 if (ss->ss_csid == *csid) {
298                         TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
299                                 ret = _csmapper_convert(sd->sd_mapper,
300                                                         &tmpidx, *idx, NULL);
301                                 switch (ret) {
302                                 case _MAPPER_CONVERT_SUCCESS:
303                                         *csid = sd->sd_csid;
304                                         *idx = tmpidx;
305                                         return 0;
306                                 case _MAPPER_CONVERT_NONIDENTICAL:
307                                         break;
308                                 case _MAPPER_CONVERT_SRC_MORE:
309                                         /*FALLTHROUGH*/
310                                 case _MAPPER_CONVERT_DST_MORE:
311                                         /*FALLTHROUGH*/
312                                 case _MAPPER_CONVERT_FATAL:
313                                         return EINVAL;
314                                 case _MAPPER_CONVERT_ILSEQ:
315                                         return EILSEQ;
316                                 }
317                         }
318                         break;
319                 }
320         }
321
322         return E_NO_CORRESPONDING_CHAR;
323 }
324 /* ---------------------------------------------------------------------- */
325
326 static int
327 /*ARGSUSED*/
328 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
329                                     const char * __restrict curdir,
330                                     const char * __restrict src,
331                                     const char * __restrict dst,
332                                     const void * __restrict var, size_t lenvar)
333 {
334         int ret;
335         struct _citrus_iconv_std_shared *is;
336         struct _citrus_esdb esdbsrc, esdbdst;
337
338         is = malloc(sizeof(*is));
339         if (is==NULL) {
340                 ret = errno;
341                 goto err0;
342         }
343         ret = _citrus_esdb_open(&esdbsrc, src);
344         if (ret)
345                 goto err1;
346         ret = _citrus_esdb_open(&esdbdst, dst);
347         if (ret)
348                 goto err2;
349         ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
350                            esdbsrc.db_variable, esdbsrc.db_len_variable);
351         if (ret)
352                 goto err3;
353         ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
354                            esdbdst.db_variable, esdbdst.db_len_variable);
355         if (ret)
356                 goto err4;
357         is->is_use_invalid = esdbdst.db_use_invalid;
358         is->is_invalid = esdbdst.db_invalid;
359
360         TAILQ_INIT(&is->is_srcs);
361         ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
362         if (ret)
363                 goto err5;
364
365         _esdb_close(&esdbsrc);
366         _esdb_close(&esdbdst);
367         ci->ci_closure = is;
368
369         return 0;
370
371 err5:
372         _stdenc_close(is->is_dst_encoding);
373 err4:
374         _stdenc_close(is->is_src_encoding);
375 err3:
376         _esdb_close(&esdbdst);
377 err2:
378         _esdb_close(&esdbsrc);
379 err1:
380         free(is);
381 err0:
382         return ret;
383 }
384
385 static void
386 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
387 {
388         struct _citrus_iconv_std_shared *is = ci->ci_closure;
389
390         if (is == NULL)
391                 return;
392
393         _stdenc_close(is->is_src_encoding);
394         _stdenc_close(is->is_dst_encoding);
395         close_srcs(&is->is_srcs);
396         free(is);
397 }
398
399 static int
400 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
401 {
402         const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
403         struct _citrus_iconv_std_context *sc;
404         int ret;
405         size_t szpssrc, szpsdst, sz;
406         char *ptr;
407
408         szpssrc = _stdenc_get_state_size(is->is_src_encoding);
409         szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
410
411         sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
412         sc = malloc(sz);
413         if (sc == NULL)
414                 return errno;
415
416         ptr = (char *)&sc[1];
417         if (szpssrc)
418                 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
419                               ptr, ptr+szpssrc);
420         else
421                 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
422                               NULL, NULL);
423         ptr += szpssrc*2;
424         if (szpsdst)
425                 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
426                               ptr, ptr+szpsdst);
427         else
428                 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
429                               NULL, NULL);
430
431         cv->cv_closure = (void *)sc;
432
433         return 0;
434 }
435
436 static void
437 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
438 {
439         free(cv->cv_closure);
440 }
441
442 static int
443 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
444                                 const char * __restrict * __restrict in,
445                                 size_t * __restrict inbytes,
446                                 char * __restrict * __restrict out,
447                                 size_t * __restrict outbytes, u_int32_t flags,
448                                 size_t * __restrict invalids)
449 {
450         const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
451         struct _citrus_iconv_std_context *sc = cv->cv_closure;
452         _index_t idx;
453         _csid_t csid;
454         int ret;
455         size_t szrin, szrout;
456         size_t inval;
457         const char *tmpin;
458
459         inval = 0;
460         if (in==NULL || *in==NULL) {
461                 /* special cases */
462                 if (out!=NULL && *out!=NULL) {
463                         /* init output state and store the shift sequence */
464                         save_encoding_state(&sc->sc_src_encoding);
465                         save_encoding_state(&sc->sc_dst_encoding);
466                         szrout = 0;
467
468                         ret = put_state_resetx(&sc->sc_dst_encoding,
469                                                *out, *outbytes,
470                                                &szrout);
471                         if (ret)
472                                 goto err;
473
474                         if (szrout == (size_t)-2) {
475                                 /* too small to store the character */
476                                 ret = EINVAL;
477                                 goto err;
478                         }
479                         *out += szrout;
480                         *outbytes -= szrout;
481                 } else
482                         /* otherwise, discard the shift sequence */
483                         init_encoding_state(&sc->sc_dst_encoding);
484                 init_encoding_state(&sc->sc_src_encoding);
485                 *invalids = 0;
486                 return 0;
487         }
488
489         /* normal case */
490         for (;;) {
491                 if (*inbytes==0)
492                         break;
493
494                 /* save the encoding states for the error recovery */
495                 save_encoding_state(&sc->sc_src_encoding);
496                 save_encoding_state(&sc->sc_dst_encoding);
497
498                 /* mb -> csid/index */
499                 tmpin = *in;
500                 szrin = szrout = 0;
501                 ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx,
502                               &tmpin, *inbytes, &szrin);
503                 if (ret)
504                         goto err;
505
506                 if (szrin == (size_t)-2) {
507                         /* incompleted character */
508                         ret = EINVAL;
509                         goto err;
510                 }
511                 /* convert the character */
512                 ret = do_conv(is, sc, &csid, &idx);
513                 if (ret) {
514                         if (ret == E_NO_CORRESPONDING_CHAR) {
515                                 inval++;
516                                 szrout = 0;
517                                 if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 &&
518                                     is->is_use_invalid) {
519                                         ret = wctombx(&sc->sc_dst_encoding,
520                                                       *out, *outbytes,
521                                                       is->is_invalid,
522                                                       &szrout);
523                                         if (ret)
524                                                 goto err;
525                                 }
526                                 goto next;
527                         } else {
528                                 goto err;
529                         }
530                 }
531                 /* csid/index -> mb */
532                 ret = cstombx(&sc->sc_dst_encoding,
533                               *out, *outbytes, csid, idx, &szrout);
534                 if (ret)
535                         goto err;
536 next:
537                 _DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout);
538                 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
539                 *in = tmpin;
540                 *outbytes -= szrout;
541                 *out += szrout;
542         }
543         *invalids = inval;
544
545         return 0;
546
547 err:
548         restore_encoding_state(&sc->sc_src_encoding);
549         restore_encoding_state(&sc->sc_dst_encoding);
550 err_norestore:
551         *invalids = inval;
552
553         return ret;
554 }