65b075cd7b27161a7af0ba5a405e1b5292221c1a
[dragonfly.git] / lib / i18n_module / iconv_std / citrus_iconv_std.c
1 /* $FreeBSD: head/lib/libiconv_modules/iconv_std/citrus_iconv_std.c 252583 2013-07-03 18:27:45Z peter $ */
2 /*      $NetBSD: citrus_iconv_std.c,v 1.15 2006/11/13 19:08:19 tnozaki Exp $    */
3
4 /*-
5  * Copyright (c)2003 Citrus Project,
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 #include <sys/endian.h>
32 #include <sys/queue.h>
33
34 #include <assert.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <stdbool.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41
42 #include "citrus_namespace.h"
43 #include "citrus_types.h"
44 #include "citrus_module.h"
45 #include "citrus_region.h"
46 #include "citrus_mmap.h"
47 #include "citrus_hash.h"
48 #include "citrus_iconv.h"
49 #include "citrus_stdenc.h"
50 #include "citrus_mapper.h"
51 #include "citrus_csmapper.h"
52 #include "citrus_memstream.h"
53 #include "citrus_iconv_std.h"
54 #include "citrus_esdb.h"
55
56 /* ---------------------------------------------------------------------- */
57
58 _CITRUS_ICONV_DECLS(iconv_std);
59 _CITRUS_ICONV_DEF_OPS(iconv_std);
60
61
62 /* ---------------------------------------------------------------------- */
63
64 int
65 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops)
66 {
67
68         memcpy(ops, &_citrus_iconv_std_iconv_ops,
69             sizeof(_citrus_iconv_std_iconv_ops));
70
71         return (0);
72 }
73
74 /* ---------------------------------------------------------------------- */
75
76 /*
77  * convenience routines for stdenc.
78  */
79 static __inline void
80 save_encoding_state(struct _citrus_iconv_std_encoding *se)
81 {
82
83         if (se->se_ps)
84                 memcpy(se->se_pssaved, se->se_ps,
85                     _stdenc_get_state_size(se->se_handle));
86 }
87
88 static __inline void
89 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
90 {
91
92         if (se->se_ps)
93                 memcpy(se->se_ps, se->se_pssaved,
94                     _stdenc_get_state_size(se->se_handle));
95 }
96
97 static __inline void
98 init_encoding_state(struct _citrus_iconv_std_encoding *se)
99 {
100
101         if (se->se_ps)
102                 _stdenc_init_state(se->se_handle, se->se_ps);
103 }
104
105 static __inline int
106 mbtocsx(struct _citrus_iconv_std_encoding *se,
107     _csid_t *csid, _index_t *idx, const char **s, size_t n, size_t *nresult,
108     struct iconv_hooks *hooks)
109 {
110
111         return (_stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
112                               nresult, hooks));
113 }
114
115 static __inline int
116 cstombx(struct _citrus_iconv_std_encoding *se,
117     char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult,
118     struct iconv_hooks *hooks)
119 {
120
121         return (_stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
122                               nresult, hooks));
123 }
124
125 static __inline int
126 wctombx(struct _citrus_iconv_std_encoding *se,
127     char *s, size_t n, _wc_t wc, size_t *nresult,
128     struct iconv_hooks *hooks)
129 {
130
131         return (_stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult,
132                              hooks));
133 }
134
135 static __inline int
136 put_state_resetx(struct _citrus_iconv_std_encoding *se, char *s, size_t n,
137     size_t *nresult)
138 {
139
140         return (_stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult));
141 }
142
143 static __inline int
144 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
145 {
146         struct _stdenc_state_desc ssd;
147         int ret;
148
149         ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
150             _STDENC_SDID_GENERIC, &ssd);
151         if (!ret)
152                 *rstate = ssd.u.generic.state;
153
154         return (ret);
155 }
156
157 /*
158  * init encoding context
159  */
160 static int
161 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
162     void *ps1, void *ps2)
163 {
164         int ret = -1;
165
166         se->se_handle = cs;
167         se->se_ps = ps1;
168         se->se_pssaved = ps2;
169
170         if (se->se_ps)
171                 ret = _stdenc_init_state(cs, se->se_ps);
172         if (!ret && se->se_pssaved)
173                 ret = _stdenc_init_state(cs, se->se_pssaved);
174
175         return (ret);
176 }
177
/*
 * Open the charset mapper from `src' to `dst' and make sure it is usable
 * by this module: it must have a source maximum of 1, a destination
 * maximum of 1 and a state size of 0.
 *
 * rcm:   receives the opened mapper on success.
 * rnorm: passed through to _csmapper_open().
 *
 * Returns 0 on success, the error from _csmapper_open() if the mapper
 * cannot be opened, or EINVAL (after closing the mapper) if it does not
 * meet the requirements above.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
    unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error)
		return (error);

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return (0);
	}

	/* unsuitable mapper: release it again */
	_csmapper_close(mapper);
	return (EINVAL);
}
198
199 static void
200 close_dsts(struct _citrus_iconv_std_dst_list *dl)
201 {
202         struct _citrus_iconv_std_dst *sd;
203
204         while ((sd = TAILQ_FIRST(dl)) != NULL) {
205                 TAILQ_REMOVE(dl, sd, sd_entry);
206                 _csmapper_close(sd->sd_mapper);
207                 free(sd);
208         }
209 }
210
211 static int
212 open_dsts(struct _citrus_iconv_std_dst_list *dl,
213     const struct _esdb_charset *ec, const struct _esdb *dbdst)
214 {
215         struct _citrus_iconv_std_dst *sd, *sdtmp;
216         unsigned long norm;
217         int i, ret;
218
219         sd = malloc(sizeof(*sd));
220         if (sd == NULL)
221                 return (errno);
222
223         for (i = 0; i < dbdst->db_num_charsets; i++) {
224                 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
225                     dbdst->db_charsets[i].ec_csname, &norm);
226                 if (ret == 0) {
227                         sd->sd_csid = dbdst->db_charsets[i].ec_csid;
228                         sd->sd_norm = norm;
229                         /* insert this mapper by sorted order. */
230                         TAILQ_FOREACH(sdtmp, dl, sd_entry) {
231                                 if (sdtmp->sd_norm > norm) {
232                                         TAILQ_INSERT_BEFORE(sdtmp, sd,
233                                             sd_entry);
234                                         sd = NULL;
235                                         break;
236                                 }
237                         }
238                         if (sd)
239                                 TAILQ_INSERT_TAIL(dl, sd, sd_entry);
240                         sd = malloc(sizeof(*sd));
241                         if (sd == NULL) {
242                                 ret = errno;
243                                 close_dsts(dl);
244                                 return (ret);
245                         }
246                 } else if (ret != ENOENT) {
247                         close_dsts(dl);
248                         free(sd);
249                         return (ret);
250                 }
251         }
252         free(sd);
253         return (0);
254 }
255
256 static void
257 close_srcs(struct _citrus_iconv_std_src_list *sl)
258 {
259         struct _citrus_iconv_std_src *ss;
260
261         while ((ss = TAILQ_FIRST(sl)) != NULL) {
262                 TAILQ_REMOVE(sl, ss, ss_entry);
263                 close_dsts(&ss->ss_dsts);
264                 free(ss);
265         }
266 }
267
268 static int
269 open_srcs(struct _citrus_iconv_std_src_list *sl,
270     const struct _esdb *dbsrc, const struct _esdb *dbdst)
271 {
272         struct _citrus_iconv_std_src *ss;
273         int count = 0, i, ret;
274
275         ss = malloc(sizeof(*ss));
276         if (ss == NULL)
277                 return (errno);
278
279         TAILQ_INIT(&ss->ss_dsts);
280
281         for (i = 0; i < dbsrc->db_num_charsets; i++) {
282                 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
283                 if (ret)
284                         goto err;
285                 if (!TAILQ_EMPTY(&ss->ss_dsts)) {
286                         ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
287                         TAILQ_INSERT_TAIL(sl, ss, ss_entry);
288                         ss = malloc(sizeof(*ss));
289                         if (ss == NULL) {
290                                 ret = errno;
291                                 goto err;
292                         }
293                         count++;
294                         TAILQ_INIT(&ss->ss_dsts);
295                 }
296         }
297         free(ss);
298
299         return (count ? 0 : ENOENT);
300
301 err:
302         free(ss);
303         close_srcs(sl);
304         return (ret);
305 }
306
307 /* do convert a character */
308 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
309 static int
310 /*ARGSUSED*/
311 do_conv(const struct _citrus_iconv_std_shared *is,
312         _csid_t *csid, _index_t *idx)
313 {
314         struct _citrus_iconv_std_dst *sd;
315         struct _citrus_iconv_std_src *ss;
316         _index_t tmpidx;
317         int ret;
318
319         TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
320                 if (ss->ss_csid == *csid) {
321                         TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
322                                 ret = _csmapper_convert(sd->sd_mapper,
323                                     &tmpidx, *idx, NULL);
324                                 switch (ret) {
325                                 case _MAPPER_CONVERT_SUCCESS:
326                                         *csid = sd->sd_csid;
327                                         *idx = tmpidx;
328                                         return (0);
329                                 case _MAPPER_CONVERT_NONIDENTICAL:
330                                         break;
331                                 case _MAPPER_CONVERT_SRC_MORE:
332                                         /*FALLTHROUGH*/
333                                 case _MAPPER_CONVERT_DST_MORE:
334                                         /*FALLTHROUGH*/
335                                 case _MAPPER_CONVERT_ILSEQ:
336                                         return (EILSEQ);
337                                 case _MAPPER_CONVERT_FATAL:
338                                         return (EINVAL);
339                                 }
340                         }
341                         break;
342                 }
343         }
344
345         return (E_NO_CORRESPONDING_CHAR);
346 }
347 /* ---------------------------------------------------------------------- */
348
349 static int
350 /*ARGSUSED*/
351 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
352     const char * __restrict src, const char * __restrict dst)
353 {
354         struct _citrus_esdb esdbdst, esdbsrc;
355         struct _citrus_iconv_std_shared *is;
356         int ret;
357
358         is = malloc(sizeof(*is));
359         if (is == NULL) {
360                 ret = errno;
361                 goto err0;
362         }
363         ret = _citrus_esdb_open(&esdbsrc, src);
364         if (ret)
365                 goto err1;
366         ret = _citrus_esdb_open(&esdbdst, dst);
367         if (ret)
368                 goto err2;
369         ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
370             esdbsrc.db_variable, esdbsrc.db_len_variable);
371         if (ret)
372                 goto err3;
373         ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
374             esdbdst.db_variable, esdbdst.db_len_variable);
375         if (ret)
376                 goto err4;
377         is->is_use_invalid = esdbdst.db_use_invalid;
378         is->is_invalid = esdbdst.db_invalid;
379
380         TAILQ_INIT(&is->is_srcs);
381         ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
382         if (ret)
383                 goto err5;
384
385         _esdb_close(&esdbsrc);
386         _esdb_close(&esdbdst);
387         ci->ci_closure = is;
388
389         return (0);
390
391 err5:
392         _stdenc_close(is->is_dst_encoding);
393 err4:
394         _stdenc_close(is->is_src_encoding);
395 err3:
396         _esdb_close(&esdbdst);
397 err2:
398         _esdb_close(&esdbsrc);
399 err1:
400         free(is);
401 err0:
402         return (ret);
403 }
404
405 static void
406 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
407 {
408         struct _citrus_iconv_std_shared *is = ci->ci_closure;
409
410         if (is == NULL)
411                 return;
412
413         _stdenc_close(is->is_src_encoding);
414         _stdenc_close(is->is_dst_encoding);
415         close_srcs(&is->is_srcs);
416         free(is);
417 }
418
419 static int
420 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
421 {
422         const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
423         struct _citrus_iconv_std_context *sc;
424         char *ptr;
425         size_t sz, szpsdst, szpssrc;
426
427         szpssrc = _stdenc_get_state_size(is->is_src_encoding);
428         szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
429
430         sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
431         sc = malloc(sz);
432         if (sc == NULL)
433                 return (errno);
434
435         ptr = (char *)&sc[1];
436         if (szpssrc > 0)
437                 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
438                     ptr, ptr+szpssrc);
439         else
440                 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
441                     NULL, NULL);
442         ptr += szpssrc*2;
443         if (szpsdst > 0)
444                 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
445                     ptr, ptr+szpsdst);
446         else
447                 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
448                     NULL, NULL);
449
450         cv->cv_closure = (void *)sc;
451
452         return (0);
453 }
454
455 static void
456 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
457 {
458
459         free(cv->cv_closure);
460 }
461
462 static int
463 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
464     const char * __restrict * __restrict in, size_t * __restrict inbytes,
465     char * __restrict * __restrict out, size_t * __restrict outbytes,
466     uint32_t flags, size_t * __restrict invalids)
467 {
468         const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
469         struct _citrus_iconv_std_context *sc = cv->cv_closure;
470         _csid_t csid;
471         _index_t idx;
472         const char *tmpin;
473         size_t inval, szrin, szrout;
474         int ret, state = 0;
475
476         inval = 0;
477         if (in == NULL || *in == NULL) {
478                 /* special cases */
479                 if (out != NULL && *out != NULL) {
480                         /* init output state and store the shift sequence */
481                         save_encoding_state(&sc->sc_src_encoding);
482                         save_encoding_state(&sc->sc_dst_encoding);
483                         szrout = 0;
484
485                         ret = put_state_resetx(&sc->sc_dst_encoding,
486                             *out, *outbytes, &szrout);
487                         if (ret)
488                                 goto err;
489
490                         if (szrout == (size_t)-2) {
491                                 /* too small to store the character */
492                                 ret = EINVAL;
493                                 goto err;
494                         }
495                         *out += szrout;
496                         *outbytes -= szrout;
497                 } else
498                         /* otherwise, discard the shift sequence */
499                         init_encoding_state(&sc->sc_dst_encoding);
500                 init_encoding_state(&sc->sc_src_encoding);
501                 *invalids = 0;
502                 return (0);
503         }
504
505         /* normal case */
506         for (;;) {
507                 if (*inbytes == 0) {
508                         ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
509                         if (state == _STDENC_SDGEN_INITIAL ||
510                             state == _STDENC_SDGEN_STABLE)
511                                 break;
512                 }
513
514                 /* save the encoding states for the error recovery */
515                 save_encoding_state(&sc->sc_src_encoding);
516                 save_encoding_state(&sc->sc_dst_encoding);
517
518                 /* mb -> csid/index */
519                 tmpin = *in;
520                 szrin = szrout = 0;
521                 ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, &tmpin,
522                     *inbytes, &szrin, cv->cv_shared->ci_hooks);
523                 if (ret)
524                         goto err;
525
526                 if (szrin == (size_t)-2) {
527                         /* incompleted character */
528                         ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
529                         if (ret) {
530                                 ret = EINVAL;
531                                 goto err;
532                         }
533                         switch (state) {
534                         case _STDENC_SDGEN_INITIAL:
535                         case _STDENC_SDGEN_STABLE:
536                                 /* fetch shift sequences only. */
537                                 goto next;
538                         }
539                         ret = EINVAL;
540                         goto err;
541                 }
542                 /* convert the character */
543                 ret = do_conv(is, &csid, &idx);
544                 if (ret) {
545                         if (ret == E_NO_CORRESPONDING_CHAR) {
546                                 inval++;
547                                 szrout = 0;
548                                 if ((((flags & _CITRUS_ICONV_F_HIDE_INVALID) == 0) &&
549                                     !cv->cv_shared->ci_discard_ilseq) &&
550                                     is->is_use_invalid) {
551                                         ret = wctombx(&sc->sc_dst_encoding,
552                                             *out, *outbytes, is->is_invalid,
553                                             &szrout, cv->cv_shared->ci_hooks);
554                                         if (ret)
555                                                 goto err;
556                                 }
557                                 goto next;
558                         } else
559                                 goto err;
560                 }
561                 /* csid/index -> mb */
562                 ret = cstombx(&sc->sc_dst_encoding,
563                     *out, *outbytes, csid, idx, &szrout,
564                     cv->cv_shared->ci_hooks);
565                 if (ret)
566                         goto err;
567 next:
568                 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
569                 *in = tmpin;
570                 *outbytes -= szrout;
571                 *out += szrout;
572         }
573         *invalids = inval;
574
575         return (0);
576
577 err:
578         restore_encoding_state(&sc->sc_src_encoding);
579         restore_encoding_state(&sc->sc_dst_encoding);
580         *invalids = inval;
581
582         return (ret);
583 }