Initial import of binutils 2.22 on the new vendor branch
[dragonfly.git] / lib / libc / citrus / modules / citrus_iconv_std.c
1 /* $NetBSD: citrus_iconv_std.c,v 1.15 2006/11/13 19:08:19 tnozaki Exp $ */
2 /* $DragonFly: src/lib/libc/citrus/modules/citrus_iconv_std.c,v 1.2 2008/04/10 10:21:01 hasso Exp $ */
3
4 /*-
5  * Copyright (c)2003 Citrus Project,
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/types.h>
31 #include <sys/endian.h>
32 #include <sys/queue.h>
33 #include <assert.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39
40 #include "citrus_namespace.h"
41 #include "citrus_types.h"
42 #include "citrus_module.h"
43 #include "citrus_region.h"
44 #include "citrus_mmap.h"
45 #include "citrus_hash.h"
46 #include "citrus_iconv.h"
47 #include "citrus_stdenc.h"
48 #include "citrus_mapper.h"
49 #include "citrus_csmapper.h"
50 #include "citrus_memstream.h"
51 #include "citrus_iconv_std.h"
52 #include "citrus_esdb.h"
53
54 /* ---------------------------------------------------------------------- */
55
/*
 * Module boilerplate for the "iconv_std" converter.  Both macros are
 * provided by the citrus headers included above.  Presumably the first
 * declares this module's entry points and the second defines the
 * _citrus_iconv_std_iconv_ops table that
 * _citrus_iconv_std_iconv_getops() copies out -- confirm against
 * citrus_iconv.h.
 */
_CITRUS_ICONV_DECLS(iconv_std);
_CITRUS_ICONV_DEF_OPS(iconv_std);
58
59
60 /* ---------------------------------------------------------------------- */
61
62 int
63 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops,
64                                u_int32_t expected_version)
65 {
66         if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops))
67                 return (EINVAL);
68
69         memcpy(ops, &_citrus_iconv_std_iconv_ops,
70                sizeof(_citrus_iconv_std_iconv_ops));
71
72         return (0);
73 }
74
75 /* ---------------------------------------------------------------------- */
76
77 /*
78  * convenience routines for stdenc.
79  */
80 static __inline void
81 save_encoding_state(struct _citrus_iconv_std_encoding *se)
82 {
83         if (se->se_ps)
84                 memcpy(se->se_pssaved, se->se_ps,
85                        _stdenc_get_state_size(se->se_handle));
86 }
87
88 static __inline void
89 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
90 {
91         if (se->se_ps)
92                 memcpy(se->se_ps, se->se_pssaved,
93                        _stdenc_get_state_size(se->se_handle));
94 }
95
96 static __inline void
97 init_encoding_state(struct _citrus_iconv_std_encoding *se)
98 {
99         if (se->se_ps)
100                 _stdenc_init_state(se->se_handle, se->se_ps);
101 }
102
103 static __inline int
104 mbtocsx(struct _citrus_iconv_std_encoding *se,
105         _csid_t *csid, _index_t *idx, const char **s, size_t n,
106         size_t *nresult)
107 {
108         return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
109                               nresult);
110 }
111
112 static __inline int
113 cstombx(struct _citrus_iconv_std_encoding *se,
114         char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult)
115 {
116         return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
117                               nresult);
118 }
119
120 static __inline int
121 wctombx(struct _citrus_iconv_std_encoding *se,
122         char *s, size_t n, _wc_t wc, size_t *nresult)
123 {
124         return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult);
125 }
126
127 static __inline int
128 put_state_resetx(struct _citrus_iconv_std_encoding *se,
129                  char *s, size_t n, size_t *nresult)
130 {
131         return _stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult);
132 }
133
134 static __inline int
135 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
136 {
137         int ret;
138         struct _stdenc_state_desc ssd;
139
140         ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
141                                      _STDENC_SDID_GENERIC, &ssd);
142         if (!ret)
143                 *rstate = ssd.u.generic.state;
144
145         return ret;
146 }
147
148 /*
149  * init encoding context
150  */
151 static int
152 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
153               void *ps1, void *ps2)
154 {
155         int ret = -1;
156
157         se->se_handle = cs;
158         se->se_ps = ps1;
159         se->se_pssaved = ps2;
160
161         if (se->se_ps)
162                 ret = _stdenc_init_state(cs, se->se_ps);
163         if (!ret && se->se_pssaved)
164                 ret = _stdenc_init_state(cs, se->se_pssaved);
165
166         return ret;
167 }
168
/*
 * Open a charset mapper from `src` to `dst` and accept it only if it maps
 * one source unit to one destination unit and keeps no state.
 *
 * rcm   - receives the mapper on success (untouched on failure).
 * rnorm - passed through to _csmapper_open().
 *
 * Returns 0 on success, the error from _csmapper_open(), or EINVAL (after
 * closing the mapper) when the mapper does not meet the constraints above.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
              unsigned long *rnorm)
{
        struct _csmapper *mapper;
        int error;

        error = _csmapper_open(&mapper, src, dst, 0, rnorm);
        if (error != 0)
                return error;

        if (_csmapper_get_src_max(mapper) == 1 &&
            _csmapper_get_dst_max(mapper) == 1 &&
            _csmapper_get_state_size(mapper) == 0) {
                *rcm = mapper;
                return 0;
        }

        _csmapper_close(mapper);
        return EINVAL;
}
189
190 static void
191 close_dsts(struct _citrus_iconv_std_dst_list *dl)
192 {
193         struct _citrus_iconv_std_dst *sd;
194
195         while ((sd=TAILQ_FIRST(dl)) != NULL) {
196                 TAILQ_REMOVE(dl, sd, sd_entry);
197                 _csmapper_close(sd->sd_mapper);
198                 free(sd);
199         }
200 }
201
202 static int
203 open_dsts(struct _citrus_iconv_std_dst_list *dl,
204           const struct _esdb_charset *ec, const struct _esdb *dbdst)
205 {
206         int i, ret;
207         struct _citrus_iconv_std_dst *sd, *sdtmp;
208         unsigned long norm;
209
210         sd = malloc(sizeof(*sd));
211         if (sd == NULL)
212                 return errno;
213
214         for (i=0; i<dbdst->db_num_charsets; i++) {
215                 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
216                                     dbdst->db_charsets[i].ec_csname, &norm);
217                 if (ret == 0) {
218                         sd->sd_csid = dbdst->db_charsets[i].ec_csid;
219                         sd->sd_norm = norm;
220                         /* insert this mapper by sorted order. */
221                         TAILQ_FOREACH(sdtmp, dl, sd_entry) {
222                                 if (sdtmp->sd_norm > norm) {
223                                         TAILQ_INSERT_BEFORE(sdtmp, sd,
224                                                             sd_entry);
225                                         sd = NULL;
226                                         break;
227                                 }
228                         }
229                         if (sd)
230                                 TAILQ_INSERT_TAIL(dl, sd, sd_entry);
231                         sd = malloc(sizeof(*sd));
232                         if (sd == NULL) {
233                                 ret = errno;
234                                 close_dsts(dl);
235                                 return ret;
236                         }
237                 } else if (ret != ENOENT) {
238                         close_dsts(dl);
239                         free(sd);
240                         return ret;
241                 }
242         }
243         free(sd);
244         return 0;
245 }
246
247 static void
248 close_srcs(struct _citrus_iconv_std_src_list *sl)
249 {
250         struct _citrus_iconv_std_src *ss;
251
252         while ((ss=TAILQ_FIRST(sl)) != NULL) {
253                 TAILQ_REMOVE(sl, ss, ss_entry);
254                 close_dsts(&ss->ss_dsts);
255                 free(ss);
256         }
257 }
258
259 static int
260 open_srcs(struct _citrus_iconv_std_src_list *sl,
261           const struct _esdb *dbsrc, const struct _esdb *dbdst)
262 {
263         int i, ret, count = 0;
264         struct _citrus_iconv_std_src *ss;
265
266         ss = malloc(sizeof(*ss));
267         if (ss == NULL)
268                 return errno;
269
270         TAILQ_INIT(&ss->ss_dsts);
271
272         for (i=0; i<dbsrc->db_num_charsets; i++) {
273                 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
274                 if (ret)
275                         goto err;
276                 if (!TAILQ_EMPTY(&ss->ss_dsts)) {
277                         ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
278                         TAILQ_INSERT_TAIL(sl, ss, ss_entry);
279                         ss = malloc(sizeof(*ss));
280                         if (ss == NULL) {
281                                 ret = errno;
282                                 goto err;
283                         }
284                         count++;
285                         TAILQ_INIT(&ss->ss_dsts);
286                 }
287         }
288         free(ss);
289
290         return count ? 0 : ENOENT;
291
292 err:
293         free(ss);
294         close_srcs(sl);
295         return ret;
296 }
297
298 /* do convert a character */
299 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
300 static int
301 /*ARGSUSED*/
302 do_conv(const struct _citrus_iconv_std_shared *is,
303         struct _citrus_iconv_std_context *sc, _csid_t *csid, _index_t *idx)
304 {
305         _index_t tmpidx;
306         int ret;
307         struct _citrus_iconv_std_src *ss;
308         struct _citrus_iconv_std_dst *sd;
309
310         TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
311                 if (ss->ss_csid == *csid) {
312                         TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
313                                 ret = _csmapper_convert(sd->sd_mapper,
314                                                         &tmpidx, *idx, NULL);
315                                 switch (ret) {
316                                 case _MAPPER_CONVERT_SUCCESS:
317                                         *csid = sd->sd_csid;
318                                         *idx = tmpidx;
319                                         return 0;
320                                 case _MAPPER_CONVERT_NONIDENTICAL:
321                                         break;
322                                 case _MAPPER_CONVERT_SRC_MORE:
323                                         /*FALLTHROUGH*/
324                                 case _MAPPER_CONVERT_DST_MORE:
325                                         /*FALLTHROUGH*/
326                                 case _MAPPER_CONVERT_FATAL:
327                                         return EINVAL;
328                                 case _MAPPER_CONVERT_ILSEQ:
329                                         return EILSEQ;
330                                 }
331                         }
332                         break;
333                 }
334         }
335
336         return E_NO_CORRESPONDING_CHAR;
337 }
338 /* ---------------------------------------------------------------------- */
339
340 static int
341 /*ARGSUSED*/
342 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
343                                     const char * __restrict curdir,
344                                     const char * __restrict src,
345                                     const char * __restrict dst,
346                                     const void * __restrict var, size_t lenvar)
347 {
348         int ret;
349         struct _citrus_iconv_std_shared *is;
350         struct _citrus_esdb esdbsrc, esdbdst;
351
352         is = malloc(sizeof(*is));
353         if (is==NULL) {
354                 ret = errno;
355                 goto err0;
356         }
357         ret = _citrus_esdb_open(&esdbsrc, src);
358         if (ret)
359                 goto err1;
360         ret = _citrus_esdb_open(&esdbdst, dst);
361         if (ret)
362                 goto err2;
363         ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
364                            esdbsrc.db_variable, esdbsrc.db_len_variable);
365         if (ret)
366                 goto err3;
367         ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
368                            esdbdst.db_variable, esdbdst.db_len_variable);
369         if (ret)
370                 goto err4;
371         is->is_use_invalid = esdbdst.db_use_invalid;
372         is->is_invalid = esdbdst.db_invalid;
373
374         TAILQ_INIT(&is->is_srcs);
375         ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
376         if (ret)
377                 goto err5;
378
379         _esdb_close(&esdbsrc);
380         _esdb_close(&esdbdst);
381         ci->ci_closure = is;
382
383         return 0;
384
385 err5:
386         _stdenc_close(is->is_dst_encoding);
387 err4:
388         _stdenc_close(is->is_src_encoding);
389 err3:
390         _esdb_close(&esdbdst);
391 err2:
392         _esdb_close(&esdbsrc);
393 err1:
394         free(is);
395 err0:
396         return ret;
397 }
398
399 static void
400 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
401 {
402         struct _citrus_iconv_std_shared *is = ci->ci_closure;
403
404         if (is == NULL)
405                 return;
406
407         _stdenc_close(is->is_src_encoding);
408         _stdenc_close(is->is_dst_encoding);
409         close_srcs(&is->is_srcs);
410         free(is);
411 }
412
413 static int
414 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
415 {
416         const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
417         struct _citrus_iconv_std_context *sc;
418         size_t szpssrc, szpsdst, sz;
419         char *ptr;
420
421         szpssrc = _stdenc_get_state_size(is->is_src_encoding);
422         szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
423
424         sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
425         sc = malloc(sz);
426         if (sc == NULL)
427                 return errno;
428
429         ptr = (char *)&sc[1];
430         if (szpssrc)
431                 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
432                               ptr, ptr+szpssrc);
433         else
434                 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
435                               NULL, NULL);
436         ptr += szpssrc*2;
437         if (szpsdst)
438                 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
439                               ptr, ptr+szpsdst);
440         else
441                 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
442                               NULL, NULL);
443
444         cv->cv_closure = (void *)sc;
445
446         return 0;
447 }
448
449 static void
450 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
451 {
452         free(cv->cv_closure);
453 }
454
455 static int
456 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
457                                 const char * __restrict * __restrict in,
458                                 size_t * __restrict inbytes,
459                                 char * __restrict * __restrict out,
460                                 size_t * __restrict outbytes, u_int32_t flags,
461                                 size_t * __restrict invalids)
462 {
463         const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
464         struct _citrus_iconv_std_context *sc = cv->cv_closure;
465         _index_t idx;
466         _csid_t csid;
467         int ret, state;
468         size_t szrin, szrout;
469         size_t inval;
470         const char *tmpin;
471
472         inval = 0;
473         if (in==NULL || *in==NULL) {
474                 /* special cases */
475                 if (out!=NULL && *out!=NULL) {
476                         /* init output state and store the shift sequence */
477                         save_encoding_state(&sc->sc_src_encoding);
478                         save_encoding_state(&sc->sc_dst_encoding);
479                         szrout = 0;
480
481                         ret = put_state_resetx(&sc->sc_dst_encoding,
482                                                *out, *outbytes,
483                                                &szrout);
484                         if (ret)
485                                 goto err;
486
487                         if (szrout == (size_t)-2) {
488                                 /* too small to store the character */
489                                 ret = EINVAL;
490                                 goto err;
491                         }
492                         *out += szrout;
493                         *outbytes -= szrout;
494                 } else
495                         /* otherwise, discard the shift sequence */
496                         init_encoding_state(&sc->sc_dst_encoding);
497                 init_encoding_state(&sc->sc_src_encoding);
498                 *invalids = 0;
499                 return 0;
500         }
501
502         /* normal case */
503         for (;;) {
504                 if (*inbytes==0) {
505                         ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
506                         if (state == _STDENC_SDGEN_INITIAL ||
507                             state == _STDENC_SDGEN_STABLE)
508                                 break;
509                 }
510
511                 /* save the encoding states for the error recovery */
512                 save_encoding_state(&sc->sc_src_encoding);
513                 save_encoding_state(&sc->sc_dst_encoding);
514
515                 /* mb -> csid/index */
516                 tmpin = *in;
517                 szrin = szrout = 0;
518                 ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx,
519                               &tmpin, *inbytes, &szrin);
520                 if (ret)
521                         goto err;
522
523                 if (szrin == (size_t)-2) {
524                         /* incompleted character */
525                         ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
526                         if (ret) {
527                                 ret = EINVAL;
528                                 goto err;
529                         }
530                         switch (state) {
531                         case _STDENC_SDGEN_INITIAL:
532                         case _STDENC_SDGEN_STABLE:
533                                 /* fetch shift sequences only. */
534                                 goto next;
535                         }
536                         ret = EINVAL;
537                         goto err;
538                 }
539                 /* convert the character */
540                 ret = do_conv(is, sc, &csid, &idx);
541                 if (ret) {
542                         if (ret == E_NO_CORRESPONDING_CHAR) {
543                                 inval++;
544                                 szrout = 0;
545                                 if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 &&
546                                     is->is_use_invalid) {
547                                         ret = wctombx(&sc->sc_dst_encoding,
548                                                       *out, *outbytes,
549                                                       is->is_invalid,
550                                                       &szrout);
551                                         if (ret)
552                                                 goto err;
553                                 }
554                                 goto next;
555                         } else {
556                                 goto err;
557                         }
558                 }
559                 /* csid/index -> mb */
560                 ret = cstombx(&sc->sc_dst_encoding,
561                               *out, *outbytes, csid, idx, &szrout);
562                 if (ret)
563                         goto err;
564 next:
565                 _DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout);
566                 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
567                 *in = tmpin;
568                 *outbytes -= szrout;
569                 *out += szrout;
570         }
571         *invalids = inval;
572
573         return 0;
574
575 err:
576         restore_encoding_state(&sc->sc_src_encoding);
577         restore_encoding_state(&sc->sc_dst_encoding);
578         *invalids = inval;
579
580         return ret;
581 }