keylogin(1): Fix a warning and raise WARNS to 6.
[dragonfly.git] / lib / libc / citrus / modules / citrus_utf1632.c
1 /* $NetBSD: citrus_utf1632.c,v 1.8 2008/03/20 11:47:45 tnozaki Exp $ */
2
3 /*-
4  * Copyright (c)2003 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28
29 #include <sys/types.h>
30 #include <sys/endian.h>
31 #include <assert.h>
32 #include <errno.h>
33 #include <limits.h>
34 #include <locale.h>
35 #include <stddef.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <wchar.h>
40
41 #include "citrus_namespace.h"
42 #include "citrus_types.h"
43 #include "citrus_module.h"
44 #include "citrus_stdenc.h"
45 #include "citrus_bcs.h"
46
47 #include "citrus_utf1632.h"
48
49
50 /* ----------------------------------------------------------------------
51  * private stuffs used by templates
52  */
53
/*
 * Conversion state: the bytes of a partially read character, how many of
 * them are buffered, and the byte order currently in effect.
 */
typedef struct {
	u_int8_t	ch[4];		/* buffered input bytes */
	int		chlen;		/* number of valid bytes in ch[] */
	int		current_endian;	/* _ENDIAN_* value in effect */
} _UTF1632State;
59
/* Byte-order selectors used for preffered_endian and current_endian. */
#define _ENDIAN_UNKNOWN	0
#define _ENDIAN_BIG	1
#define _ENDIAN_LITTLE	2

/* Bit flags kept in the mode field. */
#define _MODE_UTF32		0x00000001U
#define _MODE_FORCE_ENDIAN	0x00000002U

/* Per-encoding configuration, filled in by the module init routine. */
typedef struct {
	int		preffered_endian;	/* one of _ENDIAN_* */
	unsigned int	cur_max;		/* value of _ENCODING_MB_CUR_MAX */
	u_int32_t	mode;			/* OR of _MODE_* flags */
} _UTF1632EncodingInfo;
70
/* Name mangling for the functions of this module. */
#define _FUNCNAME(m)			_citrus_UTF1632_##m

/* Types the shared template code operates on. */
#define _ENCODING_INFO			_UTF1632EncodingInfo
#define _ENCODING_STATE			_UTF1632State

/* Longest byte sequence a single character may occupy. */
#define _ENCODING_MB_CUR_MAX(_ei_)	((_ei_)->cur_max)

/* Both state-related switches are off for this encoding. */
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
77
78
79 static __inline void
80 /*ARGSUSED*/
81 _citrus_UTF1632_init_state(_UTF1632EncodingInfo *ei __unused, _UTF1632State *s)
82 {
83         memset(s, 0, sizeof(*s));
84 }
85
86 static int
87 _citrus_UTF1632_mbrtowc_priv(_UTF1632EncodingInfo *ei, wchar_t *pwc,
88                              const char **s, size_t n, _UTF1632State *psenc,
89                              size_t *nresult)
90 {
91         int chlenbak, endian, needlen;
92         wchar_t wc;
93         size_t result;
94         const char *s0;
95
96         _DIAGASSERT(nresult != NULL);
97         _DIAGASSERT(ei != NULL);
98         _DIAGASSERT(s != NULL);
99         _DIAGASSERT(psenc != NULL);
100
101         s0 = *s;
102
103         if (s0 == NULL) {
104                 _citrus_UTF1632_init_state(ei, psenc);
105                 *nresult = 0; /* state independent */
106                 return (0);
107         }
108
109         result = 0;
110         chlenbak = psenc->chlen;
111
112 refetch:
113         if ((ei->mode & _MODE_UTF32) != 0 || chlenbak>=2)
114                 needlen = 4;
115         else
116                 needlen = 2;
117
118         while (chlenbak < needlen) {
119                 if (n==0)
120                         goto restart;
121                 psenc->ch[chlenbak++] = *s0++;
122                 n--;
123                 result++;
124         }
125
126         /* judge endian marker */
127         if ((ei->mode & _MODE_UTF32) == 0) {
128                 /* UTF16 */
129                 if (psenc->ch[0]==0xFE && psenc->ch[1]==0xFF) {
130                         psenc->current_endian = _ENDIAN_BIG;
131                         chlenbak = 0;
132                         goto refetch;
133                 } else if (psenc->ch[0]==0xFF && psenc->ch[1]==0xFE) {
134                         psenc->current_endian = _ENDIAN_LITTLE;
135                         chlenbak = 0;
136                         goto refetch;
137                 }
138         } else {
139                 /* UTF32 */
140                 if (psenc->ch[0]==0x00 && psenc->ch[1]==0x00 &&
141                     psenc->ch[2]==0xFE && psenc->ch[3]==0xFF) {
142                         psenc->current_endian = _ENDIAN_BIG;
143                         chlenbak = 0;
144                         goto refetch;
145                 } else if (psenc->ch[0]==0xFF && psenc->ch[1]==0xFE &&
146                            psenc->ch[2]==0x00 && psenc->ch[3]==0x00) {
147                         psenc->current_endian = _ENDIAN_LITTLE;
148                         chlenbak = 0;
149                         goto refetch;
150                 }
151         }
152         if ((ei->mode & _MODE_FORCE_ENDIAN) != 0 ||
153             psenc->current_endian == _ENDIAN_UNKNOWN)
154                 endian = ei->preffered_endian;
155         else
156                 endian = psenc->current_endian;
157
158         /* get wc */
159         if ((ei->mode & _MODE_UTF32) == 0) {
160                 /* UTF16 */
161                 if (needlen==2) {
162                         switch (endian) {
163                         case _ENDIAN_LITTLE:
164                                 wc = (psenc->ch[0] |
165                                       ((wchar_t)psenc->ch[1] << 8));
166                                 break;
167                         case _ENDIAN_BIG:
168                                 wc = (psenc->ch[1] |
169                                       ((wchar_t)psenc->ch[0] << 8));
170                                 break;
171                         default:
172                                 goto ilseq;
173                         }
174                         if (wc >= 0xD800 && wc <= 0xDBFF) {
175                                 /* surrogate high */
176                                 needlen=4;
177                                 goto refetch;
178                         }
179                 } else {
180                         /* surrogate low */
181                         wc -= 0xD800; /* wc : surrogate high (see above) */
182                         wc <<= 10;
183                         switch (endian) {
184                         case _ENDIAN_LITTLE:
185                                 if (psenc->ch[2]<0xDC || psenc->ch[2]>0xDF)
186                                         goto ilseq;
187                                 wc |= psenc->ch[2];
188                                 wc |= (wchar_t)(psenc->ch[3] & 3) << 8;
189                                 break;
190                         case _ENDIAN_BIG:
191                                 if (psenc->ch[3]<0xDC || psenc->ch[3]>0xDF)
192                                         goto ilseq;
193                                 wc |= psenc->ch[3];
194                                 wc |= (wchar_t)(psenc->ch[2] & 3) << 8;
195                                 break;
196                         default:
197                                 goto ilseq;
198                         }
199                         wc += 0x10000;
200                 }
201         } else {
202                 /* UTF32 */
203                 switch (endian) {
204                 case _ENDIAN_LITTLE:
205                         wc = (psenc->ch[0] |
206                               ((wchar_t)psenc->ch[1] << 8) |
207                               ((wchar_t)psenc->ch[2] << 16) |
208                               ((wchar_t)psenc->ch[3] << 24));
209                         break;
210                 case _ENDIAN_BIG:
211                         wc = (psenc->ch[3] |
212                               ((wchar_t)psenc->ch[2] << 8) |
213                               ((wchar_t)psenc->ch[1] << 16) |
214                               ((wchar_t)psenc->ch[0] << 24));
215                         break;
216                 default:
217                         goto ilseq;
218                 }
219                 if (wc >= 0xD800 && wc <= 0xDFFF)
220                         goto ilseq;
221         }
222
223
224         *pwc = wc;
225         psenc->chlen = 0;
226         *nresult = result;
227         *s = s0;
228
229         return (0);
230
231 ilseq:
232         *nresult = (size_t)-1;
233         psenc->chlen = 0;
234         return (EILSEQ);
235
236 restart:
237         *nresult = (size_t)-2;
238         psenc->chlen = chlenbak;
239         *s = s0;
240         return (0);
241 }
242
243 static int
244 _citrus_UTF1632_wcrtomb_priv(_UTF1632EncodingInfo *ei, char *s, size_t n,
245                              wchar_t wc, _UTF1632State *psenc,
246                              size_t *nresult)
247 {
248         wchar_t wc2;
249         static const char _bom[4] = {
250 #if BYTE_ORDER == BIG_ENDIAN
251             0x00, 0x00, 0xFE, 0xFF,
252 #else
253             0xFF, 0xFE, 0x00, 0x00,
254 #endif
255         };
256         const char *bom = &_bom[0];
257         size_t cnt;
258
259         _DIAGASSERT(ei != NULL);
260         _DIAGASSERT(nresult != NULL);
261         _DIAGASSERT(s != NULL);
262
263         cnt = (size_t)0;
264         if (psenc->current_endian == _ENDIAN_UNKNOWN) {
265                 if ((ei->mode & _MODE_FORCE_ENDIAN) == 0) {
266                         if (ei->mode & _MODE_UTF32) {
267                                 cnt = 4;
268                         } else {
269                                 cnt = 2;
270 #if BYTE_ORDER == BIG_ENDIAN
271                                 bom += 2;
272 #endif
273                         }
274                         if (n < cnt)
275                                 goto e2big;
276                         memcpy(s, bom, cnt);
277                         s += cnt, n -= cnt;
278                 }
279                 psenc->current_endian = ei->preffered_endian;
280         }
281
282         wc2 = 0;
283         if ((ei->mode & _MODE_UTF32)==0) {
284                 /* UTF16 */
285                 if (wc>0xFFFF) {
286                         /* surrogate */
287                         if (wc>0x10FFFF)
288                                 goto ilseq;
289                         if (n < 4)
290                                 goto e2big;
291                         cnt += 4;
292                         wc -= 0x10000;
293                         wc2 = (wc & 0x3FF) | 0xDC00;
294                         wc = (wc>>10) | 0xD800;
295                 } else {
296                         if (n < 2)
297                                 goto e2big;
298                         cnt += 2;
299                 }
300
301 surrogate:
302                 switch (psenc->current_endian) {
303                 case _ENDIAN_BIG:
304                         s[1] = wc;
305                         s[0] = (wc >>= 8);
306                         break;
307                 case _ENDIAN_LITTLE:
308                         s[0] = wc;
309                         s[1] = (wc >>= 8);
310                         break;
311                 }
312                 if (wc2!=0) {
313                         wc = wc2;
314                         wc2 = 0;
315                         s += 2;
316                         goto surrogate;
317                 }
318         } else {
319                 /* UTF32 */
320                 if (wc >= 0xD800 && wc <= 0xDFFF)
321                         goto ilseq;
322                 if (n < 4)
323                         goto e2big;
324                 cnt += 4;
325                 switch (psenc->current_endian) {
326                 case _ENDIAN_BIG:
327                         s[3] = wc;
328                         s[2] = (wc >>= 8);
329                         s[1] = (wc >>= 8);
330                         s[0] = (wc >>= 8);
331                         break;
332                 case _ENDIAN_LITTLE:
333                         s[0] = wc;
334                         s[1] = (wc >>= 8);
335                         s[2] = (wc >>= 8);
336                         s[3] = (wc >>= 8);
337                         break;
338                 }
339         }
340         *nresult = cnt;
341
342         return 0;
343
344 ilseq:
345         *nresult = (size_t)-1;
346         return EILSEQ;
347 e2big:
348         *nresult = (size_t)-1;
349         return E2BIG;
350 }
351
352 static void
353 parse_variable(_UTF1632EncodingInfo * __restrict ei,
354                const void * __restrict var, size_t lenvar)
355 {
356 #define MATCH(x, act)                                           \
357 do {                                                            \
358         if (lenvar >= (sizeof(#x)-1) &&                         \
359             _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) {       \
360                 act;                                            \
361                 lenvar -= sizeof(#x)-1;                         \
362                 p += sizeof(#x)-1;                              \
363         }                                                       \
364 } while (/*CONSTCOND*/0)
365         const char *p;
366         p = var;
367         while (lenvar>0) {
368                 switch (*p) {
369                 case 'B':
370                 case 'b':
371                         MATCH(big, ei->preffered_endian = _ENDIAN_BIG);
372                         break;
373                 case 'L':
374                 case 'l':
375                         MATCH(little, ei->preffered_endian = _ENDIAN_LITTLE);
376                         break;
377                 case 'F':
378                 case 'f':
379                         MATCH(force, ei->mode |= _MODE_FORCE_ENDIAN);
380                         break;
381                 case 'U':
382                 case 'u':
383                         MATCH(utf32, ei->mode |= _MODE_UTF32);
384                         break;
385                 }
386                 p++;
387                 lenvar--;
388         }
389 }
390
391 static int
392 /*ARGSUSED*/
393 _citrus_UTF1632_encoding_module_init(_UTF1632EncodingInfo * __restrict ei,
394                                      const void * __restrict var,
395                                      size_t lenvar)
396 {
397         _DIAGASSERT(ei != NULL);
398
399         memset((void *)ei, 0, sizeof(*ei));
400
401         parse_variable(ei, var, lenvar);
402
403         if ((ei->mode&_MODE_UTF32)==0)
404                 ei->cur_max = 6; /* endian + surrogate */
405         else
406                 ei->cur_max = 8; /* endian + normal */
407
408         if (ei->preffered_endian == _ENDIAN_UNKNOWN) {
409 #if BYTE_ORDER == BIG_ENDIAN
410                 ei->preffered_endian = _ENDIAN_BIG;
411 #else
412                 ei->preffered_endian = _ENDIAN_LITTLE;
413 #endif
414         }
415
416         return (0);
417 }
418
419 static void
420 /*ARGSUSED*/
421 _citrus_UTF1632_encoding_module_uninit(_UTF1632EncodingInfo *ei __unused)
422 {
423 }
424
425 static __inline int
426 /*ARGSUSED*/
427 _citrus_UTF1632_stdenc_wctocs(_UTF1632EncodingInfo * __restrict ei __unused,
428                               _csid_t * __restrict csid,
429                               _index_t * __restrict idx,
430                               _wc_t wc)
431 {
432
433         _DIAGASSERT(csid != NULL && idx != NULL);
434
435         *csid = 0;
436         *idx = (_index_t)wc;
437
438         return (0);
439 }
440
441 static __inline int
442 /*ARGSUSED*/
443 _citrus_UTF1632_stdenc_cstowc(_UTF1632EncodingInfo * __restrict ei __unused,
444                               _wc_t * __restrict wc,
445                               _csid_t csid, _index_t idx)
446 {
447
448         _DIAGASSERT(wc != NULL);
449
450         if (csid != 0)
451                 return (EILSEQ);
452
453         *wc = (_wc_t)idx;
454
455         return (0);
456 }
457
458 static __inline int
459 /*ARGSUSED*/
460 _citrus_UTF1632_stdenc_get_state_desc_generic(_UTF1632EncodingInfo * __restrict ei __unused,
461                                               _UTF1632State * __restrict psenc,
462                                               int * __restrict rstate)
463 {
464
465         if (psenc->chlen == 0)
466                 *rstate = _STDENC_SDGEN_INITIAL;
467         else
468                 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR;
469
470         return 0;
471 }
472
473 /* ----------------------------------------------------------------------
474  * public interface for stdenc
475  */
476
477 _CITRUS_STDENC_DECLS(UTF1632);
478 _CITRUS_STDENC_DEF_OPS(UTF1632);
479
480 #include "citrus_stdenc_template.h"