1 /* $NetBSD: citrus_hz.c,v 1.1 2006/11/22 23:38:27 tnozaki Exp $ */
2 /* $DragonFly: src/lib/libc/citrus/modules/citrus_hz.c,v 1.1 2008/04/10 10:21:01 hasso Exp $ */
5 * Copyright (c)2004, 2006 Citrus Project,
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/queue.h>
32 #include <sys/types.h>
43 #include "citrus_namespace.h"
44 #include "citrus_types.h"
45 #include "citrus_bcs.h"
46 #include "citrus_module.h"
47 #include "citrus_ctype.h"
48 #include "citrus_stdenc.h"
50 #include "citrus_hz.h"
51 #include "citrus_prop.h"
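/*
 * wchar_t bit layout per character set
 * (g: 0x80 bit of the low byte, m: escape character stored in bits 24-30)
56 * CTRL/ASCII 00000000 00000000 00000000 gxxxxxxx
57 * GB2312 00000000 00000000 0xxxxxxx gxxxxxxx
58 * 94/96*n (~M) 0mmmmmmm 0xxxxxxx 0xxxxxxx gxxxxxxx
 */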
/* Byte that introduces a two-byte escape sequence in the HZ byte stream. */
61 #define ESCAPE_CHAR '~'
/* Charset identifiers; the numeric values are used as indices into ranges[]. */
64 CTRL = 0, ASCII = 1, GB2312 = 2, CS94 = 3, CS96 = 4
/* Inclusive byte range [start, end]; width is the number of values in it. */
68 int start, end, width;
/* Valid byte range for each charset, listed in charset-id order. */
71 static const range_t ranges[] = {
/*
 * Builds one range_t initializer; width is derived as end - start + 1.
 * NOTE(review): the arguments are not parenthesized in the expansion; this
 * is only safe because every use below passes plain literals.
 */
72 #define RANGE(start, end) { start, end, (end - start) + 1 }
73 /* CTRL */ RANGE(0x00, 0x1F),
74 /* ASCII */ RANGE(0x20, 0x7F),
75 /* GB2312 */ RANGE(0x21, 0x7E),
76 /* CS94 */ RANGE(0x21, 0x7E),
77 /* CS96 */ RANGE(0x20, 0x7F),
/* Forward typedef for struct escape_t (one escape-sequence definition). */
81 typedef struct escape_t escape_t;
/* Tail-queue head type for a list of escape_t nodes (see queue(3)). */
89 typedef TAILQ_HEAD(escape_list, escape_t) escape_list;
/* Linkage of this escape_t inside its escape_list. */
91 TAILQ_ENTRY(escape_t) entry;
/*
 * Graphic sets selected by this escape: the decoder uses `right` for
 * bytes with bit 0x80 set and `left` for bytes without it.
 */
93 graphic_t *left, *right;
/* Accessors for escape_t fields. */
97 #define GL(escape) ((escape)->left)
98 #define GR(escape) ((escape)->right)
99 #define SET(escape) ((escape)->set)
100 #define ESC(escape) ((escape)->ch)
/* First escape of the list that `escape` belongs to. */
101 #define INIT(escape) (TAILQ_FIRST(SET(escape)))
/*
 * find_escape: walk the escape list `set` looking for the entry whose
 * escape character (ESC) equals `ch`.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 * Every caller compares the result against NULL, so presumably the
 * matching entry is returned and NULL means "no match" -- confirm.
 */
103 static __inline escape_t *
104 find_escape(escape_list *set, int ch)
108 _DIAGASSERT(set != NULL);
110 TAILQ_FOREACH(escape, set, entry) {
111 if (ESC(escape) == ch)
/*
 * Graphic sets used by the encoder for the ASCII and GB2312 ranges
 * (gb2312 is recorded by _citrus_HZ_parse_graphic).
 */
120 graphic_t *ascii, *gb2312;
/* Accessors for the two escape lists (e0, e1) of an encoding info. */
123 #define E0SET(ei) (&(ei)->e0)
124 #define E1SET(ei) (&(ei)->e1)
/* First (initial) escape of each list. */
125 #define INIT0(ei) (TAILQ_FIRST(E0SET(ei)))
126 #define INIT1(ei) (TAILQ_FIRST(E1SET(ei)))
137 /* for future multi-locale facility */
142 _HZState s_mbsrtowcs;
144 _HZState s_wcsrtombs;
/* Map a ctype info to its embedded encoding info / per-function state. */
149 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei)
150 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_
/*
 * Encoding-specific names and parameters.
 * NOTE(review): presumably consumed by citrus_ctype_template.h and
 * citrus_stdenc_template.h included at the end of the file -- confirm.
 */
152 #define _FUNCNAME(m) _citrus_HZ_##m
153 #define _ENCODING_INFO _HZEncodingInfo
154 #define _CTYPE_INFO _HZCTypeInfo
155 #define _ENCODING_STATE _HZState
156 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX
157 #define _ENCODING_IS_STATE_DEPENDENT 1
/* A state with no escape in use has not been initialized yet. */
158 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->inuse == NULL)
/*
 * _citrus_HZ_init_state: put the conversion state into its initial
 * shift state by selecting the first escape of list 0.
 *
 * NOTE(review): other statements of the body (e.g. resetting the byte
 * buffer) are not visible in this excerpt -- confirm against the full file.
 */
161 _citrus_HZ_init_state(_HZEncodingInfo * __restrict ei,
162 _HZState * __restrict psenc)
164 _DIAGASSERT(ei != NULL);
165 _DIAGASSERT(psenc != NULL);
168 psenc->inuse = INIT0(ei);
/*
 * _citrus_HZ_pack_state: serialize the internal state by copying
 * sizeof(*psenc) bytes from psenc into the caller's buffer pspriv.
 */
173 _citrus_HZ_pack_state(_HZEncodingInfo * __restrict ei,
174 void *__restrict pspriv, const _HZState * __restrict psenc)
176 /* ei may be unused */
177 _DIAGASSERT(pspriv != NULL);
178 _DIAGASSERT(psenc != NULL);
180 memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
/*
 * _citrus_HZ_unpack_state: inverse of pack_state; copies sizeof(*psenc)
 * bytes from the caller's buffer pspriv back into psenc.
 */
185 _citrus_HZ_unpack_state(_HZEncodingInfo * __restrict ei,
186 _HZState * __restrict psenc, const void * __restrict pspriv)
188 /* ei may be unused */
189 _DIAGASSERT(psenc != NULL);
190 _DIAGASSERT(pspriv != NULL);
192 memcpy((void *)psenc, pspriv, sizeof(*psenc));
/*
 * _citrus_HZ_mbrtowc_priv: decode one wide character from the byte
 * stream *s (at most n bytes), honoring HZ escape sequences.
 *
 * Bytes are accumulated in psenc->ch / psenc->chlen; `tail` is the read
 * position inside that buffer. The result is reported through *nresult:
 * (size_t)-2 and (size_t)-1 are stored on the paths marked below, and on
 * success 0 is stored for a NUL wide character, otherwise len.
 *
 * NOTE(review): many lines of the body (branch bodies, gotos, returns,
 * closing braces) are missing from this excerpt; the comments below only
 * describe the statements that are visible.
 */
196 _citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei,
197 wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
198 _HZState * __restrict psenc, size_t * __restrict nresult)
202 int bit, head, tail, len, ch;
204 escape_t *candidate, *init;
205 const range_t *range;
207 _DIAGASSERT(ei != NULL);
208 /* pwc may be null */
209 _DIAGASSERT(s != NULL);
210 _DIAGASSERT(psenc != NULL);
211 _DIAGASSERT(nresult != NULL);
/* State reset; the condition guarding this call is not visible here. */
214 _citrus_HZ_init_state(ei, psenc);
/* Sanity check: a negative buffer length or no escape in use is invalid. */
219 if (psenc->chlen < 0 || psenc->inuse == NULL)
223 bit = head = tail = 0;
225 for (len = 0; len <= MB_LEN_MAX; /**/) {
/* All buffered bytes consumed: more input is needed. */
226 if (psenc->chlen == tail) {
/* (size_t)-2 is reported on this path -- presumably when input is exhausted. */
229 *nresult = (size_t)-2;
/* Append the next input byte to the state buffer. */
232 psenc->ch[psenc->chlen++] = *s0++;
235 ch = (unsigned char)psenc->ch[tail++];
/* Byte whose low 7 bits are <= 0x1F: a control byte in either half. */
237 if ((ch & ~0x80) <= 0x1F) {
238 if (psenc->inuse != INIT0(ei))
/* Pick the graphic set for this character from the escape in use. */
244 graphic = GR(psenc->inuse);
248 graphic = GL(psenc->inuse);
249 if (ch == ESCAPE_CHAR)
/* Second byte of an escape sequence (buffer starts with ESCAPE_CHAR). */
255 } else if (tail == 2 && psenc->ch[0] == ESCAPE_CHAR) {
256 if (tail < psenc->chlen)
/* ESCAPE_CHAR followed by ESCAPE_CHAR. */
258 if (ch == ESCAPE_CHAR) {
/* ESCAPE_CHAR followed by newline: both bytes are dropped from the buffer. */
260 } else if (ch == '\n') {
261 if (psenc->inuse != INIT0(ei))
263 tail = psenc->chlen = 0;
268 _DIAGASSERT(init != NULL);
269 if (psenc->inuse == init) {
271 } else if (INIT(psenc->inuse) == init) {
/* Fall back to searching the list of the escape currently in use. */
276 if (candidate == NULL) {
277 candidate = find_escape(
278 SET(psenc->inuse), ch);
279 if (candidate == NULL) {
/* Shift to the escape that was found and discard the escape bytes. */
286 psenc->inuse = candidate;
287 tail = psenc->chlen = 0;
/* Within one character, high-bit bytes must all belong to GR ... */
290 } else if (ch & 0x80) {
291 if (graphic != GR(psenc->inuse))
/* ... and bytes without the high bit must all belong to GL. */
295 if (graphic != GL(psenc->inuse))
298 _DIAGASSERT(graphic != NULL);
/* Validate the byte against the range of the selected charset. */
299 range = &ranges[(size_t)graphic->charset];
300 if (range->start > ch || range->end < ch)
/* Character complete once graphic->length bytes have been consumed. */
304 if (graphic->length == (tail - head)) {
/* Charsets above GB2312 (CS94/CS96) carry the escape character in bits 24+. */
305 if (graphic->charset > GB2312)
306 bit |= ESC(psenc->inuse) << 24;
/* Error path: (size_t)-1 is reported. */
311 *nresult = (size_t)-1;
314 if (tail < psenc->chlen)
/* Success: 0 for a NUL wide character, otherwise len. */
320 *nresult = (wc == 0) ? 0 : len;
/*
 * _citrus_HZ_wcrtomb_priv: encode the wide character wc into s,
 * emitting escape sequences first when the required escape differs from
 * the one currently in use. Output is staged in psenc->ch and then
 * copied to s; *nresult receives the number of bytes staged, or
 * (size_t)-1 on the error path.
 *
 * NOTE(review): several lines of the body are missing from this excerpt
 * (including any check of n against the output length); the comments
 * below only describe the statements that are visible.
 */
326 _citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei,
327 char * __restrict s, size_t n, wchar_t wc,
328 _HZState * __restrict psenc, size_t * __restrict nresult)
331 escape_t *candidate, *init;
334 const range_t *range;
336 _DIAGASSERT(ei != NULL);
337 _DIAGASSERT(s != NULL);
338 _DIAGASSERT(psenc != NULL);
339 _DIAGASSERT(nresult != NULL);
/* The staging buffer must be empty and the state initialized. */
341 if (psenc->chlen != 0 || psenc->inuse == NULL)
/* 0x00-0x1F: control characters use the initial escape of list 0. */
349 if ((uint32_t)wc <= 0x1F) {
350 candidate = INIT0(ei);
352 ? candidate->left : candidate->right;
355 range = &ranges[(size_t)CTRL];
/* Up to 0x7F: single-byte character (the graphic assignment is not visible here). */
357 } else if ((uint32_t)wc <= 0x7F) {
361 candidate = graphic->escape;
362 range = &ranges[(size_t)graphic->charset];
363 len = graphic->length;
/* Up to 0x7F7F: encoded through the registered GB2312 graphic set. */
364 } else if ((uint32_t)wc <= 0x7F7F) {
365 graphic = ei->gb2312;
368 candidate = graphic->escape;
369 range = &ranges[(size_t)graphic->charset];
370 len = graphic->length;
/* Otherwise bits 24-31 hold the escape character; look it up in e0, then e1. */
372 ch = (wc >> 24) & 0xFF;
373 candidate = find_escape(E0SET(ei), ch);
374 if (candidate == NULL) {
375 candidate = find_escape(E1SET(ei), ch);
376 if (candidate == NULL)
381 ? candidate->left : candidate->right;
384 range = &ranges[(size_t)graphic->charset];
385 len = graphic->length;
/* A shift is needed when the target escape is not the one in use. */
387 if (psenc->inuse != candidate) {
389 if (SET(psenc->inuse) == SET(candidate)) {
390 if (INIT(psenc->inuse) != init ||
391 psenc->inuse == init || candidate == init)
393 } else if (candidate == (init = INIT(candidate))) {
/* Emit ESCAPE_CHAR followed by the initial escape's character. */
400 psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
401 psenc->ch[psenc->chlen++] = ESC(init);
/* Emit ESCAPE_CHAR followed by the target escape's character and switch. */
406 psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
407 psenc->ch[psenc->chlen++] = ESC(candidate);
408 psenc->inuse = candidate;
/* Extract one byte of wc, range-check it, and stage it with `bit` OR-ed in. */
413 ch = (wc >> (len * 8)) & 0xFF;
414 if (range->start > ch || range->end < ch)
416 psenc->ch[psenc->chlen++] = ch | bit;
/* Hand the staged bytes to the caller. */
418 memcpy(s, psenc->ch, psenc->chlen);
419 *nresult = psenc->chlen;
/* Error path: (size_t)-1 is reported. */
425 *nresult = (size_t)-1;
/*
 * _citrus_HZ_put_state_reset: write the bytes needed to return the
 * output stream to the initial shift state, then reinitialize psenc.
 *
 * If the escape in use is not the initial escape of list 0, the sequence
 * ESCAPE_CHAR + ESC(initial escape) is staged in psenc->ch. The staged
 * bytes (possibly none) are copied to s and their count is stored in
 * *nresult.
 *
 * NOTE(review): the lines between the visible statements are missing
 * from this excerpt, including whatever checks n -- confirm.
 */
430 _citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei,
431 char * __restrict s, size_t n, _HZState * __restrict psenc,
432 size_t * __restrict nresult)
436 _DIAGASSERT(ei != NULL);
437 _DIAGASSERT(s != NULL);
438 _DIAGASSERT(psenc != NULL);
439 _DIAGASSERT(nresult != NULL);
/* The staging buffer must be empty and the state initialized. */
441 if (psenc->chlen != 0 || psenc->inuse == NULL)
443 candidate = INIT0(ei);
444 if (psenc->inuse != candidate) {
448 psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
449 psenc->ch[psenc->chlen++] = ESC(candidate);
453 if (psenc->chlen > 0)
454 memcpy(s, psenc->ch, psenc->chlen);
455 *nresult = psenc->chlen;
456 _citrus_HZ_init_state(ei, psenc);
/*
 * _citrus_HZ_stdenc_get_state_desc_generic: classify the conversion
 * state into *rstate:
 *   - empty buffer, initial escape of list 0 in use -> _STDENC_SDGEN_INITIAL
 *   - empty buffer, any other escape in use         -> _STDENC_SDGEN_STABLE
 *   - buffer starts with ESCAPE_CHAR     -> _STDENC_SDGEN_INCOMPLETE_SHIFT
 *   - buffer holds anything else         -> _STDENC_SDGEN_INCOMPLETE_CHAR
 */
462 _citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei,
463 _HZState * __restrict psenc, int * __restrict rstate)
465 _DIAGASSERT(ei != NULL);
466 _DIAGASSERT(psenc != NULL);
467 _DIAGASSERT(rstate != NULL);
/* Sanity check; the action taken on failure is not visible in this excerpt. */
469 if (psenc->chlen < 0 || psenc->inuse == NULL)
471 *rstate = (psenc->chlen == 0)
472 ? ((psenc->inuse == INIT0(ei))
473 ? _STDENC_SDGEN_INITIAL
474 : _STDENC_SDGEN_STABLE)
475 : ((psenc->ch[0] == ESCAPE_CHAR)
476 ? _STDENC_SDGEN_INCOMPLETE_SHIFT
477 : _STDENC_SDGEN_INCOMPLETE_CHAR);
/*
 * _citrus_HZ_stdenc_wctocs: split the wide character wc into a charset
 * id (*csid) and an index within that charset (*idx).
 *
 * NOTE(review): the computation of `bit` and the *idx assignments of the
 * first two branches are not visible in this excerpt.
 */
484 _citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei,
485 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
489 _DIAGASSERT(csid != NULL);
490 _DIAGASSERT(idx != NULL);
/* Up to 0x7F: the charset id is just `bit`. */
498 if ((uint32_t)wc <= 0x7F) {
499 *csid = (_csid_t)bit;
/* Up to 0x7F7F: the charset id is `bit` with 0x8000 set. */
501 } else if ((uint32_t)wc <= 0x7F7F) {
502 *csid = (_csid_t)(bit | 0x8000);
/*
 * Otherwise the mask 0x00FFFF7F separates the two parts: the bits outside
 * it form the charset id, the bits inside it form the index.
 * NOTE(review): the casts look swapped ((_index_t) applied to the csid
 * value and (_csid_t) to the idx value), unlike the branches above.
 * Harmless only if both typedefs name the same type -- confirm.
 */
505 *csid = (_index_t)(wc & ~0x00FFFF7F);
506 *idx = (_csid_t)(wc & 0x00FFFF7F);
/*
 * _citrus_HZ_stdenc_cstowc: build a wide character in *wc from a charset
 * id and an index.
 *
 * NOTE(review): only two statements of the body are visible here: one
 * path ORs 0x80 into *wc and another ORs csid into *wc. The conditions
 * selecting them and the initial assignment of *wc are missing from this
 * excerpt -- confirm against the full file.
 */
514 _citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei,
515 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
517 _DIAGASSERT(ei != NULL);
518 _DIAGASSERT(wc != NULL);
524 *wc |= (wchar_t)0x80;
530 *wc |= (wchar_t)csid;
/*
 * _citrus_HZ_encoding_module_uninit: tear down the encoding info by
 * draining both escape lists (e0, then e1), unlinking each entry from the
 * head until the list is empty.
 *
 * NOTE(review): the statements that release each unlinked escape and its
 * graphic sets are not visible in this excerpt -- confirm.
 */
537 _citrus_HZ_encoding_module_uninit(_HZEncodingInfo *ei)
541 _DIAGASSERT(ei != NULL);
542 while ((escape = TAILQ_FIRST(E0SET(ei))) != NULL) {
543 TAILQ_REMOVE(E0SET(ei), escape, entry);
548 while ((escape = TAILQ_FIRST(E1SET(ei))) != NULL) {
549 TAILQ_REMOVE(E1SET(ei), escape, entry);
/*
 * _citrus_HZ_parse_char: property callback registered for the "CH" key in
 * escape_hints. *context is an array of pointers whose element 0 is the
 * escape_t being configured.
 *
 * NOTE(review): the assignment of escape->ch and the return statements
 * are not visible in this excerpt; the two visible checks presumably
 * reject (a) an escape whose character was already set and (b) a value
 * equal to ESCAPE_CHAR or followed by extra characters -- confirm.
 */
557 _citrus_HZ_parse_char(void **context, const char *name, const char *s)
562 _DIAGASSERT(context != NULL && *context != NULL);
563 _DIAGASSERT(name != NULL);
564 _DIAGASSERT(s != NULL);
566 p = (void **)*context;
567 escape = (escape_t *)p[0];
568 if (escape->ch != '\0')
571 if (escape->ch == ESCAPE_CHAR || *s != '\0')
/*
 * _citrus_HZ_parse_graphic: property callback registered for the "GL" and
 * "GR" keys in escape_hints. Allocates a zeroed graphic_t, attaches it to
 * the left (GL) or right (GR) slot of the escape being configured, and
 * fills in its charset from the value string s:
 *   "ASCII" / "GB2312" (case-insensitive prefix match), or "94*" / "96*"
 *   followed by a length digit '1'..'3'.
 *
 * *context is an array of pointers: element 0 is the escape_t, element 1
 * the _HZEncodingInfo.
 *
 * NOTE(review): allocation-failure handling, the duplicate-slot error
 * paths, and the cleanup code are not visible in this excerpt.
 */
578 _citrus_HZ_parse_graphic(void **context, const char *name, const char *s)
585 _DIAGASSERT(context != NULL && *context != NULL);
586 _DIAGASSERT(name != NULL);
587 _DIAGASSERT(s != NULL);
589 p = (void **)*context;
590 escape = (escape_t *)p[0];
591 ei = (_HZEncodingInfo *)p[1];
592 graphic = malloc(sizeof(*graphic));
595 memset(graphic, 0, sizeof(*graphic));
/* The key name selects the slot; a slot that is already filled is checked for. */
596 if (strcmp("GL", name) == 0) {
597 if (GL(escape) != NULL)
599 GL(escape) = graphic;
600 } else if (strcmp("GR", name) == 0) {
601 if (GR(escape) != NULL)
603 GR(escape) = graphic;
/* Back-pointer from the graphic set to its owning escape. */
609 graphic->escape = escape;
610 if (_bcs_strncasecmp("ASCII", s, 5) == 0) {
613 graphic->charset = ASCII;
617 } else if (_bcs_strncasecmp("GB2312", s, 6) == 0) {
620 graphic->charset = GB2312;
/* Remember the GB2312 graphic set on the encoding info for the encoder. */
622 ei->gb2312 = graphic;
624 } else if (strncmp("94*", s, 3) == 0) {
625 graphic->charset = CS94;
626 } else if (strncmp("96*", s, 3) == 0) {
627 graphic->charset = CS96;
/* Length digit: characters of this set occupy 1 to 3 bytes. */
633 case '1': case '2': case '3':
634 graphic->length = (size_t)(*s - '0');
/*
 * Property hints for the body of one escape definition: "CH" is handled
 * by _citrus_HZ_parse_char, "GL" and "GR" by _citrus_HZ_parse_graphic.
 */
644 static const _citrus_prop_hint_t escape_hints[] = {
645 _CITRUS_PROP_HINT_STR("CH", &_citrus_HZ_parse_char),
646 _CITRUS_PROP_HINT_STR("GL", &_citrus_HZ_parse_graphic),
647 _CITRUS_PROP_HINT_STR("GR", &_citrus_HZ_parse_graphic),
648 _CITRUS_PROP_HINT_END
/*
 * _citrus_HZ_parse_escape: property callback registered for the "0" and
 * "1" keys in root_hints. Allocates a zeroed escape_t, appends it to list
 * e0 (key "0") or e1 (key "1") of the encoding info passed in *context,
 * and then parses the value string s with escape_hints to fill it in.
 *
 * NOTE(review): allocation-failure handling, the branch for any other key
 * name, and the assignment of the remaining elements of p are not visible
 * in this excerpt.
 */
652 _citrus_HZ_parse_escape(void **context, const char *name, const char *s)
658 _DIAGASSERT(context != NULL);
659 _DIAGASSERT(name != NULL);
660 _DIAGASSERT(s != NULL);
662 ei = (_HZEncodingInfo *)*context;
663 escape = malloc(sizeof(*escape));
666 memset(escape, 0, sizeof(*escape));
667 if (strcmp("0", name) == 0) {
668 escape->set = E0SET(ei);
669 TAILQ_INSERT_TAIL(E0SET(ei), escape, entry);
670 } else if (strcmp("1", name) == 0) {
671 escape->set = E1SET(ei);
672 TAILQ_INSERT_TAIL(E1SET(ei), escape, entry);
/* Context array handed to the nested parse; element 0 is the new escape. */
677 p[0] = (void *)escape;
679 return _citrus_prop_parse_variable(
680 escape_hints, (void *)&p[0], s, strlen(s));
/*
 * Top-level property hints for the module variable: keys "0" and "1" each
 * define one escape and are both handled by _citrus_HZ_parse_escape.
 */
683 static const _citrus_prop_hint_t root_hints[] = {
684 _CITRUS_PROP_HINT_STR("0", &_citrus_HZ_parse_escape),
685 _CITRUS_PROP_HINT_STR("1", &_citrus_HZ_parse_escape),
686 _CITRUS_PROP_HINT_END
/*
 * _citrus_HZ_encoding_module_init: initialize the encoding info from the
 * module variable string var (lenvar bytes). Zeroes *ei, initializes both
 * escape lists, and parses var with root_hints.
 *
 * NOTE(review): the condition guarding the uninit call and the return
 * statements are not visible in this excerpt; presumably the partially
 * built info is torn down when parsing or validation fails -- confirm.
 */
690 _citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei,
691 const void * __restrict var, size_t lenvar)
695 _DIAGASSERT(ei != NULL);
697 memset(ei, 0, sizeof(*ei));
698 TAILQ_INIT(E0SET(ei));
699 TAILQ_INIT(E1SET(ei));
700 errnum = _citrus_prop_parse_variable(
701 root_hints, (void *)ei, var, lenvar);
703 _citrus_HZ_encoding_module_uninit(ei);
707 /* ----------------------------------------------------------------------
708 * public interface for ctype
 */
/*
 * NOTE(review): presumably declares and defines the ctype operations for
 * the HZ encoding, with the implementation supplied by the template
 * header included below; the macro bodies are not visible here -- confirm.
 */
711 _CITRUS_CTYPE_DECLS(HZ);
712 _CITRUS_CTYPE_DEF_OPS(HZ);
714 #include "citrus_ctype_template.h"
716 /* ----------------------------------------------------------------------
717 * public interface for stdenc
 */
/*
 * NOTE(review): presumably declares and defines the stdenc operations for
 * the HZ encoding, with the implementation supplied by the template
 * header included below; the macro bodies are not visible here -- confirm.
 */
720 _CITRUS_STDENC_DECLS(HZ);
721 _CITRUS_STDENC_DEF_OPS(HZ);
723 #include "citrus_stdenc_template.h"