2 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
3 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
4 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
6 * The Regents of the University of California. All rights reserved.
8 * This code is derived from software contributed to Berkeley by
9 * Paul Borman at Krystal Technologies.
11 * Copyright (c) 2011 The FreeBSD Foundation
12 * All rights reserved.
13 * Portions of this software were developed by David Chisnall
14 * under sponsorship from the FreeBSD Foundation.
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * @(#)euc.c 8.1 (Berkeley) 6/4/93
43 #include <sys/param.h>
/*
 * Forward declarations for the file-local conversion routines.
 *
 * The two *_impl routines are the shared workers: besides the usual
 * conversion arguments they take four uint8_t values (named cs2, cs2width,
 * cs3 and cs3width in their definitions).
 */
53 static size_t _EUC_mbrtowc_impl(wchar_t * __restrict, const char * __restrict,
54 size_t, mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t);
55 static size_t _EUC_wcrtomb_impl(char * __restrict, wchar_t,
56 mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t);
/* Per-encoding mbrtowc entry points (CN, JP, KR, TW). */
58 static size_t _EUC_CN_mbrtowc(wchar_t * __restrict, const char * __restrict,
59 size_t, mbstate_t * __restrict);
60 static size_t _EUC_JP_mbrtowc(wchar_t * __restrict, const char * __restrict,
61 size_t, mbstate_t * __restrict);
62 static size_t _EUC_KR_mbrtowc(wchar_t * __restrict, const char * __restrict,
63 size_t, mbstate_t * __restrict);
64 static size_t _EUC_TW_mbrtowc(wchar_t * __restrict, const char * __restrict,
65 size_t, mbstate_t * __restrict);
/* Per-encoding wcrtomb entry points. */
67 static size_t _EUC_CN_wcrtomb(char * __restrict, wchar_t,
68 mbstate_t * __restrict);
69 static size_t _EUC_JP_wcrtomb(char * __restrict, wchar_t,
70 mbstate_t * __restrict);
71 static size_t _EUC_KR_wcrtomb(char * __restrict, wchar_t,
72 mbstate_t * __restrict);
73 static size_t _EUC_TW_wcrtomb(char * __restrict, wchar_t,
74 mbstate_t * __restrict);
/* Per-encoding mbsnrtowcs entry points. */
76 static size_t _EUC_CN_mbsnrtowcs(wchar_t * __restrict,
77 const char ** __restrict, size_t, size_t,
78 mbstate_t * __restrict);
79 static size_t _EUC_JP_mbsnrtowcs(wchar_t * __restrict,
80 const char ** __restrict, size_t, size_t,
81 mbstate_t * __restrict);
82 static size_t _EUC_KR_mbsnrtowcs(wchar_t * __restrict,
83 const char ** __restrict, size_t, size_t,
84 mbstate_t * __restrict);
85 static size_t _EUC_TW_mbsnrtowcs(wchar_t * __restrict,
86 const char ** __restrict, size_t, size_t,
87 mbstate_t * __restrict);
/* Per-encoding wcsnrtombs entry points. */
89 static size_t _EUC_CN_wcsnrtombs(char * __restrict,
90 const wchar_t ** __restrict, size_t, size_t,
91 mbstate_t * __restrict);
92 static size_t _EUC_JP_wcsnrtombs(char * __restrict,
93 const wchar_t ** __restrict, size_t, size_t,
94 mbstate_t * __restrict);
95 static size_t _EUC_KR_wcsnrtombs(char * __restrict,
96 const wchar_t ** __restrict, size_t, size_t,
97 mbstate_t * __restrict);
98 static size_t _EUC_TW_wcsnrtombs(char * __restrict,
99 const wchar_t ** __restrict, size_t, size_t,
100 mbstate_t * __restrict);
/* State test shared by all four encodings. */
102 static int _EUC_mbsinit(const mbstate_t *);
/*
 * Report whether a conversion state is the initial one.
 *
 * Returns non-zero when ps is NULL, or when the want field of the
 * _EucState that ps is cast to equals zero; returns zero otherwise.
 */
111 _EUC_mbsinit(const mbstate_t *ps)
114 return (ps == NULL || ((const _EucState *)ps)->want == 0);
118 * EUC-CN uses CS0, CS1 and CS2 (4 bytes).
/*
 * Install the EUC-CN conversion routines into the ctype table l:
 * the four _EUC_CN_* converters plus the shared _EUC_mbsinit, and set
 * __mb_sb_limit to 256.
 *
 * NOTE(review): rl is not referenced by the statements visible here;
 * presumably it is consumed on lines outside this view -- confirm.
 */
121 _EUC_CN_init(struct xlocale_ctype *l, _RuneLocale *rl)
123 l->__mbrtowc = _EUC_CN_mbrtowc;
124 l->__wcrtomb = _EUC_CN_wcrtomb;
125 l->__mbsnrtowcs = _EUC_CN_mbsnrtowcs;
126 l->__wcsnrtombs = _EUC_CN_wcsnrtombs;
127 l->__mbsinit = _EUC_mbsinit;
131 l->__mb_sb_limit = 256;
/*
 * EUC-CN mbrtowc entry point (installed as l->__mbrtowc by _EUC_CN_init).
 * Forwards to _EUC_mbrtowc_impl() with cs2 = SS2, cs2width = 4 and zero
 * for both cs3 and cs3width, and returns its result unchanged.
 */
136 _EUC_CN_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
137 size_t n, mbstate_t * __restrict ps)
139 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
/*
 * EUC-CN mbsnrtowcs entry point.  Passes every argument through to
 * __mbsnrtowcs_std() and supplies _EUC_CN_mbrtowc as its final argument
 * (presumably the per-character converter -- confirm against the helper).
 */
143 _EUC_CN_mbsnrtowcs(wchar_t * __restrict dst,
144 const char ** __restrict src,
145 size_t nms, size_t len, mbstate_t * __restrict ps)
147 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc));
/*
 * EUC-CN wcrtomb entry point (installed as l->__wcrtomb by _EUC_CN_init).
 * Forwards to _EUC_wcrtomb_impl() with cs2 = SS2, cs2width = 4 and zero
 * for both cs3 and cs3width, and returns its result unchanged.
 */
151 _EUC_CN_wcrtomb(char * __restrict s, wchar_t wc,
152 mbstate_t * __restrict ps)
154 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
/*
 * EUC-CN wcsnrtombs entry point.  Passes every argument through to
 * __wcsnrtombs_std() and supplies _EUC_CN_wcrtomb as its final argument
 * (presumably the per-character converter -- confirm against the helper).
 */
158 _EUC_CN_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
159 size_t nwc, size_t len, mbstate_t * __restrict ps)
161 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb));
165 * EUC-KR uses only CS0 and CS1.
/*
 * Install the EUC-KR conversion routines into the ctype table l:
 * the four _EUC_KR_* converters plus the shared _EUC_mbsinit, and set
 * __mb_sb_limit to 128.
 *
 * NOTE(review): rl is not referenced by the statements visible here;
 * presumably it is consumed on lines outside this view -- confirm.
 */
168 _EUC_KR_init(struct xlocale_ctype *l, _RuneLocale *rl)
170 l->__mbrtowc = _EUC_KR_mbrtowc;
171 l->__wcrtomb = _EUC_KR_wcrtomb;
172 l->__mbsnrtowcs = _EUC_KR_mbsnrtowcs;
173 l->__wcsnrtombs = _EUC_KR_wcsnrtombs;
174 l->__mbsinit = _EUC_mbsinit;
178 l->__mb_sb_limit = 128;
/*
 * EUC-KR mbrtowc entry point (installed as l->__mbrtowc by _EUC_KR_init).
 * Forwards to _EUC_mbrtowc_impl() with zero for cs2, cs2width, cs3 and
 * cs3width, and returns its result unchanged.
 */
183 _EUC_KR_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
184 size_t n, mbstate_t * __restrict ps)
186 return (_EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0));
/*
 * EUC-KR mbsnrtowcs entry point.  Passes every argument through to
 * __mbsnrtowcs_std() and supplies _EUC_KR_mbrtowc as its final argument
 * (presumably the per-character converter -- confirm against the helper).
 */
190 _EUC_KR_mbsnrtowcs(wchar_t * __restrict dst,
191 const char ** __restrict src,
192 size_t nms, size_t len, mbstate_t * __restrict ps)
194 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc));
/*
 * EUC-KR wcrtomb entry point (installed as l->__wcrtomb by _EUC_KR_init).
 * Forwards to _EUC_wcrtomb_impl() with zero for cs2, cs2width, cs3 and
 * cs3width, and returns its result unchanged.
 */
198 _EUC_KR_wcrtomb(char * __restrict s, wchar_t wc,
199 mbstate_t * __restrict ps)
201 return (_EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0));
/*
 * EUC-KR wcsnrtombs entry point.  Passes every argument through to
 * __wcsnrtombs_std() and supplies _EUC_KR_wcrtomb as its final argument
 * (presumably the per-character converter -- confirm against the helper).
 */
205 _EUC_KR_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
206 size_t nwc, size_t len, mbstate_t * __restrict ps)
208 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb));
212 * EUC-JP uses CS0, CS1, CS2, and CS3.
/*
 * Install the EUC-JP conversion routines into the ctype table l:
 * the four _EUC_JP_* converters plus the shared _EUC_mbsinit, and set
 * __mb_sb_limit to 196.
 *
 * NOTE(review): 196 differs from the 128 and 256 used by the other EUC
 * init routines in this file -- confirm the value is intentional.
 * NOTE(review): rl is not referenced by the statements visible here;
 * presumably it is consumed on lines outside this view -- confirm.
 */
215 _EUC_JP_init(struct xlocale_ctype *l, _RuneLocale *rl)
217 l->__mbrtowc = _EUC_JP_mbrtowc;
218 l->__wcrtomb = _EUC_JP_wcrtomb;
219 l->__mbsnrtowcs = _EUC_JP_mbsnrtowcs;
220 l->__wcsnrtombs = _EUC_JP_wcsnrtombs;
221 l->__mbsinit = _EUC_mbsinit;
225 l->__mb_sb_limit = 196;
/*
 * EUC-JP mbrtowc entry point (installed as l->__mbrtowc by _EUC_JP_init).
 * Forwards to _EUC_mbrtowc_impl() with cs2 = SS2, cs2width = 2,
 * cs3 = SS3 and cs3width = 3, and returns its result unchanged.
 */
230 _EUC_JP_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
231 size_t n, mbstate_t * __restrict ps)
233 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3));
/*
 * EUC-JP mbsnrtowcs entry point.  Passes every argument through to
 * __mbsnrtowcs_std() and supplies _EUC_JP_mbrtowc as its final argument
 * (presumably the per-character converter -- confirm against the helper).
 */
237 _EUC_JP_mbsnrtowcs(wchar_t * __restrict dst,
238 const char ** __restrict src,
239 size_t nms, size_t len, mbstate_t * __restrict ps)
241 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc));
/*
 * EUC-JP wcrtomb entry point (installed as l->__wcrtomb by _EUC_JP_init).
 * Forwards to _EUC_wcrtomb_impl() with cs2 = SS2, cs2width = 2,
 * cs3 = SS3 and cs3width = 3, and returns its result unchanged.
 */
245 _EUC_JP_wcrtomb(char * __restrict s, wchar_t wc,
246 mbstate_t * __restrict ps)
248 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3));
/*
 * EUC-JP wcsnrtombs entry point.  Passes every argument through to
 * __wcsnrtombs_std() and supplies _EUC_JP_wcrtomb as its final argument
 * (presumably the per-character converter -- confirm against the helper).
 */
252 _EUC_JP_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
253 size_t nwc, size_t len, mbstate_t * __restrict ps)
255 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb));
259 * EUC-TW uses CS0, CS1, and CS2.
/*
 * Install the EUC-TW conversion routines into the ctype table l:
 * the four _EUC_TW_* converters plus the shared _EUC_mbsinit, and set
 * __mb_sb_limit to 256.
 *
 * NOTE(review): rl is not referenced by the statements visible here;
 * presumably it is consumed on lines outside this view -- confirm.
 */
262 _EUC_TW_init(struct xlocale_ctype *l, _RuneLocale *rl)
264 l->__mbrtowc = _EUC_TW_mbrtowc;
265 l->__wcrtomb = _EUC_TW_wcrtomb;
266 l->__mbsnrtowcs = _EUC_TW_mbsnrtowcs;
267 l->__wcsnrtombs = _EUC_TW_wcsnrtombs;
268 l->__mbsinit = _EUC_mbsinit;
272 l->__mb_sb_limit = 256;
/*
 * EUC-TW mbrtowc entry point (installed as l->__mbrtowc by _EUC_TW_init).
 * Forwards to _EUC_mbrtowc_impl() with cs2 = SS2, cs2width = 4 and zero
 * for both cs3 and cs3width, and returns its result unchanged.
 */
277 _EUC_TW_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
278 size_t n, mbstate_t * __restrict ps)
280 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
/*
 * EUC-TW mbsnrtowcs entry point.  Passes every argument through to
 * __mbsnrtowcs_std() and supplies _EUC_TW_mbrtowc as its final argument
 * (presumably the per-character converter -- confirm against the helper).
 */
284 _EUC_TW_mbsnrtowcs(wchar_t * __restrict dst,
285 const char ** __restrict src,
286 size_t nms, size_t len, mbstate_t * __restrict ps)
288 return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc));
/*
 * EUC-TW wcrtomb entry point (installed as l->__wcrtomb by _EUC_TW_init).
 * Forwards to _EUC_wcrtomb_impl() with cs2 = SS2, cs2width = 4 and zero
 * for both cs3 and cs3width, and returns its result unchanged.
 */
292 _EUC_TW_wcrtomb(char * __restrict s, wchar_t wc,
293 mbstate_t * __restrict ps)
295 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
/*
 * EUC-TW wcsnrtombs entry point.  Passes every argument through to
 * __wcsnrtombs_std() and supplies _EUC_TW_wcrtomb as its final argument
 * (presumably the per-character converter -- confirm against the helper).
 */
299 _EUC_TW_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
300 size_t nwc, size_t len, mbstate_t * __restrict ps)
302 return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb));
/*
 * Shared mbrtowc-style decoder behind the per-encoding _EUC_*_mbrtowc
 * wrappers.
 *
 * cs2 and cs3 are byte values compared against the byte read from *s;
 * cs2width and cs3width are supplied alongside them by the wrappers.
 * The conversion state ps is accessed as an _EucState.
 *
 * Return values visible here: on the ASCII fast path, 1 for a non-NUL
 * byte and 0 for NUL; at the end, 0 when the decoded character is L'\0',
 * otherwise want.
 *
 * NOTE(review): several statements of this function are not visible in
 * this view, so the comments below describe only the lines shown.
 */
310 _EUC_mbrtowc_impl(wchar_t * __restrict pwc, const char * __restrict s,
311 size_t n, mbstate_t * __restrict ps,
312 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
317 unsigned char ch, chs;
319 es = (_EucState *)ps;
/* Guard: saved want count outside 0..MB_CUR_MAX (handling not shown here). */
321 if (es->want < 0 || es->want > MB_CUR_MAX) {
333 /* Incomplete multibyte sequence */
337 /* Fast path for plain ASCII (CS0) */
/* ch is the byte at *s, read as unsigned; a clear high bit means ASCII. */
338 if (((ch = (unsigned char)*s) & 0x80) == 0) {
341 return (ch != '\0' ? 1 : 0);
/* Otherwise the byte is checked against the caller-supplied cs2, then cs3. */
347 } else if (ch == cs2) {
349 } else if (ch == cs3) {
/* Iterate over at most MIN(want, n) bytes, never more than the n supplied. */
364 for (i = 0; i < MIN(want, n); i++) {
371 /* Incomplete multibyte sequence */
/* A decoded L'\0' reports 0; any other character reports want. */
380 return (wc == L'\0' ? 0 : want);
384 _EUC_wcrtomb_impl(char * __restrict s, wchar_t wc,
385 mbstate_t * __restrict ps,
386 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
392 es = (_EucState *)ps;
400 /* Reset to initial shift state (no-op) */
403 if ((wc & ~0x7f) == 0) {
404 /* Fast path for plain ASCII (CS0) */
409 /* Determine the "length" */
410 if ((unsigned)wc > 0xffffff) {
412 } else if ((unsigned)wc > 0xffff) {
414 } else if ((unsigned)wc > 0xff) {
420 if (len > MB_CUR_MAX) {
425 /* This first check excludes CS1, which is implicitly valid. */
426 if ((wc < 0xa100) || (wc > 0xffff)) {
427 /* Check for valid CS2 or CS3 */
428 nm = (wc >> ((len - 1) * 8));
430 if (len != cs2width) {
434 } else if (nm == cs3) {
435 if (len != cs3width) {
445 /* Stash the bytes, least significant last */
446 for (i = len - 1; i >= 0; i--) {