Commit | Line | Data |
---|---|---|
4776d4e8 JM |
1 | /* |
2 | * Copyright 2013 Garrett D'Amore <garrett@damore.org> | |
3 | * Copyright 2010 Nexenta Systems, Inc. All rights reserved. | |
0d5acd74 JM |
4 | * Copyright (c) 2002-2004 Tim J. Robbins |
5 | * All rights reserved. | |
6 | * | |
7 | * Copyright (c) 2011 The FreeBSD Foundation | |
8 | * All rights reserved. | |
9 | * Portions of this software were developed by David Chisnall | |
10 | * under sponsorship from the FreeBSD Foundation. | |
11 | * | |
12 | * Redistribution and use in source and binary forms, with or without | |
13 | * modification, are permitted provided that the following conditions | |
14 | * are met: | |
15 | * 1. Redistributions of source code must retain the above copyright | |
16 | * notice, this list of conditions and the following disclaimer. | |
17 | * 2. Redistributions in binary form must reproduce the above copyright | |
18 | * notice, this list of conditions and the following disclaimer in the | |
19 | * documentation and/or other materials provided with the distribution. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |
22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
31 | * SUCH DAMAGE. | |
0d5acd74 | 32 | */ |
4776d4e8 | 33 | |
0d5acd74 JM |
34 | /* |
35 | * PRC National Standard GB 18030-2000 encoding of Chinese text. | |
36 | * | |
37 | * See gb18030(5) for details. | |
38 | */ | |
39 | ||
40 | #include <sys/param.h> | |
41 | ||
42 | #include <errno.h> | |
43 | #include <runetype.h> | |
44 | #include <stdlib.h> | |
45 | #include <string.h> | |
46 | #include <wchar.h> | |
47 | #include "mblocal.h" | |
48 | ||
/*
 * Forward declarations of the file-local conversion routines that
 * _GB18030_init() installs into the locale structure.
 */
static size_t	_GB18030_mbrtowc(wchar_t * __restrict, const char * __restrict,
		    size_t, mbstate_t * __restrict);
static int	_GB18030_mbsinit(const mbstate_t *);
static size_t	_GB18030_wcrtomb(char * __restrict, wchar_t,
		    mbstate_t * __restrict);
static size_t	_GB18030_mbsnrtowcs(wchar_t * __restrict,
		    const char ** __restrict, size_t, size_t,
		    mbstate_t * __restrict);
static size_t	_GB18030_wcsnrtombs(char * __restrict,
		    const wchar_t ** __restrict, size_t, size_t,
		    mbstate_t * __restrict);
60 | ||
0d5acd74 JM |
61 | |
/*
 * Conversion state, accessed by casting the caller's mbstate_t pointer.
 * A count of zero is the initial state (see _GB18030_mbsinit()).
 */
typedef struct {
	int	count;		/* number of valid bytes held in bytes[] */
	u_char	bytes[4];	/* leading bytes of a not-yet-complete character */
} _GB18030State;
66 | ||
67 | int | |
68 | _GB18030_init(struct xlocale_ctype *l, _RuneLocale *rl) | |
69 | { | |
70 | ||
71 | l->__mbrtowc = _GB18030_mbrtowc; | |
72 | l->__wcrtomb = _GB18030_wcrtomb; | |
73 | l->__mbsinit = _GB18030_mbsinit; | |
4776d4e8 JM |
74 | l->__mbsnrtowcs = _GB18030_mbsnrtowcs; |
75 | l->__wcsnrtombs = _GB18030_wcsnrtombs; | |
0d5acd74 JM |
76 | l->runes = rl; |
77 | l->__mb_cur_max = 4; | |
78 | l->__mb_sb_limit = 128; | |
79 | ||
80 | return (0); | |
81 | } | |
82 | ||
83 | static int | |
84 | _GB18030_mbsinit(const mbstate_t *ps) | |
85 | { | |
86 | ||
87 | return (ps == NULL || ((const _GB18030State *)ps)->count == 0); | |
88 | } | |
89 | ||
90 | static size_t | |
91 | _GB18030_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, | |
92 | size_t n, mbstate_t * __restrict ps) | |
93 | { | |
94 | _GB18030State *gs; | |
95 | wchar_t wch; | |
96 | int ch, len, ocount; | |
97 | size_t ncopy; | |
98 | ||
99 | gs = (_GB18030State *)ps; | |
100 | ||
101 | if (gs->count < 0 || gs->count > sizeof(gs->bytes)) { | |
102 | errno = EINVAL; | |
103 | return ((size_t)-1); | |
104 | } | |
105 | ||
106 | if (s == NULL) { | |
107 | s = ""; | |
108 | n = 1; | |
109 | pwc = NULL; | |
110 | } | |
111 | ||
112 | ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count); | |
4776d4e8 | 113 | (void) memcpy(gs->bytes + gs->count, s, ncopy); |
0d5acd74 JM |
114 | ocount = gs->count; |
115 | gs->count += ncopy; | |
116 | s = (char *)gs->bytes; | |
117 | n = gs->count; | |
118 | ||
119 | if (n == 0) | |
120 | /* Incomplete multibyte sequence */ | |
121 | return ((size_t)-2); | |
122 | ||
123 | /* | |
124 | * Single byte: [00-7f] | |
125 | * Two byte: [81-fe][40-7e,80-fe] | |
126 | * Four byte: [81-fe][30-39][81-fe][30-39] | |
127 | */ | |
128 | ch = (unsigned char)*s++; | |
129 | if (ch <= 0x7f) { | |
130 | len = 1; | |
131 | wch = ch; | |
132 | } else if (ch >= 0x81 && ch <= 0xfe) { | |
133 | wch = ch; | |
134 | if (n < 2) | |
135 | return ((size_t)-2); | |
136 | ch = (unsigned char)*s++; | |
137 | if ((ch >= 0x40 && ch <= 0x7e) || (ch >= 0x80 && ch <= 0xfe)) { | |
138 | wch = (wch << 8) | ch; | |
139 | len = 2; | |
140 | } else if (ch >= 0x30 && ch <= 0x39) { | |
141 | /* | |
142 | * Strip high bit off the wide character we will | |
143 | * eventually output so that it is positive when | |
144 | * cast to wint_t on 32-bit twos-complement machines. | |
145 | */ | |
146 | wch = ((wch & 0x7f) << 8) | ch; | |
147 | if (n < 3) | |
148 | return ((size_t)-2); | |
149 | ch = (unsigned char)*s++; | |
150 | if (ch < 0x81 || ch > 0xfe) | |
151 | goto ilseq; | |
152 | wch = (wch << 8) | ch; | |
153 | if (n < 4) | |
154 | return ((size_t)-2); | |
155 | ch = (unsigned char)*s++; | |
156 | if (ch < 0x30 || ch > 0x39) | |
157 | goto ilseq; | |
158 | wch = (wch << 8) | ch; | |
159 | len = 4; | |
160 | } else | |
161 | goto ilseq; | |
162 | } else | |
163 | goto ilseq; | |
164 | ||
165 | if (pwc != NULL) | |
166 | *pwc = wch; | |
167 | gs->count = 0; | |
168 | return (wch == L'\0' ? 0 : len - ocount); | |
169 | ilseq: | |
170 | errno = EILSEQ; | |
171 | return ((size_t)-1); | |
172 | } | |
173 | ||
174 | static size_t | |
175 | _GB18030_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) | |
176 | { | |
177 | _GB18030State *gs; | |
178 | size_t len; | |
179 | int c; | |
180 | ||
181 | gs = (_GB18030State *)ps; | |
182 | ||
183 | if (gs->count != 0) { | |
184 | errno = EINVAL; | |
185 | return ((size_t)-1); | |
186 | } | |
187 | ||
188 | if (s == NULL) | |
189 | /* Reset to initial shift state (no-op) */ | |
190 | return (1); | |
191 | if ((wc & ~0x7fffffff) != 0) | |
192 | goto ilseq; | |
193 | if (wc & 0x7f000000) { | |
194 | /* Replace high bit that mbrtowc() removed. */ | |
195 | wc |= 0x80000000; | |
196 | c = (wc >> 24) & 0xff; | |
197 | if (c < 0x81 || c > 0xfe) | |
198 | goto ilseq; | |
199 | *s++ = c; | |
200 | c = (wc >> 16) & 0xff; | |
201 | if (c < 0x30 || c > 0x39) | |
202 | goto ilseq; | |
203 | *s++ = c; | |
204 | c = (wc >> 8) & 0xff; | |
205 | if (c < 0x81 || c > 0xfe) | |
206 | goto ilseq; | |
207 | *s++ = c; | |
208 | c = wc & 0xff; | |
209 | if (c < 0x30 || c > 0x39) | |
210 | goto ilseq; | |
211 | *s++ = c; | |
212 | len = 4; | |
213 | } else if (wc & 0x00ff0000) | |
214 | goto ilseq; | |
215 | else if (wc & 0x0000ff00) { | |
216 | c = (wc >> 8) & 0xff; | |
217 | if (c < 0x81 || c > 0xfe) | |
218 | goto ilseq; | |
219 | *s++ = c; | |
220 | c = wc & 0xff; | |
221 | if (c < 0x40 || c == 0x7f || c == 0xff) | |
222 | goto ilseq; | |
223 | *s++ = c; | |
224 | len = 2; | |
225 | } else if (wc <= 0x7f) { | |
226 | *s++ = wc; | |
227 | len = 1; | |
228 | } else | |
229 | goto ilseq; | |
230 | ||
231 | return (len); | |
232 | ilseq: | |
233 | errno = EILSEQ; | |
234 | return ((size_t)-1); | |
235 | } | |
4776d4e8 JM |
236 | |
237 | static size_t | |
238 | _GB18030_mbsnrtowcs(wchar_t * __restrict dst, | |
239 | const char ** __restrict src, size_t nms, size_t len, | |
240 | mbstate_t * __restrict ps) | |
241 | { | |
242 | return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GB18030_mbrtowc)); | |
243 | } | |
244 | ||
245 | static size_t | |
246 | _GB18030_wcsnrtombs(char * __restrict dst, | |
247 | const wchar_t ** __restrict src, size_t nwc, size_t len, | |
248 | mbstate_t * __restrict ps) | |
249 | { | |
250 | return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GB18030_wcrtomb)); | |
251 | } |