Commit | Line | Data |
---|---|---|
c4074784 AP |
1 | /*- |
2 | * Copyright (c) 2003 Ryuichiro Imura | |
3 | * All rights reserved. | |
4 | * | |
5 | * Redistribution and use in source and binary forms, with or without | |
6 | * modification, are permitted provided that the following conditions | |
7 | * are met: | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * 2. Redistributions in binary form must reproduce the above copyright | |
11 | * notice, this list of conditions and the following disclaimer in the | |
12 | * documentation and/or other materials provided with the distribution. | |
13 | * | |
14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |
15 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |
18 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
19 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
20 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
21 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
22 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
23 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
24 | * SUCH DAMAGE. | |
25 | * | |
8527a700 | 26 | * $FreeBSD: head/lib/libkiconv/quirks.c 298896 2016-05-01 19:37:33Z pfg $ |
c4074784 AP |
27 | */ |
28 | ||
0d5acd74 JM |
29 | /* |
30 | * kiconv(3) requires shared linking, so reduce module size | |
31 | * when statically linked by building only a stub. | |
32 | */ | |
33 | ||
34 | #ifdef PIC | |
35 | ||
c4074784 AP |
36 | /* |
37 | * Why do we need quirks? | |
38 | * Since each vendor has its own Unicode mapping rules, | |
39 | * we need some quirks until iconv(3) supports them. | |
40 | * We can define Microsoft mappings here. | |
0d5acd74 | 41 | * |
c4074784 AP |
42 | * For example, the eucJP and Unicode mapping rule is based on |
43 | * the JIS standard. Since Microsoft uses cp932 for Unicode mapping | |
8527a700 | 44 | * which is not truly based on the JIS standard, reading a file |
c4074784 AP |
45 | * system created by Microsoft Windows family using eucJP/Unicode |
46 | * mapping rule will cause a problem. That's why we define eucJP-ms here. | |
8527a700 | 47 | * The eucJP-ms has been defined by The Open Group Japan Vendor Council. |
c4074784 AP |
48 | * |
49 | * Well, Apple Mac OS also has its own Unicode mappings, | |
50 | * but we won't require these quirks here, because HFS doesn't have | |
51 | * Unicode and HFS+ has decomposed Unicode which can not be | |
52 | * handled by this xlat16 converter. | |
53 | */ | |
54 | ||
55 | #include <sys/types.h> | |
56 | #include <sys/iconv.h> | |
57 | ||
58 | #include <stdio.h> | |
59 | #include <string.h> | |
60 | ||
61 | #include "quirks.h" | |
62 | ||
63 | /* | |
64 | * All lists of quirk character set | |
65 | */ | |
66 | static struct { | |
67 | int vendor; /* reserved for non MS mapping */ | |
68 | const char *base_codeset, *quirk_codeset; | |
69 | } quirk_list[] = { | |
70 | { KICONV_VENDOR_MICSFT, "eucJP", "eucJP-ms" }, | |
71 | { KICONV_VENDOR_MICSFT, "EUC-JP", "eucJP-ms" }, | |
72 | { KICONV_VENDOR_MICSFT, "SJIS", "SJIS-ms" }, | |
73 | { KICONV_VENDOR_MICSFT, "Shift_JIS", "SJIS-ms" }, | |
74 | { KICONV_VENDOR_MICSFT, "Big5", "Big5-ms" } | |
75 | }; | |
76 | ||
77 | /* | |
78 | * The character list to replace for Japanese MS-Windows. | |
79 | */ | |
80 | static struct quirk_replace_list quirk_jis_cp932[] = { | |
81 | { 0x00a2, 0xffe0 }, /* Cent Sign, Fullwidth Cent Sign */ | |
82 | { 0x00a3, 0xffe1 }, /* Pound Sign, Fullwidth Pound Sign */ | |
83 | { 0x00ac, 0xffe2 }, /* Not Sign, Fullwidth Not Sign */ | |
84 | { 0x2016, 0x2225 }, /* Double Vertical Line, Parallel To */ | |
85 | { 0x203e, 0x007e }, /* Overline, Tilde */ | |
86 | { 0x2212, 0xff0d }, /* Minus Sign, Fullwidth Hyphenminus */ | |
87 | { 0x301c, 0xff5e } /* Wave Dash, Fullwidth Tilde */ | |
88 | }; | |
89 | ||
90 | /* | |
91 | * All entries of quirks | |
92 | */ | |
93 | #define NumOf(n) (sizeof((n)) / sizeof((n)[0])) | |
94 | static struct { | |
95 | const char *quirk_codeset, *iconv_codeset, *pair_codeset; | |
96 | struct quirk_replace_list (*replace_list)[]; | |
97 | size_t num_of_replaces; | |
98 | } quirk_table[] = { | |
99 | { | |
100 | "eucJP-ms", "eucJP", ENCODING_UNICODE, | |
101 | (struct quirk_replace_list (*)[])&quirk_jis_cp932, | |
102 | NumOf(quirk_jis_cp932) | |
103 | }, | |
104 | { | |
105 | "SJIS-ms", "CP932", ENCODING_UNICODE, | |
106 | /* XXX - quirk_replace_list should be NULL */ | |
107 | (struct quirk_replace_list (*)[])&quirk_jis_cp932, | |
108 | NumOf(quirk_jis_cp932) | |
109 | }, | |
110 | { | |
111 | "Big5-ms", "CP950", ENCODING_UNICODE, | |
112 | NULL, 0 | |
113 | } | |
114 | }; | |
115 | ||
116 | ||
117 | const char * | |
118 | kiconv_quirkcs(const char* base, int vendor) | |
119 | { | |
120 | size_t i; | |
121 | ||
122 | /* | |
123 | * We should compare codeset names ignoring case here, | |
124 | * so that quirk could be used for all of the user input | |
125 | * patterns. | |
126 | */ | |
127 | for (i = 0; i < NumOf(quirk_list); i++) | |
128 | if (quirk_list[i].vendor == vendor && | |
129 | strcasecmp(quirk_list[i].base_codeset, base) == 0) | |
130 | return (quirk_list[i].quirk_codeset); | |
131 | ||
132 | return (base); | |
133 | } | |
134 | ||
135 | /* | |
136 | * Internal Functions | |
137 | */ | |
138 | const char * | |
139 | search_quirk(const char *given_codeset, | |
140 | const char *pair_codeset, | |
141 | struct quirk_replace_list **replace_list, | |
142 | size_t *num_of_replaces) | |
143 | { | |
144 | size_t i; | |
145 | ||
146 | *replace_list = NULL; | |
147 | *num_of_replaces = 0; | |
148 | for (i = 0; i < NumOf(quirk_table); i++) | |
149 | if (strcmp(quirk_table[i].quirk_codeset, given_codeset) == 0) { | |
150 | if (strcmp(quirk_table[i].pair_codeset, pair_codeset) == 0) { | |
151 | *replace_list = *quirk_table[i].replace_list; | |
152 | *num_of_replaces = quirk_table[i].num_of_replaces; | |
153 | } | |
154 | return (quirk_table[i].iconv_codeset); | |
155 | } | |
156 | ||
157 | return (given_codeset); | |
158 | } | |
159 | ||
160 | uint16_t | |
161 | quirk_vendor2unix(uint16_t c, struct quirk_replace_list *replace_list, size_t num) | |
162 | { | |
163 | size_t i; | |
164 | ||
165 | for (i = 0; i < num; i++) | |
166 | if (replace_list[i].vendor_code == c) | |
167 | return (replace_list[i].standard_code); | |
168 | ||
169 | return (c); | |
170 | } | |
171 | ||
172 | uint16_t | |
173 | quirk_unix2vendor(uint16_t c, struct quirk_replace_list *replace_list, size_t num) | |
174 | { | |
175 | size_t i; | |
176 | ||
177 | for (i = 0; i < num; i++) | |
178 | if (replace_list[i].standard_code == c) | |
179 | return (replace_list[i].vendor_code); | |
180 | ||
181 | return (c); | |
182 | } | |
0d5acd74 JM |
183 | |
184 | #else /* statically linked */ | |
185 | ||
186 | #include <sys/types.h> | |
187 | #include <sys/iconv.h> | |
188 | ||
/*
 * Stub used when statically linked: no quirk tables are compiled in, so
 * the base codeset name is returned as is and the vendor is ignored.
 *
 * Only vendor is marked __unused; base is returned and therefore used.
 */
const char *
kiconv_quirkcs(const char* base, int vendor __unused)
{

	return (base);
}
195 | ||
196 | #endif /* PIC */ |