Commit | Line | Data |
---|---|---|
f28e3132 | 1 | #!/usr/local/bin/perl -wC |
252345eb JM |
2 | |
3 | use strict; | |
f28e3132 | 4 | use File::Copy; |
252345eb JM |
5 | use XML::Parser; |
6 | use Tie::IxHash; | |
7 | use Data::Dumper; | |
8 | use Getopt::Long; | |
9 | use Digest::SHA qw(sha1_hex); | |
10 | require "charmaps.pm"; | |
11 | ||
12 | ||
# Command-line handling.
#
# --cldr, --unidata, --etc and --type are all needed by the code below
# (they are interpolated into paths and compared against the known types);
# --lc optionally restricts the run to one locale.
my $USAGE = "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";

if ($#ARGV < 2) {
	print $USAGE;
	exit(1);
}

my $DEFENCODING = "UTF-8";	# encoding of the *.src files that are read
my @filter = ();		# (language, family, country) when --lc is used

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $doonly = undef;

my $result = GetOptions (
	"cldr=s"	=> \$CLDRDIR,
	"unidata=s"	=> \$UNIDATADIR,
	"etc=s"		=> \$ETCDIR,
	"type=s"	=> \$TYPE,
	"lc=s"		=> \$doonly
);

# The argument count alone does not prove that every mandatory option was
# supplied (or parsed successfully), so verify them explicitly rather than
# continuing with undefined directories.
if (!$result
    || grep { !defined $_ } ($CLDRDIR, $UNIDATADIR, $ETCDIR, $TYPE)) {
	print $USAGE;
	exit(1);
}
34 | ||
# Shared state.  The loaders called below fill these in; the statement
# order is significant because each loader reads the tables set up before it.
my %convertors;		# filled by get_encodings()
my %ucd;		# filled by get_unidata()
my %values;		# filled by get_fields()
my %hashtable;		# content hash -> locale names, used by make_makefile()
my %languages;
my %translations;
my %encodings;
my %alternativemonths;
get_languages();

my %utf8map;		# filled by get_utf8map()
my %utf8aliases;	# filled by get_utf8map()
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# Both hashes are tied to Tie::IxHash so they iterate in insertion order.
my %keys;
tie %keys, "Tie::IxHash";
tie %hashtable, "Tie::IxHash";
55 | ||
# Maps each --type value to the value written as FILESNAME in the Makefile.
my %FILESNAMES = (
	monetdef	=> "LC_MONETARY",
	timedef		=> "LC_TIME",
	msgdef		=> "LC_MESSAGES",
	numericdef	=> "LC_NUMERIC",
	colldef		=> "LC_COLLATE",
	ctypedef	=> "LC_CTYPE",
);

# Callbacks named in the "<callback<source<format" field specifications.
# The "data" slot is set by print_fields() around each call and holds the
# current language, country, key and encoding.
my %callback = (
	mdorder	=> \&callback_mdorder,
	altmon	=> \&callback_altmon,
	cformat	=> \&callback_cformat,
	cbabmon	=> \&callback_abmon,
	data	=> undef,
);
72 | ||
# Text printed in the "# ..." comment above each field in a generated file.
# print_fields() dies if a key listed in %keys has no entry here.
my %DESC = (
	# Fields whose description is simply the keyword itself
	# (all of numericdef and msgdef, most of monetdef, two of timedef).
	(map { $_ => $_ } qw(
		decimal_point thousands_sep grouping
		currency_symbol mon_decimal_point mon_thousands_sep
		mon_grouping positive_sign negative_sign
		int_frac_digits frac_digits
		p_cs_precedes p_sep_by_space n_cs_precedes n_sep_by_space
		p_sign_posn n_sign_posn
		yesexpr noexpr yesstr nostr
		c_fmt md_order
	)),

	# monetdef
	int_curr_symbol	=> "int_curr_symbol (last character always SPACE)",

	# timedef
	abmon		=> "Short month names",
	mon		=> "Long month names (as in a date)",
	abday		=> "Short weekday names",
	day		=> "Long weekday names",
	t_fmt		=> "X_fmt",
	d_fmt		=> "x_fmt",
	am_pm		=> "AM/PM",
	d_t_fmt		=> "date_fmt",
	altmon		=> "Long month names (without case ending)",
	t_fmt_ampm	=> "ampm_fmt",
);
118 | ||
f28e3132 JM |
# Field tables for the categories handled by get_fields()/print_fields().
#
# Each table is an ordered list of "keyword => format" pairs; the order is
# kept because %keys is tied to Tie::IxHash.  Formats understood by
# print_fields():
#   "i", "ai"  value is written out unchanged
#   "s"        one string whose <SYMBOL> names are decoded
#   "as"       ';'-separated strings, decoded and written one per line
#   "<cb<src<fmt"  value is $callback{cb} applied to field "src", then
#                  written as format "fmt"
my %FIELDS = (
	numericdef => [
		"decimal_point"	=> "s",
		"thousands_sep"	=> "s",
		"grouping"	=> "ai",
	],
	monetdef => [
		"int_curr_symbol"	=> "s",
		"currency_symbol"	=> "s",
		"mon_decimal_point"	=> "s",
		"mon_thousands_sep"	=> "s",
		"mon_grouping"		=> "ai",
		"positive_sign"		=> "s",
		"negative_sign"		=> "s",
		"int_frac_digits"	=> "i",
		"frac_digits"		=> "i",
		"p_cs_precedes"		=> "i",
		"p_sep_by_space"	=> "i",
		"n_cs_precedes"		=> "i",
		"n_sep_by_space"	=> "i",
		"p_sign_posn"		=> "i",
		"n_sign_posn"		=> "i",
	],
	msgdef => [
		"yesexpr"	=> "s",
		"noexpr"	=> "s",
		"yesstr"	=> "s",
		"nostr"		=> "s",
	],
	# "d_fmt" used to be listed twice here with the same format; the
	# repeat had no effect and has been dropped.
	timedef => [
		"abmon"		=> "<cbabmon<abmon<as",
		"mon"		=> "as",
		"abday"		=> "as",
		"day"		=> "as",
		"t_fmt"		=> "s",
		"d_fmt"		=> "s",
		"c_fmt"		=> "<cformat<d_t_fmt<s",
		"am_pm"		=> "as",
		"d_t_fmt"	=> "s",
		"altmon"	=> "<altmon<mon<as",
		"md_order"	=> "<mdorder<d_fmt<s",
		"t_fmt_ampm"	=> "s",
	],
);

if (defined $TYPE && $TYPE eq "colldef") {
	transform_collation();
	make_makefile();
} elsif (defined $TYPE && $TYPE eq "ctypedef") {
	transform_ctypes();
	make_makefile();
} elsif (defined $TYPE && exists $FIELDS{$TYPE}) {
	%keys = @{$FIELDS{$TYPE}};
	get_fields();
	print_fields();
	make_makefile();
} else {
	# An unrecognised type used to fall through every test and exit
	# successfully without producing anything.
	print STDERR "Unknown type '" . (defined $TYPE ? $TYPE : "") .
	    "'; expected one of: colldef ctypedef " .
	    join(" ", sort keys(%FIELDS)) . "\n";
	exit(1);
}
195 | ||
ddddc53a JM |
# callback_cformat($fmt)
#
# Derives c_fmt from the date/time format by removing the first " %Z" and
# the first " %z" (time-zone conversions, including the leading space).
# Returns undef when no format was read, matching callback_mdorder().
sub callback_cformat {
	my $s = shift;
	return undef if (!defined $s);
	$s =~ s/ %Z//;
	$s =~ s/ %z//;
	return $s;
};
202 | ||
252345eb JM |
# callback_mdorder($fmt)
#
# Reduces a date format to its month/day ordering by discarding every
# character other than 'd' and 'm'.  Returns undef for an undefined format.
sub callback_mdorder {
	my ($fmt) = @_;
	return undef unless (defined $fmt);

	# tr with /c (complement) and /d (delete) drops all but 'd' and 'm'
	(my $order = $fmt) =~ tr/dm//cd;
	return $order;
};
209 | ||
# callback_altmon($mon)
#
# Returns the alternative month names registered in %alternativemonths for
# the language/country currently stored in $callback{data}, with the
# whitespace around each ';'-separated name removed.  When none are
# registered the long month names passed in are returned unchanged.
sub callback_altmon {
	my $mon = shift;

	my $alt = $alternativemonths{$callback{data}{l}}{$callback{data}{c}};
	return $mon unless (defined $alt);

	my @trimmed = map {
		my $name = $_;
		$name =~ s/^\s+//;
		$name =~ s/\s+$//;
		$name;
	} split(";", $alt);
	return join(";", @trimmed);
}
229 | ||
fb572e12 JM |
# callback_abmon($names)
#
# For the listed CJK locales, prefixes month names that start with a single
# digit (<one>..<nine>, but not <one> followed by <zero>, <one> or <two>)
# with <space> so that columns line up (style established in FreeBSD in
# 2001).  Other locales get the names back unchanged.
sub callback_abmon {
	my $names = shift;

	my %padded_locale = map { $_ => 1 } qw(ja_JP ko_KR zh_CN zh_HK zh_TW);
	my $locale = $callback{data}{l} . "_" . $callback{data}{c};
	return $names unless ($padded_locale{$locale});

	my @result;
	for my $name (split(";", $names)) {
		my $two_to_nine =
		    $name =~ /^"<(two|three|four|five|six|seven|eight|nine)>/;
		my $lone_one = $name =~ /^"<one>/
		    && $name !~ /^"<one>(<zero>|<one>|<two>)/;
		$name =~ s/^"/"<space>/ if ($two_to_nine || $lone_one);
		push @result, $name;
	}
	return join(";", @result);
}
253 | ||
252345eb JM |
254 | ############################ |
255 | ||
# get_unidata($directory)
#
# Reads $directory/UnicodeData.txt and records, for every line, the first
# two ';'-separated fields in both directions:
#   $ucd{code2name}{<code>} = <name>
#   $ucd{name2code}{<name>} = <code>
# Dies (with the system error) when the file cannot be opened.
sub get_unidata {
	my $directory = shift;
	my $path = "$directory/UnicodeData.txt";

	open(my $fin, "<", $path)
	    or die("Cannot open $path: $!");

	while (my $l = <$fin>) {
		chomp($l);
		my ($code, $name) = split(/;/, $l);

		# Blank or truncated lines have no usable code/name pair
		next if (!defined $code || !defined $name);

		$ucd{code2name}{$code} = $name;	# Unicode name
		$ucd{name2code}{$name} = $code;	# Unicode code
	}
	close($fin);
}
272 | ||
# get_utf8map($file)
#
# Parses the CHARMAP ... END CHARMAP section of a charmap file.  For each
# "<NAME> \xNN\xNN..." line it stores
#   $utf8map{<name with '_' turned into ' '>} = <hex digits without "\x">
# and, when a line has the same code as the line before it, records the
# later name as an alias of the earlier one in %utf8aliases.
#
# A file that cannot be opened is reported on STDERR and leaves the maps
# empty (previously this happened silently).
sub get_utf8map {
	my $file = shift;

	my $fin;
	if (!open($fin, "<", $file)) {
		print STDERR "Cannot open $file: $!\n";
		return;
	}
	my @lines = <$fin>;
	close($fin);
	chomp(@lines);

	my $prev_k = undef;
	my $prev_v = "";
	my $incharmap = 0;
	foreach my $l (@lines) {
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		# Skip lines that are not "<name> code"; using $1/$2 after a
		# failed match would silently repeat the previous entry and
		# register the character as an alias of itself.
		next if ($l !~ /^<([^\s]+)>\s+(.*)/);
		my $k = $1;
		my $v = $2;
		$k =~ s/_/ /g;		# unicode char string
		$v =~ s/\\x//g;		# UTF-8 char code
		$utf8map{$k} = $v;

		$utf8aliases{$k} = $prev_k if ($prev_v eq $v);

		$prev_v = $v;
		$prev_k = $k;
	}
}
310 | ||
# get_encodings($dir)
#
# For every encoding named in %encodings, reads $dir/<encoding>.TXT and
# fills $convertors{<encoding>}{<unicode code>} = <local code>, both as
# upper-case hex without a leading "0x".  Column one of each data line is
# the local code, column two the Unicode code.  Encodings whose table
# cannot be opened are reported and skipped.
sub get_encodings {
	my $dir = shift;
	foreach my $e (sort(keys(%encodings))) {
		# Three-argument open: the name comes from data files and must
		# never be interpreted as an open mode.
		my $fin;
		if (!open($fin, "<", "$dir/$e.TXT")) {
			print "Cannot open charmap for $e\n";
			next;
		}
		$encodings{$e} = 1;
		while (my $l = <$fin>) {
			chomp($l);
			$l =~ s/\r//;
			next if ($l =~ /^\#/);
			next if ($l eq "");

			my @a = split(" ", $l);
			next if ($#a < 1);
			$a[0] =~ s/^0[xX]//;	# local char code
			$a[1] =~ s/^0[xX]//;	# unicode char code
			$convertors{$e}{uc($a[1])} = uc($a[0]);
		}
		close($fin);
	}
}
336 | ||
# get_languages()
#
# Loads the locale description via get_xmldata($ETCDIR) and copies its
# L, T, AM and E sections into %languages, %translations,
# %alternativemonths and %encodings.  When --lc was given, its value is
# split on '_' into @filter as (language, family, country); a two-part
# value gets the family "x".  The resulting filter is dumped to stdout.
sub get_languages {
	my %xml = get_xmldata($ETCDIR);
	%languages         = %{$xml{L}};
	%translations      = %{$xml{T}};
	%alternativemonths = %{$xml{AM}};
	%encodings         = %{$xml{E}};

	return unless (defined $doonly);

	my @parts = split(/_/, $doonly);
	my $last = $#parts;
	if ($last == 1) {
		@filter[0 .. 2] = ($parts[0], "x", $parts[1]);
	} elsif ($last == 2) {
		@filter[0 .. 2] = @parts[0 .. 2];
	}

	print Dumper(@filter);
	return;
}
360 | ||
ce02c398 JM |
# transform_ctypes()
#
# Produces the LC_CTYPE sources under "$TYPE.draft/" for every selected
# locale:
#  * <locale>.UTF-8.src is a verbatim copy of
#    $CLDRDIR/posix/xx_Comm_US.UTF-8.src;
#  * for every other encoding of the locale, <locale>.<enc>.src holds the
#    comment_char/escape_char lines and the LC_CTYPE ... END LC_CTYPE
#    section of $CLDRDIR/posix/<locale>.UTF-8.src below a warning banner.
# A SHA-1 of each result is stored in %languages and %hashtable so that
# make_makefile() can link identical files.
#
# Unreadable inputs are reported and skipped; an output file that cannot be
# created is fatal (it used to be ignored, silently dropping the data).
sub transform_ctypes {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread

		# Family "x" means "no family" and is left out of file names
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
		my $fin;
		if (! -f $filename || !open($fin, "<", $filename)) {
			print STDERR "Cannot open $filename\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		close($fin);

		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$file.$DEFENCODING.src";
		open(my $fout, ">", $outname)
		    or die("Cannot write $outname: $!");
		print $fout @lines;
		close($fout) or die("Cannot write $outname: $!");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
			my $src;
			if (! -f $filename || !open($src, "<", $filename)) {
				print STDERR "Cannot open $filename\n";
				next;
			}
			@lines = ();
			while (my $line = <$src>) {
				if (($line =~ /^comment_char\s/)
				    || ($line =~ /^escape_char\s/)) {
					push @lines, $line;
				}
				# flip-flop: true from the LC_CTYPE line up to
				# and including the END LC_CTYPE line
				if ($line =~ /^LC_CTYPE/ ..
				    $line =~ /^END LC_CTYPE/) {
					push @lines, $line;
				}
			}
			close($src);

			# The encoding name is mixed into the hash so that
			# different encodings never compare equal.
			my $uhex = sha1_hex(join("\n", @lines) . $enc);
			$languages{$l}{$f}{data}{$c}{$enc} = $uhex;
			$hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;

			$outname = "$TYPE.draft/$file.$enc.src";
			open(my $eout, ">", $outname)
			    or die("Cannot write $outname: $!");
			print $eout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			print $eout @lines;
			close($eout) or die("Cannot write $outname: $!");
		}
	}
	}
	}
}
432 | ||
433 | ||
f28e3132 JM |
# transform_collation()
#
# For every selected locale, extracts the comment_char/escape_char lines
# and the LC_COLLATE ... END LC_COLLATE section (runs of spaces collapsed
# to one) from its UTF-8 source and writes them, below a warning banner,
# to "$TYPE.draft/<locale>.UTF-8.src".  That file is then copied to every
# other encoding of the locale.  The source is looked up in
# $CLDRDIR/posix, then $ETCDIR, then under the family's fallback name in
# $CLDRDIR/posix.  A SHA-1 of the extracted text is stored in %languages
# and %hashtable for make_makefile().
#
# Unreadable inputs are reported and skipped; an output file that cannot be
# created is fatal, and a failed copy is reported (both used to be ignored).
sub transform_collation {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread

		# Family "x" means "no family" and is left out of file names
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;
		my $actfile = $file;	# output name, unaffected by fallback

		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		    && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		my $fin;
		if (! -f $filename || !open($fin, "<", $filename)) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read

		my @lines;
		while (my $line = <$fin>) {
			if (($line =~ /^comment_char\s/)
			    || ($line =~ /^escape_char\s/)) {
				push @lines, $line;
			}
			# flip-flop: true from the LC_COLLATE line up to and
			# including the END LC_COLLATE line
			if ($line =~ /^LC_COLLATE/ ..
			    $line =~ /^END LC_COLLATE/) {
				$line =~ s/[ ]+/ /g;
				push @lines, $line;
			}
		}
		close($fin);

		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(my $fout, ">", $outname)
		    or die("Cannot write $outname: $!");
		print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
		print $fout @lines;
		close($fout) or die("Cannot write $outname: $!");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			copy($outname, "$TYPE.draft/$actfile.$enc.src")
			    or print STDERR "Cannot copy $outname to " .
			    "$TYPE.draft/$actfile.$enc.src: $!\n";
			$languages{$l}{$f}{data}{$c}{$enc} = $shex;
			$hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
		}
	}
	}
	}
}
503 | ||
252345eb JM |
# get_fields()
#
# For every selected locale, reads its UTF-8 source file (looked up in
# $CLDRDIR/posix, then $ETCDIR, then under the family's fallback name) and
# stores the raw value of each keyword listed in %keys in
# $values{<language>}{<country>}{<keyword>}.  A value may continue over
# several lines, each ending in '/'.  Underscores inside <SYMBOL_NAME>
# tokens are turned into spaces, the form used for the keys of %utf8map.
#
# Dies if an underscore remains in a value after that rewrite.
sub get_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);

		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread

		# Family "x" means "no family" and is left out of file names
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		    && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		my $fin;
		if (! -f $filename || !open($fin, "<", $filename)) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		chomp(@lines);
		close($fin);

		foreach my $k (keys(%keys)) {
			# Reset for every keyword: a file ending in the
			# middle of a continuation must not make the next
			# keyword swallow unrelated lines.
			my $continue = 0;
			foreach my $raw (@lines) {
				my $line = $raw;
				$line =~ s/\r//;
				next if (!$continue && $line !~ /^\Q$k\E\s/);
				if ($continue) {
					$line =~ s/^\s+//;
				} else {
					$line =~ s/^\Q$k\E\s+//;
				}

				$values{$l}{$c}{$k} = ""
				    if (!defined $values{$l}{$c}{$k});

				# A trailing '/' marks a continued value
				$continue = ($line =~ /\/$/);
				$line =~ s/\/$// if ($continue);

				# Repeat while a rewrite succeeds.  Looping on
				# "contains _" instead never terminated when an
				# underscore sat outside <...>, so the check
				# below could not be reached.
				1 while ($line =~
				    s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
				die "_ in data - $line" if ($line =~ /_/);
				$values{$l}{$c}{$k} .= $line;

				last if (!$continue);
			}
		}
	}
	}
	}
}
571 | ||
# decodecldr($e, $s)
#
# Converts the symbolic character name $s into its byte sequence in
# encoding $e.
#
# For "UTF-8" the hex code comes straight from %utf8map.  For any other
# encoding the name is resolved to a Unicode code (via %ucd, the alias
# recorded in %utf8aliases, or a per-encoding entry in %translations) and
# that code is looked up in %convertors; %translations may instead supply
# the hex code directly or name a substitute Unicode character.
#
# Returns the bytes for any hex code made of whole bytes.  Dies when the
# name cannot be resolved.  For a malformed (odd-length or empty) code it
# prints a message and returns the text "length = N", as before.
sub decodecldr {
	my $e = shift;
	my $s = shift;

	my $v = undef;

	if ($e eq "UTF-8") {
		#
		# Conversion to UTF-8 can be done from the Unicode name to
		# the UTF-8 character code.
		#
		$v = $utf8map{$s};
		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	} else {
		#
		# Conversion to these encodings can be done from the Unicode
		# name to Unicode code to the encodings code.
		#
		my $ucc = undef;
		$ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
		$ucc = $ucd{name2code}{$utf8aliases{$s}}
		    if (!defined $ucc
		    && $utf8aliases{$s}
		    && defined $ucd{name2code}{$utf8aliases{$s}});

		if (!defined $ucc) {
			if (defined $translations{$e}{$s}{hex}) {
				$v = $translations{$e}{$s}{hex};
				$ucc = 0;	# placeholder; $v is already known
			} elsif (defined $translations{$e}{$s}{ucc}) {
				$ucc = $translations{$e}{$s}{ucc};
			}
		}

		die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
		$v = $convertors{$e}{$ucc} if (!defined $v);

		$v = $translations{$e}{$s}{hex}
		    if (!defined $v && defined $translations{$e}{$s}{hex});

		if (!defined $v && defined $translations{$e}{$s}{unicode}) {
			# Resolve the substitute character in a variable of
			# its own.  $ucc is always defined by now, so reusing
			# it meant the alias lookup could never run.
			my $ucn = $translations{$e}{$s}{unicode};
			my $subst = undef;
			$subst = $ucd{name2code}{$ucn}
			    if (defined $ucd{name2code}{$ucn});
			$subst = $ucd{name2code}{$utf8aliases{$ucn}}
			    if (!defined $subst
			    && defined $utf8aliases{$ucn}
			    && defined $ucd{name2code}{$utf8aliases{$ucn}});
			$v = $convertors{$e}{$subst} if (defined $subst);
		}

		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	}

	# Two hex digits per byte.  "H*" handles any number of bytes, so
	# four-byte codes are converted as well (previously only one to
	# three bytes were).
	my $len = length($v);
	return pack("H*", $v) if ($len > 0 && $len % 2 == 0);

	print STDERR "Cannot convert $e $s\n";
	return "length = " . $len;

}
634 | ||
# translate($enc, $v)
#
# Returns the %translations entry for name $v in encoding $enc, or undef
# when there is none.
sub translate {
	my ($enc, $v) = @_;

	my $entry = $translations{$enc}{$v};
	return defined $entry ? $entry : undef;
}
642 | ||
# _decode_symbols($enc, $k, $text, $okay)
#
# Replaces every <SYMBOL NAME> in $text with the bytes decodecldr() returns
# for encoding $enc and returns the result.  If a symbol cannot be
# converted, a message naming field $k is printed, the scalar referenced by
# $okay is cleared and decoding stops (retrying the same symbol, as the
# former `next' did, could never make progress).
sub _decode_symbols {
	my ($enc, $k, $text, $okay) = @_;

	while ($text =~ /^(.*?)<(.*?)>(.*)/) {
		# Save the captures before calling anything else
		my ($p1, $cm, $p3) = ($1, $2, $3);

		my $rv = decodecldr($enc, $cm);
		if (!defined $rv) {
			print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
			$$okay = 0;
			last;
		}

		$text = $p1 . $rv . $p3;
	}
	return $text;
}

# print_fields()
#
# Writes "$TYPE.draft/<locale>.<encoding>.src" for every selected locale
# and each of its encodings from the values collected by get_fields().
# Each field listed in %keys is preceded by a comment taken from %DESC and
# written according to its format:
#   "i", "ai"      value unchanged
#   "s"            one string with its <SYMBOL> names decoded
#   "as"           ';'-separated strings, decoded, one per line
#   ">other"       use the format (and value) of field "other"
#   "<cb<src<fmt"  value is $callback{cb} applied to field "src", stored
#                  back into %values and then written as format "fmt"
# The file is written as *.new and renamed to *.src, or to *.failed when a
# symbol could not be converted.  A SHA-1 of the content is stored in
# %languages and %hashtable for make_makefile().
#
# Dies on a field without a %DESC entry, on an unknown format, and when
# the output file cannot be created.
sub print_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
				print "Skipping ${l}_" .
				    ($f eq "x" ? "" : "${f}_") .
				    "${c} - not read\n";
				next;
			}
			my $file = $l;
			$file .= "_" . $f if ($f ne "x");
			$file .= "_" . $c;
			print "Writing to $file in $enc\n";

			if ($enc ne $DEFENCODING &&
			    !defined $convertors{$enc}) {
				print "Failed! Cannot convert to $enc.\n";
				next;
			};

			my $newname = "$TYPE.draft/$file.$enc.new";
			open(my $fout, ">", $newname)
			    or die("Cannot write $newname: $!");
			my $okay = 1;
			my $output = "";
			print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			foreach my $k (keys(%keys)) {
				# $fmt, not $f: $f is the locale family and is
				# needed again after this loop.
				my $fmt = $keys{$k};

				die("Unknown $k in \%DESC")
				    if (!defined $DESC{$k});

				$output .= "#\n# $DESC{$k}\n";

				# Replace one row with another
				if ($fmt =~ /^>/) {
					$k = substr($fmt, 1);
					$fmt = $keys{$k};
				}

				# Callback function
				if ($fmt =~ /^\</) {
					$callback{data}{c} = $c;
					$callback{data}{k} = $k;
					$callback{data}{l} = $l;
					$callback{data}{e} = $enc;
					my @a = split(/\</, substr($fmt, 1));
					my $rv = $callback{$a[0]}->(
					    $values{$l}{$c}{$a[1]});
					$values{$l}{$c}{$k} = $rv;
					$fmt = $a[2];
					$callback{data} = undef;
				}

				my $v = $values{$l}{$c}{$k};
				$v = "undef" if (!defined $v);

				if ($fmt eq "i" || $fmt eq "ai") {
					$output .= "$v\n";
					next;
				}
				if ($fmt eq "s") {
					$v =~ s/^"//;
					$v =~ s/"$//;
					$output .= _decode_symbols($enc, $k,
					    $v, \$okay) . "\n";
					next;
				}
				if ($fmt eq "as") {
					foreach my $item (split(/;/, $v)) {
						$item =~ s/^"//;
						$item =~ s/"$//;
						$output .= _decode_symbols(
						    $enc, $k, $item,
						    \$okay) . "\n";
					}
					next;
				}

				die("$k is '$fmt'");

			}

			my $digest = sha1_hex($output);
			$languages{$l}{$f}{data}{$c}{$enc} = $digest;
			$hashtable{$digest}{"${l}_${f}_${c}.$enc"} = 1;
			print $fout "$output# EOF\n";
			close($fout) or die("Cannot write $newname: $!");

			my $final = "$TYPE.draft/$file.$enc." .
			    ($okay ? "src" : "failed");
			rename($newname, $final)
			    or print STDERR
			    "Cannot rename $newname to $final: $!\n";
		}
	}
	}
	}
}
792 | ||
# make_makefile()
#
# Writes "$TYPE.draft/Makefile".  Nothing is written when a --lc filter is
# active.  The Makefile contains:
#  * a suffix rule turning each *.src into its output (localedef for
#    colldef/ctypedef, a comment-stripping grep otherwise);
#  * one "SAME+=" line per locale whose generated content hashed the same
#    as another locale's (see %hashtable); the first name in the sorted
#    group is the link target, and the others are cleared in %languages so
#    they are not listed again under LOCALES;
#  * one "LOCALES+=" line per remaining locale/encoding;
#  * "SAME+=" lines for the nc_link and e_link entries of %languages;
#  * the loops that expand SAME and LOCALES into SYMLINKS and FILESDIR_*.
#
# Dies when the Makefile cannot be created.
sub make_makefile {
	return if ($#filter > -1);
	print "Creating Makefile for $TYPE\n";
	my $SRCOUT;
	my $SRCOUT2;
	# Only ctypedef adds an extra section; start empty so that the other
	# types do not interpolate an undefined value below.
	my $SRCOUT3 = "";
	my $MAPLOC;
	if ($TYPE eq "colldef") {
		$SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
			"\t-f \${MAPLOC}/map.UTF-8 " .
			"\${.OBJDIR}/\${.IMPSRC:T:R}";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT2 = "LC_COLLATE";
	}
	elsif ($TYPE eq "ctypedef") {
		$SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:C/^.*\\.//} " .
			"\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
			" || true";
		$SRCOUT2 = "LC_CTYPE";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT3 = "## SYMPAIRS\n\n" .
			".for PAIR in \${SYMPAIRS}\n" .
			"\${PAIR:C/^.*://:S/src\$/LC_CTYPE/}: " .
			"\${PAIR:C/:.*//}\n" .
			"\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
			"\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
			" || true\n" .
			".endfor\n\n";
	}
	else {
		$SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
		$SRCOUT2 = "out";
		$MAPLOC = "";
	}

	my $makefile = "$TYPE.draft/Makefile";
	open(my $fout, ">", $makefile)
	    or die("Cannot write $makefile: $!");

	# The recipe line must begin with a tab; it is written as \t so that
	# it survives whitespace-mangling editors.
	print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR= \${SHAREDIR}/locale
FILESNAME= $FILESNAMES{$TYPE}
.SUFFIXES: .src .${SRCOUT2}
${MAPLOC}
.src.${SRCOUT2}:
\t$SRCOUT

## PLACEHOLDER

EOF

	foreach my $hash (keys(%hashtable)) {
		# For colldef, weight LOCALES to UTF-8
		#     Sort as upper-case and reverse to achieve it
		#     Make en_US, ru_RU, and ca_AD preferred
		my @files;
		if ($TYPE eq "colldef") {
			@files = sort {
				if ($a eq 'en_x_US.UTF-8' ||
				    $a eq 'ru_x_RU.UTF-8' ||
				    $a eq 'ca_x_AD.UTF-8') { return -1; }
				elsif ($b eq 'en_x_US.UTF-8' ||
				       $b eq 'ru_x_RU.UTF-8' ||
				       $b eq 'ca_x_AD.UTF-8') { return 1; }
				else { return uc($b) cmp uc($a); }
			} keys(%{$hashtable{$hash}});
		} elsif ($TYPE eq "ctypedef") {
			@files = sort {
				if ($a eq 'en_x_US.UTF-8') { return -1; }
				elsif ($b eq 'en_x_US.UTF-8') { return 1; }
				if ($a =~ /^en_x_US/) { return -1; }
				elsif ($b =~ /^en_x_US/) { return 1; }

				# Both sides are anchored; the $b test used to
				# lack the '^' its $a counterpart has.
				if ($a =~ /^en_x_GB.ISO8859-15/ ||
				    $a =~ /^ru_x_RU/) { return -1; }
				elsif ($b =~ /^en_x_GB.ISO8859-15/ ||
				       $b =~ /^ru_x_RU/) { return 1; }
				else { return uc($b) cmp uc($a); }

			} keys(%{$hashtable{$hash}});
		} else {
			@files = sort {
				if ($a =~ /_Comm_/ ||
				    $b eq 'en_x_US.UTF-8') { return 1; }
				elsif ($b =~ /_Comm_/ ||
				    $a eq 'en_x_US.UTF-8') { return -1; }
				else { return uc($b) cmp uc($a); }
			} keys(%{$hashtable{$hash}});
		}
		if ($#files > 0) {
			my $link = shift(@files);
			$link =~ s/_x_/_/;	# strip family if none there
			foreach my $file (@files) {
				# "<lang>_<family>_<country>.<encoding>"
				my @name = split(/_/, $file);
				my @tail = split(/\./, $name[-1]);
				$file =~ s/_x_/_/;
				print $fout "SAME+=\t\t$link:$file\n";
				# Linked instead of built: clear the entry so
				# it is skipped in the LOCALES list below.
				undef($languages{$name[0]}{$name[1]}{data}{$tail[0]}{$tail[1]});
			}
		}
	}

	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
		 && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
			print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
			    "${c} - not read\n";
			next;
		}

		# Family "x" means "no family" and is left out of the name
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if (!defined $languages{$l}{$f}{data}{$c}{$e});
			print $fout "LOCALES+=\t$file.$e\n";
		}

		if (defined $languages{$l}{$f}{nc_link}) {
			foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
				print $fout "SAME+=\t\t$file.$e:$languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
			}
		}

		if (defined $languages{$l}{$f}{e_link}) {
			foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
				my @pair = split(/:/, $el);
				print $fout "SAME+=\t\t$file.$pair[0]:$file.$pair[1]\t# legacy (same charset)\n";
			}
		}

	}
	}
	}

	print $fout <<EOF;

FILES= \${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES= \${FILES}

.for f in \${SAME}
SYMLINKS+= ../\${f:C/:.*\$//}/\${FILESNAME} \\
\t\${LOCALEDIR}/\${f:C/^.*://}/\${FILESNAME}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

${SRCOUT3}.include <bsd.prog.mk>
EOF

	close($fout) or die("Cannot write $makefile: $!");
}