tools: Do not hardcode path to perl.
[dragonfly.git] / tools / tools / locale / tools / cldr2def.pl
CommitLineData
344be199 1#!/usr/bin/env perl -wC
252345eb
JM
2
3use strict;
f28e3132 4use File::Copy;
252345eb
JM
5use XML::Parser;
6use Tie::IxHash;
7use Data::Dumper;
8use Getopt::Long;
9use Digest::SHA qw(sha1_hex);
10require "charmaps.pm";
11
12
# Command-line handling.
#
# --cldr, --unidata, --etc and --type are all used unconditionally further
# down, so they are treated as mandatory.  --lc optionally restricts the run
# to one locale (la_CC or la_Family_CC).
my $usage = "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> " .
    "--etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";

if ($#ARGV < 2) {
    print $usage;
    exit(1);
}

my $DEFENCODING = "UTF-8";
my @filter = ();

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $doonly = undef;

my $result = GetOptions (
    "cldr=s"    => \$CLDRDIR,
    "unidata=s" => \$UNIDATADIR,
    "etc=s"     => \$ETCDIR,
    "type=s"    => \$TYPE,
    "lc=s"      => \$doonly
    );

# GetOptions returns false when it met an unknown or malformed option.
# Stop here rather than continue with undefined directories or type.
if (!$result || !defined $CLDRDIR || !defined $UNIDATADIR
    || !defined $ETCDIR || !defined $TYPE) {
    print $usage;
    exit(1);
}
34
# Tables shared by the subroutines below.  They start out empty and are
# filled by the loader calls in this section.
my (%convertors, %ucd, %values, %hashtable);
my (%languages, %translations, %encodings, %alternativemonths);
get_languages();

my (%utf8map, %utf8aliases);
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# %keys (the field list of the selected type) and %hashtable are tied to
# Tie::IxHash; the tie happens only after the loaders above have run.
my %keys;
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");
55
# Value written as FILESNAME= into the generated Makefile, per --type.
my %FILESNAMES = (
    monetdef   => "LC_MONETARY",
    timedef    => "LC_TIME",
    msgdef     => "LC_MESSAGES",
    numericdef => "LC_NUMERIC",
    colldef    => "LC_COLLATE",
    ctypedef   => "LC_CTYPE",
);
64
# Callbacks named in "<callback<source<format" field specifications.
# The "data" slot is filled by print_fields() with the current locale
# context just before a callback is invoked.
my %callback = (
    mdorder => \&callback_mdorder,
    altmon  => \&callback_altmon,
    cformat => \&callback_cformat,
    cbabmon => \&callback_abmon,
    data    => undef,
);
72
# Comment text emitted above each field in the generated .src files.
my %DESC = (

    # numericdef
    decimal_point     => "decimal_point",
    thousands_sep     => "thousands_sep",
    grouping          => "grouping",

    # monetdef
    int_curr_symbol   => "int_curr_symbol (last character always SPACE)",
    currency_symbol   => "currency_symbol",
    mon_decimal_point => "mon_decimal_point",
    mon_thousands_sep => "mon_thousands_sep",
    mon_grouping      => "mon_grouping",
    positive_sign     => "positive_sign",
    negative_sign     => "negative_sign",
    int_frac_digits   => "int_frac_digits",
    frac_digits       => "frac_digits",
    p_cs_precedes     => "p_cs_precedes",
    p_sep_by_space    => "p_sep_by_space",
    n_cs_precedes     => "n_cs_precedes",
    n_sep_by_space    => "n_sep_by_space",
    p_sign_posn       => "p_sign_posn",
    n_sign_posn       => "n_sign_posn",

    # msgdef
    yesexpr           => "yesexpr",
    noexpr            => "noexpr",
    yesstr            => "yesstr",
    nostr             => "nostr",

    # timedef
    abmon             => "Short month names",
    mon               => "Long month names (as in a date)",
    abday             => "Short weekday names",
    day               => "Long weekday names",
    t_fmt             => "X_fmt",
    d_fmt             => "x_fmt",
    c_fmt             => "c_fmt",
    am_pm             => "AM/PM",
    d_t_fmt           => "date_fmt",
    altmon            => "Long month names (without case ending)",
    md_order          => "md_order",
    t_fmt_ampm        => "ampm_fmt",
);
118
f28e3132
JM
# Field tables for the definition types that are produced by
# get_fields()/print_fields().  Each entry is an ordered key => format
# list that is assigned to the (order-preserving) %keys hash.
#
# Formats: "s" string, "as" ';'-separated list of strings, "i" integer,
# "ai" list of integers, "<callback<source<format" runs the named
# callback on the source field and then treats the result as <format>.
my %FIELD_SPECS = (
    numericdef => [
        "decimal_point"     => "s",
        "thousands_sep"     => "s",
        "grouping"          => "ai",
    ],
    monetdef => [
        "int_curr_symbol"   => "s",
        "currency_symbol"   => "s",
        "mon_decimal_point" => "s",
        "mon_thousands_sep" => "s",
        "mon_grouping"      => "ai",
        "positive_sign"     => "s",
        "negative_sign"     => "s",
        "int_frac_digits"   => "i",
        "frac_digits"       => "i",
        "p_cs_precedes"     => "i",
        "p_sep_by_space"    => "i",
        "n_cs_precedes"     => "i",
        "n_sep_by_space"    => "i",
        "p_sign_posn"       => "i",
        "n_sign_posn"       => "i",
    ],
    msgdef => [
        "yesexpr"           => "s",
        "noexpr"            => "s",
        "yesstr"            => "s",
        "nostr"             => "s",
    ],
    timedef => [
        "abmon"             => "<cbabmon<abmon<as",
        "mon"               => "as",
        "abday"             => "as",
        "day"               => "as",
        "t_fmt"             => "s",
        "d_fmt"             => "s",
        "c_fmt"             => "<cformat<d_t_fmt<s",
        "am_pm"             => "as",
        "d_t_fmt"           => "s",
        "altmon"            => "<altmon<mon<as",
        "md_order"          => "<mdorder<d_fmt<s",
        "t_fmt_ampm"        => "s",
    ],
);

# Refuse an unknown or missing type instead of silently doing nothing.
if (!defined $TYPE || !exists $FILESNAMES{$TYPE}) {
    print STDERR "Unknown or missing --type; expected one of: " .
        join(", ", sort keys(%FILESNAMES)) . "\n";
    exit(1);
}

if ($TYPE eq "colldef") {
    transform_collation();
    make_makefile();
}
elsif ($TYPE eq "ctypedef") {
    transform_ctypes();
    make_makefile();
}
else {
    %keys = @{$FIELD_SPECS{$TYPE}};
    get_fields();
    print_fields();
    make_makefile();
}
195
ddddc53a
JM
# Callback for "c_fmt": takes the d_t_fmt value and removes the first
# " %Z" and the first " %z" from it.
# Returns undef when no source value is available, like callback_mdorder.
sub callback_cformat {
    my $s = shift;
    return undef if (!defined $s);
    $s =~ s/ %Z//;
    $s =~ s/ %z//;
    return $s;
};
202
252345eb
JM
# Callback for "md_order": keeps only the letters 'd' and 'm' of the
# given d_fmt value, in their original order.  An undefined value is
# passed through as undef.
sub callback_mdorder {
    my $fmt = shift;
    if (defined $fmt) {
        $fmt =~ s/[^dm]//g;
        return $fmt;
    }
    return undef;
};
209
# Callback for "altmon": when %alternativemonths has an entry for the
# current language/country (taken from $callback{data}), return that
# ';'-separated list with the whitespace around each name trimmed.
# Otherwise return the value passed in (the "mon" field) unchanged.
sub callback_altmon {
    my $s = shift;

    my $alt = $alternativemonths{$callback{data}{l}}{$callback{data}{c}};
    return $s if (!defined $alt);

    my @cleaned = map {
        my $name = $_;
        $name =~ s/^\s+//;
        $name =~ s/\s+$//;
        $name;
    } split(";", $alt);

    return join(";", @cleaned);
}
229
fb572e12
JM
# Callback for "abmon": for the listed CJK locales, prefix single-digit
# month names with <space> so that columns line up (style established in
# FreeBSD in 2001).  Names starting with <one> followed by <zero>, <one>
# or <two> are left alone.  Other locales get the value back unchanged.
sub callback_abmon {
    my $s = shift;
    my $nl = $callback{data}{l} . "_" . $callback{data}{c};

    my %padded_locale = map { $_ => 1 }
        ('ja_JP', 'ko_KR', 'zh_CN', 'zh_HK', 'zh_TW');
    return $s if (!$padded_locale{$nl});

    my @names = split(";", $s);
    my @cleaned;
    foreach my $name (@names) {
        my $single_digit =
            ($name =~ /^"<(two|three|four|five|six|seven|eight|nine)>/ ||
            ($name =~ /^"<one>/ && $name !~ /^"<one>(<zero>|<one>|<two>)/));
        $name =~ s/^"/"<space>/ if ($single_digit);
        push @cleaned, $name;
    }
    return join(";", @cleaned);
}
253
252345eb
JM
254############################
255
# Load <directory>/UnicodeData.txt into %ucd.
#
# Each line is split on ';'.  Field 0 is stored as the code and field 1 as
# the name, in both directions:
#   $ucd{code2name}{<code>} = <name>
#   $ucd{name2code}{<name>} = <code>
# Dies when the file cannot be opened.
sub get_unidata {
    my $directory = shift;
    my $path = "$directory/UnicodeData.txt";

    open(my $fin, "<", $path)
        or die("Cannot open $path: $!");
    my @lines = <$fin>;
    chomp(@lines);
    close($fin);

    foreach my $l (@lines) {
        my @a = split(/;/, $l);
        next if (@a < 2);               # blank or malformed line

        $ucd{code2name}{"$a[0]"} = $a[1];	# Unicode name
        $ucd{name2code}{"$a[1]"} = $a[0];	# Unicode code
    }
}
272
# Load a POSIX charmap file into %utf8map and %utf8aliases.
#
# Only lines between "CHARMAP" and "END CHARMAP" are used.  For each
# "<NAME>  \xHH\xHH..." line the name (underscores turned into spaces) is
# mapped to the hex digits with the "\x" markers removed.  When two
# consecutive entries carry the same code, the later name is recorded in
# %utf8aliases as an alias of the earlier one.
#
# A file that cannot be opened is reported on STDERR and leaves the maps
# untouched; the run continues.
sub get_utf8map {
    my $file = shift;

    my $fin;
    if (!open($fin, "<", $file)) {
        print STDERR "Cannot open $file: $!\n";
        return;
    }
    my @lines = <$fin>;
    close($fin);
    chomp(@lines);

    my $prev_k = undef;
    my $prev_v = "";
    my $incharmap = 0;
    foreach my $l (@lines) {
        $l =~ s/\r//;
        next if ($l =~ /^\#/);
        next if ($l eq "");

        if ($l eq "CHARMAP") {
            $incharmap = 1;
            next;
        }

        next if (!$incharmap);
        last if ($l eq "END CHARMAP");

        # Skip lines that are not "<NAME> value"; using $1/$2 after a
        # failed match would silently reuse the previous entry.
        next unless ($l =~ /^<([^\s]+)>\s+(.*)/);
        my ($k, $v) = ($1, $2);
        $k =~ s/_/ /g;		# unicode char string
        $v =~ s/\\x//g;		# UTF-8 char code
        $utf8map{$k} = $v;

        $utf8aliases{$k} = $prev_k if ($prev_v eq $v);

        $prev_v = $v;
        $prev_k = $k;
    }
}
310
# Load the conversion table of every encoding named in %encodings from
# <dir>/<encoding>.TXT into %convertors.
#
# Each data line is split on whitespace: column 0 is the local character
# code, column 1 the Unicode code, both with an optional 0x prefix.
# The result is $convertors{<encoding>}{<UNICODE CODE>} = <LOCAL CODE>,
# both upper-cased.  Encodings whose file cannot be opened are reported
# on STDOUT and skipped.
sub get_encodings {
    my $dir = shift;
    foreach my $e (sort(keys(%encodings))) {
        my $fin;
        if (!open($fin, "<", "$dir/$e.TXT")) {
            print "Cannot open charmap for $e\n";
            next;
        }
        $encodings{$e} = 1;
        my @lines = <$fin>;
        close($fin);
        chomp(@lines);
        foreach my $l (@lines) {
            $l =~ s/\r//;
            next if ($l =~ /^\#/);
            next if ($l eq "");

            my @a = split(" ", $l);
            next if ($#a < 1);
            $a[0] =~ s/^0[xX]//;	# local char code
            $a[1] =~ s/^0[xX]//;	# unicode char code
            $convertors{$e}{uc($a[1])} = uc($a[0]);
        }
    }
}
336
# Fetch the locale description via get_xmldata($ETCDIR) and copy its
# L, T, AM and E sections into %languages, %translations,
# %alternativemonths and %encodings.
#
# When --lc was given, split it on '_' into @filter as
# (language, family, country); a two-part value gets the family "x".
# The resulting filter is dumped to STDOUT.
sub get_languages {
    my %xml = get_xmldata($ETCDIR);
    %languages = %{$xml{L}};
    %translations = %{$xml{T}};
    %alternativemonths = %{$xml{AM}};
    %encodings = %{$xml{E}};

    if (defined $doonly) {
        my @parts = split(/_/, $doonly);
        my $count = scalar(@parts);
        if ($count == 2) {
            @filter[0 .. 2] = ($parts[0], "x", $parts[1]);
        } elsif ($count == 3) {
            @filter[0 .. 2] = @parts[0 .. 2];
        }

        print Dumper(@filter);
    }
    return;
}
360
ce02c398
JM
# Write the LC_CTYPE source files into "$TYPE.draft/".
#
# For every selected language/family/country:
#  - the UTF-8 source is a verbatim copy of
#    $CLDRDIR/posix/xx_Comm_US.UTF-8.src;
#  - for every other encoding listed for the locale, the comment_char /
#    escape_char lines and the LC_CTYPE .. END LC_CTYPE section of the
#    locale's own UTF-8 source are written below a "do not edit" header.
# A SHA-1 of the content is stored in %languages and %hashtable (it is
# what make_makefile() groups identical outputs by).
#
# Missing or unreadable inputs are reported on STDERR and skipped; an
# output file that cannot be created is fatal.
sub transform_ctypes {
    foreach my $l (sort keys(%languages)) {
    foreach my $f (sort keys(%{$languages{$l}})) {
    foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
        next if ($#filter == 2 && ($filter[0] ne $l
            || $filter[1] ne $f || $filter[2] ne $c));
        next if (defined $languages{$l}{$f}{definitions}
            && $languages{$l}{$f}{definitions} !~ /$TYPE/);
        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
        my $file = $l . "_";
        $file .= $f . "_" if ($f ne "x");
        $file .= $c;
        my $actfile = $file;

        my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
        if (! -f $filename) {
            print STDERR "Cannot open $filename\n";
            next;
        }
        my $fin;
        if (!open($fin, "<", $filename)) {
            print STDERR "Cannot open $filename\n";
            next;
        }
        print "Reading from $filename for ${l}_${f}_${c}\n";
        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
        my @lines = <$fin>;
        close($fin);

        my $shex = sha1_hex(join("\n", @lines));
        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
        $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

        my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
        open(my $fout, ">", $outname)
            or die("Cannot write $outname: $!");
        print $fout @lines;
        close($fout)
            or die("Cannot write $outname: $!");

        foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
            next if ($enc eq $DEFENCODING);
            $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
            if (! -f $filename) {
                print STDERR "Cannot open $filename\n";
                next;
            }
            if (!open($fin, "<", $filename)) {
                print STDERR "Cannot open $filename\n";
                next;
            }
            @lines = ();
            while (my $line = <$fin>) {
                if (($line =~ /^comment_char\s/) ||
                    ($line =~ /^escape_char\s/)) {
                    push @lines, $line;
                }
                # Flip-flop: true from the LC_CTYPE line through the
                # END LC_CTYPE line, inclusive.
                if (($line =~ /^LC_CTYPE/) .. ($line =~ /^END LC_CTYPE/)) {
                    push @lines, $line;
                }
            }
            close($fin);

            # The encoding name is mixed into the hash so that different
            # encodings never share a digest.
            my $uhex = sha1_hex(join("\n", @lines) . $enc);
            $languages{$l}{$f}{data}{$c}{$enc} = $uhex;
            $hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;

            $outname = "$TYPE.draft/$actfile.$enc.src";
            open($fout, ">", $outname)
                or die("Cannot write $outname: $!");
            print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
            print $fout @lines;
            close($fout)
                or die("Cannot write $outname: $!");
        }
    }
    }
    }
}
432
433
f28e3132
JM
# Write the LC_COLLATE source files into "$TYPE.draft/".
#
# For every selected language/family/country the UTF-8 source is looked
# up in $CLDRDIR/posix, then $ETCDIR, then (if the family defines one)
# under the fallback name in $CLDRDIR/posix.  The comment_char /
# escape_char lines and the LC_COLLATE .. END LC_COLLATE section (with
# runs of spaces squeezed to one) are written below a "do not edit"
# header.  The UTF-8 output is then copied for every other encoding of
# the locale, and all of them share the same SHA-1 in %languages and
# %hashtable.
#
# Missing or unreadable inputs are reported on STDERR and skipped; an
# output file that cannot be created is fatal.
sub transform_collation {
    foreach my $l (sort keys(%languages)) {
    foreach my $f (sort keys(%{$languages{$l}})) {
    foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
        next if ($#filter == 2 && ($filter[0] ne $l
            || $filter[1] ne $f || $filter[2] ne $c));
        next if (defined $languages{$l}{$f}{definitions}
            && $languages{$l}{$f}{definitions} !~ /$TYPE/);
        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
        my $file = $l . "_";
        $file .= $f . "_" if ($f ne "x");
        $file .= $c;
        my $actfile = $file;

        my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
        $filename = "$ETCDIR/$file.$DEFENCODING.src"
            if (! -f $filename);
        if (! -f $filename
            && defined $languages{$l}{$f}{fallback}) {
            $file = $languages{$l}{$f}{fallback};
            $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
        }
        $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
            if (! -f $filename);
        if (! -f $filename) {
            print STDERR
                "Cannot open $file.$DEFENCODING.src or fallback\n";
            next;
        }
        my $fin;
        if (!open($fin, "<", $filename)) {
            print STDERR "Cannot open $filename: $!\n";
            next;
        }
        print "Reading from $filename for ${l}_${f}_${c}\n";
        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
        my @lines;
        while (my $line = <$fin>) {
            if (($line =~ /^comment_char\s/) ||
                ($line =~ /^escape_char\s/)) {
                push @lines, $line;
            }
            # Flip-flop: true from the LC_COLLATE line through the
            # END LC_COLLATE line, inclusive.
            if (($line =~ /^LC_COLLATE/) .. ($line =~ /^END LC_COLLATE/)) {
                $line =~ s/[ ]+/ /g;
                push @lines, $line;
            }
        }
        close($fin);

        my $shex = sha1_hex(join("\n", @lines));
        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
        $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

        my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
        open(my $fout, ">", $outname)
            or die("Cannot write $outname: $!");
        print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
        print $fout @lines;
        close($fout)
            or die("Cannot write $outname: $!");

        foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
            next if ($enc eq $DEFENCODING);
            copy($outname, "$TYPE.draft/$actfile.$enc.src")
                or print STDERR
                    "Cannot copy $outname to $actfile.$enc.src: $!\n";
            $languages{$l}{$f}{data}{$c}{$enc} = $shex;
            $hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
        }
    }
    }
    }
}
503
252345eb
JM
# Read the fields named in %keys from each selected locale's UTF-8 CLDR
# source into $values{<language>}{<country>}{<field>}.
#
# The source file is looked up in $CLDRDIR/posix, then $ETCDIR, then (if
# the family defines one) under the fallback name in $CLDRDIR/posix.
# A field starts on the line "<field><whitespace>..." and continues over
# following lines while a line ends in '/'.  Underscores inside <...>
# symbol names are replaced by spaces; an underscore anywhere else is
# fatal.
#
# NOTE(review): values are keyed by language and country only, so two
# families sharing both would append to the same entry -- confirm whether
# that is intended.
sub get_fields {
    foreach my $l (sort keys(%languages)) {
    foreach my $f (sort keys(%{$languages{$l}})) {
    foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
        next if ($#filter == 2 && ($filter[0] ne $l
            || $filter[1] ne $f || $filter[2] ne $c));
        next if (defined $languages{$l}{$f}{definitions}
            && $languages{$l}{$f}{definitions} !~ /$TYPE/);

        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
        my $file = $l . "_";
        $file .= $f . "_" if ($f ne "x");
        $file .= $c;

        my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
        $filename = "$ETCDIR/$file.$DEFENCODING.src"
            if (! -f $filename);
        if (! -f $filename
            && defined $languages{$l}{$f}{fallback}) {
            $file = $languages{$l}{$f}{fallback};
            $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
        }
        $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
            if (! -f $filename);
        if (! -f $filename) {
            print STDERR
                "Cannot open $file.$DEFENCODING.src or fallback\n";
            next;
        }
        my $fin;
        if (!open($fin, "<", $filename)) {
            print STDERR "Cannot open $filename: $!\n";
            next;
        }
        print "Reading from $filename for ${l}_${f}_${c}\n";
        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
        my @lines = <$fin>;
        chomp(@lines);
        close($fin);

        my $continue = 0;
        foreach my $k (keys(%keys)) {
            # $line aliases the element of @lines, so the edits below
            # (prefix and continuation removal) persist for later keys.
            foreach my $line (@lines) {
                $line =~ s/\r//;
                next if (!$continue && $line !~ /^$k\s/);
                if ($continue) {
                    $line =~ s/^\s+//;
                } else {
                    $line =~ s/^$k\s+//;
                }

                $values{$l}{$c}{$k} = ""
                    if (!defined $values{$l}{$c}{$k});

                $continue = ($line =~ /\/$/);
                $line =~ s/\/$// if ($continue);

                # Replace '_' inside <...> one at a time until no more
                # can be replaced.  Looping on the substitution itself
                # (not on "line contains _") guarantees termination, so
                # the check below is reachable for a stray underscore.
                1 while ($line =~ s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
                die "_ in data - $line" if ($line =~ /_/);
                $values{$l}{$c}{$k} .= $line;

                last if (!$continue);
            }
        }
    }
    }
    }
}
571
# Convert one CLDR symbol name into its byte sequence in an encoding.
#
#   $e  target encoding name
#   $s  symbol name (the text between '<' and '>', underscores already
#       turned into spaces)
#
# For UTF-8 the hex code comes straight from %utf8map.  For any other
# encoding the name is resolved to a Unicode code through %ucd (directly
# or through its %utf8aliases alias), with %translations supplying a
# fixed hex value, a Unicode code, or a substitute Unicode name; the
# code is then looked up in %convertors.
#
# Returns the bytes for codes of one to four bytes.  Dies when the name
# cannot be resolved.  For any other code length a message is printed on
# STDERR and the string "length = N" is returned; this never returns
# undef.
sub decodecldr {
    my $e = shift;
    my $s = shift;

    my $v = undef;

    if ($e eq "UTF-8") {
        #
        # Conversion to UTF-8 can be done from the Unicode name to
        # the UTF-8 character code.
        #
        $v = $utf8map{$s};
        die "Cannot convert $s in $e (charmap)" if (!defined $v);
    } else {
        #
        # Conversion to these encodings can be done from the Unicode
        # name to Unicode code to the encodings code.
        #
        my $ucc = undef;
        $ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
        $ucc = $ucd{name2code}{$utf8aliases{$s}}
            if (!defined $ucc
                && $utf8aliases{$s}
                && defined $ucd{name2code}{$utf8aliases{$s}});

        if (!defined $ucc) {
            if (defined $translations{$e}{$s}{hex}) {
                $v = $translations{$e}{$s}{hex};
                $ucc = 0;
            } elsif (defined $translations{$e}{$s}{ucc}) {
                $ucc = $translations{$e}{$s}{ucc};
            }
        }

        die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
        $v = $convertors{$e}{$ucc} if (!defined $v);

        $v = $translations{$e}{$s}{hex}
            if (!defined $v && defined $translations{$e}{$s}{hex});

        if (!defined $v && defined $translations{$e}{$s}{unicode}) {
            # Resolve the substitute name with a fresh variable: $ucc is
            # always defined here, so reusing it would skip the alias
            # fallback and repeat the lookup that has just failed.
            my $ucn = $translations{$e}{$s}{unicode};
            my $subst = undef;
            $subst = $ucd{name2code}{$ucn}
                if (defined $ucd{name2code}{$ucn});
            $subst = $ucd{name2code}{$utf8aliases{$ucn}}
                if (!defined $subst
                    && $utf8aliases{$ucn}
                    && defined $ucd{name2code}{$utf8aliases{$ucn}});
            $v = $convertors{$e}{$subst} if (defined $subst);
        }

        die "Cannot convert $s in $e (charmap)" if (!defined $v);
    }

    # One byte per pair of hex digits; codes of one to four bytes.
    my $len = length($v);
    if ($len == 2 || $len == 4 || $len == 6 || $len == 8) {
        return pack("C*", map { hex($_) } ($v =~ /(..)/g));
    }
    print STDERR "Cannot convert $e $s\n";
    return "length = " . $len;

}
634
# Look up symbol $v for encoding $enc in %translations.
# Returns the stored entry, or undef when there is none.
sub translate {
    my ($enc, $v) = @_;

    my $entry = $translations{$enc}{$v};
    return defined $entry ? $entry : undef;
}
642
# Write one "$TYPE.draft/<locale>.<encoding>.src" file per selected locale
# and encoding from the values collected by get_fields().
#
# Fields are emitted in %keys order, each preceded by its %DESC comment.
# Field formats:
#   "i" / "ai"    value written as is
#   "s"           quoted string; <SYMBOL> names are converted to $enc
#   "as"          ';'-separated list of such strings, one per line
#   "<cb<src<fmt" run callback cb on field src, store the result as this
#                 field, then treat it as fmt
#   ">other"      use field "other" instead
# The file is first written as ".new" and then renamed to ".src", or to
# ".failed" if a symbol could not be converted.  A SHA-1 of the body is
# recorded in %languages and %hashtable.
sub print_fields {
    foreach my $l (sort keys(%languages)) {
    foreach my $f (sort keys(%{$languages{$l}})) {
    foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
        next if ($#filter == 2 && ($filter[0] ne $l
            || $filter[1] ne $f || $filter[2] ne $c));
        next if (defined $languages{$l}{$f}{definitions}
            && $languages{$l}{$f}{definitions} !~ /$TYPE/);
        foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
            if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
                print "Skipping ${l}_" .
                    ($f eq "x" ? "" : "${f}_") .
                    "${c} - not read\n";
                next;
            }
            my $file = $l;
            $file .= "_" . $f if ($f ne "x");
            $file .= "_" . $c;
            print "Writing to $file in $enc\n";

            if ($enc ne $DEFENCODING &&
                !defined $convertors{$enc}) {
                print "Failed! Cannot convert to $enc.\n";
                next;
            }

            my $draft = "$TYPE.draft/$file.$enc";
            open(my $fout, ">", "$draft.new")
                or die("Cannot write $draft.new: $!");
            my $okay = 1;
            my $output = "";
            print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
            foreach my $k (keys(%keys)) {
                my $fmt = $keys{$k};

                die("Unknown $k in \%DESC")
                    if (!defined $DESC{$k});

                $output .= "#\n# $DESC{$k}\n";

                # Replace one row with another
                if ($fmt =~ /^>/) {
                    $k = substr($fmt, 1);
                    $fmt = $keys{$k};
                }

                # Callback function
                if ($fmt =~ /^\</) {
                    $callback{data}{c} = $c;
                    $callback{data}{k} = $k;
                    $callback{data}{l} = $l;
                    $callback{data}{e} = $enc;
                    my @spec = split(/\</, substr($fmt, 1));
                    my $rv =
                        $callback{$spec[0]}->($values{$l}{$c}{$spec[1]});
                    $values{$l}{$c}{$k} = $rv;
                    $fmt = $spec[2];
                    $callback{data} = undef;
                }

                my $v = $values{$l}{$c}{$k};
                $v = "undef" if (!defined $v);

                if ($fmt eq "i" || $fmt eq "ai") {
                    $output .= "$v\n";
                    next;
                }
                if ($fmt eq "s") {
                    my ($text, $ok) = _expand_symbols($enc, $k, $v);
                    $okay = 0 if (!$ok);
                    $output .= "$text\n";
                    next;
                }
                if ($fmt eq "as") {
                    foreach my $item (split(/;/, $v)) {
                        my ($text, $ok) =
                            _expand_symbols($enc, $k, $item);
                        $okay = 0 if (!$ok);
                        $output .= "$text\n";
                    }
                    next;
                }

                die("$k is '$fmt'");
            }

            $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
            $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
            print $fout "$output# EOF\n";
            close($fout)
                or die("Cannot write $draft.new: $!");

            my $final = $okay ? "$draft.src" : "$draft.failed";
            rename("$draft.new", $final)
                or print STDERR
                    "Cannot rename $draft.new to $final: $!\n";
        }
    }
    }
    }
}

# Strip one leading and one trailing double quote from $v and replace each
# <SYMBOL> in it by decodecldr($enc, SYMBOL).  The string is consumed left
# to right, so converted text is never scanned for symbols again and the
# loop always terminates.  A symbol that cannot be converted is reported
# on STDERR (with field name $k) and left in place.
# Returns (converted string, success flag).
sub _expand_symbols {
    my ($enc, $k, $v) = @_;

    my $ok = 1;
    my $out = "";
    $v =~ s/^"//;
    $v =~ s/"$//;
    while ($v =~ /^(.*?)<(.*?)>(.*)/) {
        my ($before, $cm, $rest) = ($1, $2, $3);

        my $rv = decodecldr($enc, $cm);
        if (!defined $rv) {
            print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
            $ok = 0;
            $rv = "<$cm>";
        }

        $out .= $before . $rv;
        $v = $rest;
    }
    return ($out . $v, $ok);
}
792
# Write "$TYPE.draft/Makefile" for the files generated in this run.
#
# Does nothing when a --lc filter is active.  The Makefile contains:
#  - a .src suffix rule whose command depends on $TYPE (localedef for
#    colldef/ctypedef, grep for the other types);
#  - one "SAME+=" line per file whose content hash equals that of another
#    file, pointing at the preferred file of that group;
#  - one "LOCALES+=" line per remaining file;
#  - "SAME+=" lines for the nc_link and e_link attributes of a family.
# Files listed under SAME are undef'ed in %languages so that they are
# left out of LOCALES.
sub make_makefile {
    return if ($#filter > -1);
    print "Creating Makefile for $TYPE\n";
    my $SRCOUT;
    my $SRCOUT2;
    my $SRCOUT3 = "";	# only ctypedef adds a SYMPAIRS section
    my $MAPLOC;
    if ($TYPE eq "colldef") {
        $SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
            "\t-f \${MAPLOC}/map.UTF-8 " .
            "\${.OBJDIR}/\${.IMPSRC:T:R}";
        $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
            "locale/etc/final-maps\n";
        $SRCOUT2 = "LC_COLLATE";
    }
    elsif ($TYPE eq "ctypedef") {
        $SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
            "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:C/^.*\\.//} " .
            "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
            " || true";
        $SRCOUT2 = "LC_CTYPE";
        $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
            "locale/etc/final-maps\n";
        $SRCOUT3 = "## SYMPAIRS\n\n" .
            ".for PAIR in \${SYMPAIRS}\n" .
            "\${PAIR:C/^.*://:S/src\$/LC_CTYPE/}: " .
            "\${PAIR:C/:.*//}\n" .
            "\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
            "\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
            "\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
            " || true\n" .
            ".endfor\n\n";
    }
    else {
        $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
        $SRCOUT2 = "out";
        $MAPLOC = "";
    }
    my $makefile = "$TYPE.draft/Makefile";
    open(my $fout, ">", $makefile)
        or die("Cannot write $makefile: $!");
    # The rule's command line must start with a tab to be a make recipe.
    print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR= \${SHAREDIR}/locale
FILESNAME= $FILESNAMES{$TYPE}
.SUFFIXES: .src .${SRCOUT2}
${MAPLOC}
.src.${SRCOUT2}:
\t$SRCOUT

## PLACEHOLDER

EOF

    foreach my $hash (keys(%hashtable)) {
        # For colldef, weight LOCALES to UTF-8
        #     Sort as upper-case and reverse to achieve it
        #     Make en_US, ru_RU, and ca_AD preferred
        my @files;
        if ($TYPE eq "colldef") {
            @files = sort {
                if ($a eq 'en_x_US.UTF-8' ||
                    $a eq 'ru_x_RU.UTF-8' ||
                    $a eq 'ca_x_AD.UTF-8') { return -1; }
                elsif ($b eq 'en_x_US.UTF-8' ||
                       $b eq 'ru_x_RU.UTF-8' ||
                       $b eq 'ca_x_AD.UTF-8') { return 1; }
                else { return uc($b) cmp uc($a); }
            } keys(%{$hashtable{$hash}});
        } elsif ($TYPE eq "ctypedef") {
            @files = sort {
                if ($a eq 'en_x_US.UTF-8') { return -1; }
                elsif ($b eq 'en_x_US.UTF-8') { return 1; }
                if ($a =~ /^en_x_US/) { return -1; }
                elsif ($b =~ /^en_x_US/) { return 1; }

                if ($a =~ /^en_x_GB.ISO8859-15/ ||
                    $a =~ /^ru_x_RU/) { return -1; }
                elsif ($b =~ /^en_x_GB.ISO8859-15/ ||
                       $b =~ /ru_x_RU/) { return 1; }
                else { return uc($b) cmp uc($a); }

            } keys(%{$hashtable{$hash}});
        } else {
            @files = sort {
                if ($a =~ /_Comm_/ ||
                    $b eq 'en_x_US.UTF-8') { return 1; }
                elsif ($b =~ /_Comm_/ ||
                    $a eq 'en_x_US.UTF-8') { return -1; }
                else { return uc($b) cmp uc($a); }
            } keys(%{$hashtable{$hash}});
        }
        if ($#files > 0) {
            # First file of the group is the link target; every other
            # file becomes a SAME entry pointing at it.
            my $link = shift(@files);
            $link =~ s/_x_/_/;	# strip family if none there
            foreach my $file (@files) {
                my @name = split(/_/, $file);
                my @tail = split(/\./, $name[-1]);
                $file =~ s/_x_/_/;
                print $fout "SAME+=\t\t$link:$file\n";
                undef($languages{$name[0]}{$name[1]}{data}{$tail[0]}{$tail[1]});
            }
        }
    }

    foreach my $l (sort keys(%languages)) {
    foreach my $f (sort keys(%{$languages{$l}})) {
    foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
        next if ($#filter == 2 && ($filter[0] ne $l
            || $filter[1] ne $f || $filter[2] ne $c));
        next if (defined $languages{$l}{$f}{definitions}
            && $languages{$l}{$f}{definitions} !~ /$TYPE/);
        if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
         && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
            print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
                "${c} - not read\n";
            next;
        }

        my $file = $l . "_";
        $file .= $f . "_" if ($f ne "x");
        $file .= $c;

        foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
            next if (!defined $languages{$l}{$f}{data}{$c}{$e});
            print $fout "LOCALES+=\t$file.$e\n";
        }

        if (defined $languages{$l}{$f}{nc_link}) {
            foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                print $fout "SAME+=\t\t$file.$e:$languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
            }
        }

        if (defined $languages{$l}{$f}{e_link}) {
            foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
                my @pair = split(/:/, $el);
                print $fout "SAME+=\t\t$file.$pair[0]:$file.$pair[1]\t# legacy (same charset)\n";
            }
        }

    }
    }
    }

    print $fout <<EOF;

FILES= \${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES= \${FILES}

.for f in \${SAME}
SYMLINKS+= ../\${f:C/:.*\$//}/\${FILESNAME} \\
 \${LOCALEDIR}/\${f:C/^.*://}/\${FILESNAME}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

${SRCOUT3}.include <bsd.prog.mk>
EOF

    close($fout)
        or die("Cannot write $makefile: $!");
}