1 #!/usr/local/bin/perl -wC
9 use Digest::SHA qw(sha1_hex);
10 require "charmaps.pm";
14 print "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";
18 my $DEFENCODING = "UTF-8";
22 my $UNIDATADIR = undef;
27 my $result = GetOptions (
28 "cldr=s" => \$CLDRDIR,
29 "unidata=s" => \$UNIDATADIR,
41 my %translations = ();
43 my %alternativemonths = ();
48 get_unidata($UNIDATADIR);
49 get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
50 get_encodings("$ETCDIR/charmaps");
53 tie(%keys, "Tie::IxHash");
54 tie(%hashtable, "Tie::IxHash");
57 "monetdef" => "LC_MONETARY",
58 "timedef" => "LC_TIME",
59 "msgdef" => "LC_MESSAGES",
60 "numericdef" => "LC_NUMERIC",
61 "colldef" => "LC_COLLATE",
62 "ctypedef" => "LC_CTYPE"
66 mdorder => \&callback_mdorder,
67 altmon => \&callback_altmon,
68 cformat => \&callback_cformat,
75 "decimal_point" => "decimal_point",
76 "thousands_sep" => "thousands_sep",
77 "grouping" => "grouping",
80 "int_curr_symbol" => "int_curr_symbol (last character always " .
82 "currency_symbol" => "currency_symbol",
83 "mon_decimal_point" => "mon_decimal_point",
84 "mon_thousands_sep" => "mon_thousands_sep",
85 "mon_grouping" => "mon_grouping",
86 "positive_sign" => "positive_sign",
87 "negative_sign" => "negative_sign",
88 "int_frac_digits" => "int_frac_digits",
89 "frac_digits" => "frac_digits",
90 "p_cs_precedes" => "p_cs_precedes",
91 "p_sep_by_space" => "p_sep_by_space",
92 "n_cs_precedes" => "n_cs_precedes",
93 "n_sep_by_space" => "n_sep_by_space",
94 "p_sign_posn" => "p_sign_posn",
95 "n_sign_posn" => "n_sign_posn",
98 "yesexpr" => "yesexpr",
100 "yesstr" => "yesstr",
104 "abmon" => "Short month names",
105 "mon" => "Long month names (as in a date)",
106 "abday" => "Short weekday names",
107 "day" => "Long weekday names",
112 "d_t_fmt" => "date_fmt",
113 "altmon" => "Long month names (without case ending)",
114 "md_order" => "md_order",
115 "t_fmt_ampm" => "ampm_fmt",
118 if ($TYPE eq "colldef") {
119 transform_collation();
123 if ($TYPE eq "ctypedef") {
128 if ($TYPE eq "numericdef") {
130 "decimal_point" => "s",
131 "thousands_sep" => "s",
139 if ($TYPE eq "monetdef") {
141 "int_curr_symbol" => "s",
142 "currency_symbol" => "s",
143 "mon_decimal_point" => "s",
144 "mon_thousands_sep" => "s",
145 "mon_grouping" => "ai",
146 "positive_sign" => "s",
147 "negative_sign" => "s",
148 "int_frac_digits" => "i",
149 "frac_digits" => "i",
150 "p_cs_precedes" => "i",
151 "p_sep_by_space" => "i",
152 "n_cs_precedes" => "i",
153 "n_sep_by_space" => "i",
154 "p_sign_posn" => "i",
162 if ($TYPE eq "msgdef") {
174 if ($TYPE eq "timedef") {
182 "c_fmt" => "<cformat<d_t_fmt<s",
186 "altmon" => "<altmon<mon<as",
187 "md_order" => "<mdorder<d_fmt<s",
195 sub callback_cformat {
202 sub callback_mdorder {
204 return undef if (!defined $s);
209 sub callback_altmon {
210 # if the language/country is known in %alternativemonths then
211 # return that, otherwise repeat mon
214 if (defined $alternativemonths{$callback{data}{l}}{$callback{data}{c}}) {
215 my @altnames = split(";",$alternativemonths{$callback{data}{l}}{$callback{data}{c}});
223 return join(";",@cleaned);
229 ############################
232 my $directory = shift;
234 open(FIN, "$directory/UnicodeData.txt")
235 or die("Cannot open $directory/UnicodeData.txt");;
240 foreach my $l (@lines) {
241 my @a = split(/;/, $l);
243 $ucd{code2name}{"$a[0]"} = $a[1]; # Unicode name
244 $ucd{name2code}{"$a[1]"} = $a[0]; # Unicode code
259 foreach my $l (@lines) {
261 next if ($l =~ /^\#/);
264 if ($l eq "CHARMAP") {
269 next if (!$incharmap);
270 last if ($l eq "END CHARMAP");
272 $l =~ /^<([^\s]+)>\s+(.*)/;
275 $k =~ s/_/ /g; # unicode char string
276 $v =~ s/\\x//g; # UTF-8 char code
279 $utf8aliases{$k} = $prev_k if ($prev_v eq $v);
288 foreach my $e (sort(keys(%encodings))) {
289 if (!open(FIN, "$dir/$e.TXT")) {
290 print "Cannot open charmap for $e\n";
298 foreach my $l (@lines) {
300 next if ($l =~ /^\#/);
303 my @a = split(" ", $l);
305 $a[0] =~ s/^0[xX]//; # local char code
306 $a[1] =~ s/^0[xX]//; # unicode char code
307 $convertors{$e}{uc($a[1])} = uc($a[0]);
313 my %data = get_xmldata($ETCDIR);
314 %languages = %{$data{L}};
315 %translations = %{$data{T}};
316 %alternativemonths = %{$data{AM}};
317 %encodings = %{$data{E}};
319 return if (!defined $doonly);
321 my @a = split(/_/, $doonly);
332 print Dumper(@filter);
336 sub transform_ctypes {
337 foreach my $l (sort keys(%languages)) {
338 foreach my $f (sort keys(%{$languages{$l}})) {
339 foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
340 next if ($#filter == 2 && ($filter[0] ne $l
341 || $filter[1] ne $f || $filter[2] ne $c));
342 next if (defined $languages{$l}{$f}{definitions}
343 && $languages{$l}{$f}{definitions} !~ /$TYPE/);
344 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
347 $file .= $f . "_" if ($f ne "x");
351 my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
352 $filename = "$ETCDIR/$file.$DEFENCODING.src"
355 && defined $languages{$l}{$f}{fallback}) {
356 $file = $languages{$l}{$f}{fallback};
357 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
359 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
361 if (! -f $filename) {
363 "Cannot open $file.$DEFENCODING.src or fallback\n";
366 open(FIN, "$filename");
367 print "Reading from $filename for ${l}_${f}_${c}\n";
368 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
373 if ((/^comment_char\s/) || (/^escape_char\s/)){
376 if (/^LC_CTYPE/../^END LC_CTYPE/) {
381 $shex = sha1_hex(join("\n", @lines));
382 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
383 $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
384 open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src");
386 # Warning: Do not edit. This file is automatically extracted from the
387 # tools in /usr/src/tools/tools/locale. The data is obtained from the
388 # CLDR project, obtained from http://cldr.unicode.org/
389 # -----------------------------------------------------------------------------
394 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
395 next if ($enc eq $DEFENCODING);
396 copy ("$TYPE.draft/$actfile.$DEFENCODING.src",
397 "$TYPE.draft/$actfile.$enc.src");
398 $uhex = sha1_hex(join("\n", @lines) . $enc);
399 $languages{$l}{$f}{data}{$c}{$enc} = $uhex;
400 $hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
408 sub transform_collation {
409 foreach my $l (sort keys(%languages)) {
410 foreach my $f (sort keys(%{$languages{$l}})) {
411 foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
412 next if ($#filter == 2 && ($filter[0] ne $l
413 || $filter[1] ne $f || $filter[2] ne $c));
414 next if (defined $languages{$l}{$f}{definitions}
415 && $languages{$l}{$f}{definitions} !~ /$TYPE/);
416 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
419 $file .= $f . "_" if ($f ne "x");
423 my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
424 $filename = "$ETCDIR/$file.$DEFENCODING.src"
427 && defined $languages{$l}{$f}{fallback}) {
428 $file = $languages{$l}{$f}{fallback};
429 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
431 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
433 if (! -f $filename) {
435 "Cannot open $file.$DEFENCODING.src or fallback\n";
438 open(FIN, "$filename");
439 print "Reading from $filename for ${l}_${f}_${c}\n";
440 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
444 if ((/^comment_char\s/) || (/^escape_char\s/)){
447 if (/^LC_COLLATE/../^END LC_COLLATE/) {
453 $shex = sha1_hex(join("\n", @lines));
454 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
455 $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
456 open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src");
458 # Warning: Do not edit. This file is automatically extracted from the
459 # tools in /usr/src/tools/tools/locale. The data is obtained from the
460 # CLDR project, obtained from http://cldr.unicode.org/
461 # -----------------------------------------------------------------------------
466 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
467 next if ($enc eq $DEFENCODING);
468 copy ("$TYPE.draft/$actfile.$DEFENCODING.src",
469 "$TYPE.draft/$actfile.$enc.src");
470 $languages{$l}{$f}{data}{$c}{$enc} = $shex;
471 $hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
479 foreach my $l (sort keys(%languages)) {
480 foreach my $f (sort keys(%{$languages{$l}})) {
481 foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
482 next if ($#filter == 2 && ($filter[0] ne $l
483 || $filter[1] ne $f || $filter[2] ne $c));
484 next if (defined $languages{$l}{$f}{definitions}
485 && $languages{$l}{$f}{definitions} !~ /$TYPE/);
487 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
490 $file .= $f . "_" if ($f ne "x");
493 my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
494 $filename = "$ETCDIR/$file.$DEFENCODING.src"
497 && defined $languages{$l}{$f}{fallback}) {
498 $file = $languages{$l}{$f}{fallback};
499 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
501 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
503 if (! -f $filename) {
505 "Cannot open $file.$DEFENCODING.src or fallback\n";
508 open(FIN, "$filename");
509 print "Reading from $filename for ${l}_${f}_${c}\n";
510 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
515 foreach my $k (keys(%keys)) {
516 foreach my $line (@lines) {
518 next if (!$continue && $line !~ /^$k\s/);
525 $values{$l}{$c}{$k} = ""
526 if (!defined $values{$l}{$c}{$k});
528 $continue = ($line =~ /\/$/);
529 $line =~ s/\/$// if ($continue);
531 while ($line =~ /_/) {
533 s/\<([^>_]+)_([^>]+)\>/<$1 $2>/;
535 die "_ in data - $line" if ($line =~ /_/);
536 $values{$l}{$c}{$k} .= $line;
538 last if (!$continue);
554 # Conversion to UTF-8 can be done from the Unicode name to
555 # the UTF-8 character code.
558 die "Cannot convert $s in $e (charmap)" if (!defined $v);
561 # Conversion to these encodings can be done from the Unicode
562 # name to the Unicode code to the encoding's code.
565 $ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
566 $ucc = $ucd{name2code}{$utf8aliases{$s}}
569 && defined $ucd{name2code}{$utf8aliases{$s}});
572 if (defined $translations{$e}{$s}{hex}) {
573 $v = $translations{$e}{$s}{hex};
575 } elsif (defined $translations{$e}{$s}{ucc}) {
576 $ucc = $translations{$e}{$s}{ucc};
580 die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
581 $v = $convertors{$e}{$ucc} if (!defined $v);
583 $v = $translations{$e}{$s}{hex}
584 if (!defined $v && defined $translations{$e}{$s}{hex});
586 if (!defined $v && defined $translations{$e}{$s}{unicode}) {
587 my $ucn = $translations{$e}{$s}{unicode};
588 $ucc = $ucd{name2code}{$ucn}
589 if (defined $ucd{name2code}{$ucn});
590 $ucc = $ucd{name2code}{$utf8aliases{$ucn}}
592 && defined $ucd{name2code}{$utf8aliases{$ucn}});
593 $v = $convertors{$e}{$ucc};
596 die "Cannot convert $s in $e (charmap)" if (!defined $v);
599 return pack("C", hex($v)) if (length($v) == 2);
600 return pack("CC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)))
601 if (length($v) == 4);
602 return pack("CCC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)),
603 hex(substr($v, 4, 2))) if (length($v) == 6);
604 print STDERR "Cannot convert $e $s\n";
605 return "length = " . length($v);
613 return $translations{$enc}{$v} if (defined $translations{$enc}{$v});
618 foreach my $l (sort keys(%languages)) {
619 foreach my $f (sort keys(%{$languages{$l}})) {
620 foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
621 next if ($#filter == 2 && ($filter[0] ne $l
622 || $filter[1] ne $f || $filter[2] ne $c));
623 next if (defined $languages{$l}{$f}{definitions}
624 && $languages{$l}{$f}{definitions} !~ /$TYPE/);
625 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
626 if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
627 print "Skipping ${l}_" .
628 ($f eq "x" ? "" : "${f}_") .
633 $file .= "_" . $f if ($f ne "x");
635 print "Writing to $file in $enc\n";
637 if ($enc ne $DEFENCODING &&
638 !defined $convertors{$enc}) {
639 print "Failed! Cannot convert to $enc.\n";
643 open(FOUT, ">$TYPE.draft/$file.$enc.new");
647 # Warning: Do not edit. This file is automatically generated from the
648 # tools in /usr/src/tools/tools/locale. The data is obtained from the
649 # CLDR project, obtained from http://cldr.unicode.org/
650 # -----------------------------------------------------------------------------
652 foreach my $k (keys(%keys)) {
655 die("Unknown $k in \%DESC")
656 if (!defined $DESC{$k});
658 $output .= "#\n# $DESC{$k}\n";
660 # Replace one row with another
668 $callback{data}{c} = $c;
669 $callback{data}{k} = $k;
670 $callback{data}{l} = $l;
671 $callback{data}{e} = $enc;
672 my @a = split(/\</, substr($f, 1));
674 &{$callback{$a[0]}}($values{$l}{$c}{$a[1]});
675 $values{$l}{$c}{$k} = $rv;
677 $callback{data} = ();
680 my $v = $values{$l}{$c}{$k};
681 $v = "undef" if (!defined $v);
695 while ($v =~ /^(.*?)<(.*?)>(.*)/) {
700 my $rv = decodecldr($enc, $cm);
701 # $rv = translate($enc, $cm)
705 "Could not convert $k ($cm) from $DEFENCODING to $enc\n";
710 $v = $p1 . $rv . $p3;
716 foreach my $v (split(/;/, $v)) {
720 while ($v =~ /^(.*?)<(.*?)>(.*)/) {
728 # $rv = translate($enc,
733 "Could not convert $k ($cm) from $DEFENCODING to $enc\n";
749 $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
750 $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
751 print FOUT "$output# EOF\n";
755 rename("$TYPE.draft/$file.$enc.new",
756 "$TYPE.draft/$file.$enc.src");
758 rename("$TYPE.draft/$file.$enc.new",
759 "$TYPE.draft/$file.$enc.failed");
768 return if ($#filter > -1);
769 print "Creating Makefile for $TYPE\n";
773 if ($TYPE eq "colldef") {
774 $SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
775 "\t-f \${MAPLOC}/map.UTF-8 " .
776 "\${.OBJDIR}/\${.IMPSRC:T:R}";
777 $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
778 "locale/etc/final-maps\n";
779 $SRCOUT2 = "LC_COLLATE";
781 elsif ($TYPE eq "ctypedef") {
782 $SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
783 "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:C/^.*\\.//} " .
784 "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
786 $SRCOUT2 = "LC_CTYPE";
787 $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
788 "locale/etc/final-maps\n";
791 $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
795 open(FOUT, ">$TYPE.draft/Makefile");
797 # Warning: Do not edit. This file is automatically generated from the
798 # tools in /usr/src/tools/tools/locale.
800 LOCALEDIR= /usr/share/locale
801 FILESNAME= $FILESNAMES{$TYPE}
802 .SUFFIXES: .src .${SRCOUT2}
811 foreach my $hash (keys(%hashtable)) {
812 # For colldef, weight LOCALES to UTF-8
813 # Sort as upper-case and reverse to achieve it
814 # Make en_US, ru_RU, and ca_AD preferred
816 if ($TYPE eq "colldef") {
818 if ($a eq 'en_x_US.UTF-8' ||
819 $a eq 'ru_x_RU.UTF-8' ||
820 $a eq 'ca_x_AD.UTF-8') { return -1; }
821 elsif ($b eq 'en_x_US.UTF-8' ||
822 $b eq 'ru_x_RU.UTF-8' ||
823 $b eq 'ca_x_AD.UTF-8') { return 1; }
824 else { return uc($b) cmp uc($a); }
825 } keys(%{$hashtable{$hash}});
826 } elsif ($TYPE eq "ctypedef") {
828 if ($a =~ /^en_x_US/ ||
829 $a =~ /^en_x_GB.ISO8859-15/ ||
830 $a =~ /^ru_x_RU/) { return -1; }
831 elsif ($b =~ /^en_x_US/ ||
832 $b =~ /^en_x_GB.ISO8859-15/ ||
833 $b =~ /ru_x_RU/) { return 1; }
834 else { return uc($b) cmp uc($a); }
836 if ($a eq 'en_x_US.UTF-8') { return -1; }
837 elsif ($b eq 'en_x_US.UTF-8') { return 1; }
838 else { return uc($b) cmp uc($a); }
839 } keys(%{$hashtable{$hash}});
841 @files = sort(keys(%{$hashtable{$hash}}));
844 my $link = shift(@files);
845 $link =~ s/_x_/_/; # strip family if none there
846 foreach my $file (@files) {
847 my @a = split(/_/, $file);
848 my @b = split(/\./, $a[-1]);
850 print FOUT "SAME+=\t\t$link:$file\n";
851 undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
856 foreach my $l (sort keys(%languages)) {
857 foreach my $f (sort keys(%{$languages{$l}})) {
858 foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
859 next if ($#filter == 2 && ($filter[0] ne $l
860 || $filter[1] ne $f || $filter[2] ne $c));
861 next if (defined $languages{$l}{$f}{definitions}
862 && $languages{$l}{$f}{definitions} !~ /$TYPE/);
863 if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
864 && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
865 print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
869 foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
871 $file .= $f . "_" if ($f ne "x");
873 next if (!defined $languages{$l}{$f}{data}{$c}{$e});
874 print FOUT "LOCALES+=\t$file.$e\n";
877 if (defined $languages{$l}{$f}{nc_link}) {
878 foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
880 $file .= $f . "_" if ($f ne "x");
882 print FOUT "SAME+=\t\t$file.$e:$languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
886 if (defined $languages{$l}{$f}{e_link}) {
887 foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
888 my @a = split(/:/, $el);
890 $file .= $f . "_" if ($f ne "x");
892 print FOUT "SAME+=\t\t$file.$a[0]:$file.$a[1]\t# legacy (same charset)\n";
902 FILES= \${LOCALES:S/\$/.${SRCOUT2}/}
903 CLEANFILES= \${FILES}
906 SYMLINKS+= ../\${f:C/:.*\$//}/\${FILESNAME} \${LOCALEDIR}/\${f:C/^.*://}
909 .for f in \${LOCALES}
910 FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
913 .include <bsd.prog.mk>