Merge branch 'vendor/LESS'
[dragonfly.git] / tools / tools / locale / tools / cldr2def.pl
1 #!/usr/local/bin/perl -wC
2
3 use strict;
4 use File::Copy;
5 use XML::Parser;
6 use Tie::IxHash;
7 use Data::Dumper;
8 use Getopt::Long;
9 use Digest::SHA qw(sha1_hex);
10 require "charmaps.pm";
11
12
# ---------------------------------------------------------------------------
# Command line handling and global state.
#
# --cldr, --unidata, --etc and --type are all required: each one is used
# unconditionally further down (to build file names or to select the work
# to do).  --lc optionally restricts processing to a single locale.
# ---------------------------------------------------------------------------
my $USAGE = "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";

# Cheap early rejection when there are obviously too few arguments.
if ($#ARGV < 2) {
	print $USAGE;
	exit(1);
}

my $DEFENCODING = "UTF-8";
my @filter = ();

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $doonly = undef;

my $result = GetOptions (
		"cldr=s"	=> \$CLDRDIR,
		"unidata=s"	=> \$UNIDATADIR,
		"etc=s"		=> \$ETCDIR,
		"type=s"	=> \$TYPE,
		"lc=s"		=> \$doonly
	    );

# GetOptions() returns false on unknown or malformed options.  An undefined
# directory would otherwise be interpolated as an empty string and produce
# paths such as "/posix/UTF-8.cm", so stop here with the usage text instead.
if (!$result
    || grep { !defined($_) } ($CLDRDIR, $UNIDATADIR, $ETCDIR, $TYPE)) {
	print $USAGE;
	exit(1);
}

my %convertors = ();		# encoding -> unicode code -> local code

my %ucd = ();			# code2name / name2code from UnicodeData.txt
my %values = ();		# language -> country -> key -> raw value
my %hashtable = ();		# content digest -> locale names sharing it
my %languages = ();
my %translations = ();
my %encodings = ();
my %alternativemonths = ();
get_languages();

my %utf8map = ();		# unicode name -> UTF-8 byte string in hex
my %utf8aliases = ();		# unicode name -> previous name with same bytes
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# Both hashes are tied so that iteration follows insertion order.
my %keys = ();
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");
55
# Locale definition type -> locale category name.
my %FILESNAMES = (
	monetdef	=> "LC_MONETARY",
	timedef		=> "LC_TIME",
	msgdef		=> "LC_MESSAGES",
	numericdef	=> "LC_NUMERIC",
	colldef		=> "LC_COLLATE",
	ctypedef	=> "LC_CTYPE",
);

# Callbacks that can be named in a key format of the form
# "<callback<source-key<format".  The "data" slot is filled in by the
# caller right before a callback runs.
my %callback = (
	mdorder	=> \&callback_mdorder,
	altmon	=> \&callback_altmon,
	cformat	=> \&callback_cformat,
	cbabmon	=> \&callback_abmon,
	data	=> undef,
);

# Comment text written above each field in the generated files.
# Most fields are simply described by their own name.
my %DESC = (
	(map { $_ => $_ } qw(
		decimal_point thousands_sep grouping
		currency_symbol mon_decimal_point mon_thousands_sep
		mon_grouping positive_sign negative_sign
		int_frac_digits frac_digits
		p_cs_precedes p_sep_by_space n_cs_precedes n_sep_by_space
		p_sign_posn n_sign_posn
		yesexpr noexpr yesstr nostr
		c_fmt md_order
	)),

	# monetdef
	int_curr_symbol	=> "int_curr_symbol (last character always SPACE)",

	# timedef
	abmon		=> "Short month names",
	mon		=> "Long month names (as in a date)",
	abday		=> "Short weekday names",
	day		=> "Long weekday names",
	t_fmt		=> "X_fmt",
	d_fmt		=> "x_fmt",
	am_pm		=> "AM/PM",
	d_t_fmt		=> "date_fmt",
	altmon		=> "Long month names (without case ending)",
	t_fmt_ampm	=> "ampm_fmt",
);
118
# ---------------------------------------------------------------------------
# Dispatch on --type.
#
# For the field based types the table below lists, in output order, each
# field and its format:
#   "s"  string          "i"  integer
#   "as" list of strings "ai" list of integers
#   "<callback<key<fmt"  run %callback{callback} on the value of <key>,
#                        then emit the result using <fmt>
# The lists are assigned to the order-preserving tied %keys.
# ---------------------------------------------------------------------------
my %FIELDS_FOR = (
	"numericdef" => [
	    "decimal_point"	=> "s",
	    "thousands_sep"	=> "s",
	    "grouping"		=> "ai",
	],
	"monetdef" => [
	    "int_curr_symbol"	=> "s",
	    "currency_symbol"	=> "s",
	    "mon_decimal_point"	=> "s",
	    "mon_thousands_sep"	=> "s",
	    "mon_grouping"	=> "ai",
	    "positive_sign"	=> "s",
	    "negative_sign"	=> "s",
	    "int_frac_digits"	=> "i",
	    "frac_digits"	=> "i",
	    "p_cs_precedes"	=> "i",
	    "p_sep_by_space"	=> "i",
	    "n_cs_precedes"	=> "i",
	    "n_sep_by_space"	=> "i",
	    "p_sign_posn"	=> "i",
	    "n_sign_posn"	=> "i",
	],
	"msgdef" => [
	    "yesexpr"		=> "s",
	    "noexpr"		=> "s",
	    "yesstr"		=> "s",
	    "nostr"		=> "s",
	],
	# "d_fmt" used to be listed twice with the same format; the second
	# entry did not change the field order and has been dropped.
	"timedef" => [
	    "abmon"		=> "<cbabmon<abmon<as",
	    "mon"		=> "as",
	    "abday"		=> "as",
	    "day"		=> "as",
	    "t_fmt"		=> "s",
	    "d_fmt"		=> "s",
	    "c_fmt"		=> "<cformat<d_t_fmt<s",
	    "am_pm"		=> "as",
	    "d_t_fmt"		=> "s",
	    "altmon"		=> "<altmon<mon<as",
	    "md_order"		=> "<mdorder<d_fmt<s",
	    "t_fmt_ampm"	=> "s",
	],
);

# Types that are produced by copying/filtering whole source sections.
my %TRANSFORM_FOR = (
	"colldef"	=> \&transform_collation,
	"ctypedef"	=> \&transform_ctypes,
);

# A missing or misspelled type used to fall through every test and exit
# successfully without producing anything.
if (!defined $TYPE
    || !(exists $TRANSFORM_FOR{$TYPE} || exists $FIELDS_FOR{$TYPE})) {
	die("Unknown or missing --type; expected one of: " .
	    join(", ", sort(keys(%TRANSFORM_FOR), keys(%FIELDS_FOR))) . "\n");
}

if (exists $TRANSFORM_FOR{$TYPE}) {
	$TRANSFORM_FOR{$TYPE}->();
} else {
	%keys = @{$FIELDS_FOR{$TYPE}};
	get_fields();
	print_fields();
}
make_makefile();
195
# Callback "cformat": strip the time zone conversions from a format string.
#
# Removes the first occurrence of " %Z" and then the first occurrence of
# " %z" from the argument and returns the result.  An undefined argument
# is passed through as undef (same convention as callback_mdorder) rather
# than triggering "uninitialized value" warnings.
sub callback_cformat {
	my $s = shift;
	return undef if (!defined $s);
	$s =~ s/ %Z//;
	$s =~ s/ %z//;
	return $s;
}
202
# Callback "mdorder": reduce a date format to the order of its day and
# month conversions by deleting every character other than "d" and "m".
# Returns undef when no format was supplied.
sub callback_mdorder {
	my ($fmt) = @_;

	if (defined $fmt) {
		# c = complement the set, d = delete what matches
		$fmt =~ tr/dm//cd;
		return $fmt;
	}
	return undef;
}
209
# Callback "altmon": pick the alternative month names for the locale that
# is currently being written.
#
# The language and country are taken from $callback{data}{l} and
# $callback{data}{c}.  If %alternativemonths has an entry for that pair,
# its ";"-separated names are returned with surrounding whitespace removed
# from each name; otherwise the argument (the regular month names) is
# returned unchanged.
sub callback_altmon {
	my $s = shift;

	my $entry =
	    $alternativemonths{$callback{data}{l}}{$callback{data}{c}};
	return $s unless (defined $entry);

	my @trimmed = map {
		my $name = $_;
		$name =~ s/^\s+//;
		$name =~ s/\s+$//;
		$name;
	} split(";", $entry);

	return join(";", @trimmed);
}
229
# Callback "cbabmon": for a fixed set of CJK locales, pad abbreviated month
# names that start with a single digit so that columns line up (style
# established in FreeBSD in 2001).
#
# The locale is taken from $callback{data}{l} and $callback{data}{c}.  A
# name is padded by inserting "<space>" after its opening quote when it
# starts with <two>..<nine>, or with <one> that is not followed by <zero>,
# <one> or <two>.  Other locales get the argument back unchanged.
sub callback_abmon {
	my $s = shift;
	my $nl = $callback{data}{l} . "_" . $callback{data}{c};

	my %is_padded = map { $_ => 1 }
	    ('ja_JP', 'ko_KR', 'zh_CN', 'zh_HK', 'zh_TW');
	return $s unless ($is_padded{$nl});

	my @cleaned = map {
		my $name = $_;
		my $single_digit =
		    $name =~ /^"<(two|three|four|five|six|seven|eight|nine)>/ ||
		    ($name =~ /^"<one>/ && $name !~ /^"<one>(<zero>|<one>|<two>)/);
		$name =~ s/^"/"<space>/ if ($single_digit);
		$name;
	} split(";", $s);

	return join(";", @cleaned);
}
253
254 ############################
255
# Load "$directory/UnicodeData.txt" into %ucd.
#
# Each line is split on ";"; the first field is the code and the second
# the name.  Fills $ucd{code2name}{code} = name and
# $ucd{name2code}{name} = code.  Lines with fewer than two fields (for
# example blank lines) are ignored instead of creating undef keys.
#
# Dies if the file cannot be opened.
sub get_unidata {
	my $directory = shift;

	my $path = "$directory/UnicodeData.txt";
	open(my $fh, '<', $path)
	    or die("Cannot open $path: $!");

	while (my $l = <$fh>) {
		chomp($l);
		my @a = split(/;/, $l);
		next if (@a < 2);

		$ucd{code2name}{"$a[0]"} = $a[1];	# Unicode name
		$ucd{name2code}{"$a[1]"} = $a[0];	# Unicode code
	}
	close($fh);
}
272
# Read the CHARMAP section of a charmap file into %utf8map and %utf8aliases.
#
# Only lines between "CHARMAP" and "END CHARMAP" of the form
# "<NAME> \xHH\xHH..." are used.  Underscores in NAME become spaces and
# the "\x" prefixes are dropped, so $utf8map{name} is the byte sequence as
# a hex string.  When an entry has the same bytes as the entry before it,
# $utf8aliases{name} records the previous entry's name.
#
# Lines inside the section that do not have that form are skipped; before,
# the capture variables were used without checking that the match
# succeeded.  If the file cannot be opened a message is written to STDERR
# and the maps are left untouched.
sub get_utf8map {
	my $file = shift;

	my $fh;
	if (!open($fh, '<', $file)) {
		print STDERR "Cannot open $file: $!\n";
		return;
	}
	my @lines = <$fh>;
	close($fh);
	chomp(@lines);

	my $prev_k = undef;
	my $prev_v = "";
	my $incharmap = 0;
	foreach my $l (@lines) {
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		next if ($l !~ /^<([^\s]+)>\s+(.*)/);
		my $k = $1;
		my $v = $2;
		$k =~ s/_/ /g;		# unicode char string
		$v =~ s/\\x//g;		# UTF-8 char code
		$utf8map{$k} = $v;

		$utf8aliases{$k} = $prev_k if ($prev_v eq $v);

		$prev_v = $v;
		$prev_k = $k;
	}
}
310
# Load the "$dir/<encoding>.TXT" mapping table for every key of %encodings.
#
# For each table that can be opened, $encodings{name} is set to 1 and every
# data line "<local code> <unicode code> ..." is stored as
# $convertors{name}{UNICODE} = LOCAL, both upper-cased and without a
# leading "0x".  Comment lines, empty lines and lines with fewer than two
# fields are ignored.  A table that cannot be opened is reported on stdout
# and skipped.
sub get_encodings {
	my $dir = shift;
	foreach my $e (sort(keys(%encodings))) {
		my $fh;
		if (!open($fh, '<', "$dir/$e.TXT")) {
			print "Cannot open charmap for $e\n";
			next;
		}
		$encodings{$e} = 1;
		while (my $l = <$fh>) {
			chomp($l);
			$l =~ s/\r//;
			next if ($l =~ /^\#/);
			next if ($l eq "");

			my @a = split(" ", $l);
			next if ($#a < 1);
			$a[0] =~ s/^0[xX]//;	# local char code
			$a[1] =~ s/^0[xX]//;	# unicode char code
			$convertors{$e}{uc($a[1])} = uc($a[0]);
		}
		close($fh);
	}
}
336
# Populate %languages, %translations, %alternativemonths and %encodings
# from get_xmldata($ETCDIR), then turn the optional --lc value into
# @filter = (language, family, country).
#
# "la_CC" selects family "x" (the placeholder used for "no family");
# "la_Family_CC" names the family explicitly.  Any other shape used to
# leave @filter empty, which silently processed every locale; it is now
# rejected.
sub get_languages {
	my %data = get_xmldata($ETCDIR);
	%languages = %{$data{L}};
	%translations = %{$data{T}};
	%alternativemonths = %{$data{AM}};
	%encodings = %{$data{E}};

	return if (!defined $doonly);

	my @a = split(/_/, $doonly);
	if (@a == 2) {
		@filter = ($a[0], "x", $a[1]);
	} elsif (@a == 3) {
		@filter = @a;
	} else {
		die("Invalid --lc value '$doonly': expected la_CC or " .
		    "la_Family_CC\n");
	}

	# Diagnostic output: show the filter that is in effect.
	print Dumper(@filter);
	return;
}
360
# Write the LC_CTYPE sources for every selected locale into "$TYPE.draft".
#
# For each language/family/country that passes @filter and the optional
# "definitions" restriction:
#   - the default-encoding file is a verbatim copy of
#     "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
#   - for every other encoding of the locale, the comment_char and
#     escape_char lines and the LC_CTYPE section of the locale's own
#     default-encoding source are written below a "do not edit" header.
# The SHA-1 of what was collected is stored in %languages and %hashtable.
#
# Unreadable inputs are reported on STDERR and skipped, as before.  Output
# files that cannot be created or flushed are now fatal instead of being
# silently lost.
sub transform_ctypes {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file;
		$file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;
		my $actfile = $file;

		my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
		my $fin;
		if (! -f $filename || !open($fin, '<', $filename)) {
			print STDERR "Cannot open $filename\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		close($fin);

		# The lines still end in "\n"; the extra join separator only
		# influences the digest and is kept so digests do not change.
		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(my $fout, '>', $outname)
		    or die("Cannot create $outname: $!\n");
		print $fout @lines;
		close($fout)
		    or die("Cannot write $outname: $!\n");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
			if (! -f $filename || !open($fin, '<', $filename)) {
				print STDERR "Cannot open $filename\n";
				next;
			}
			@lines = ();
			while (<$fin>) {
				if ((/^comment_char\s/) || (/^escape_char\s/)){
					push @lines, $_;
				}
				if (/^LC_CTYPE/../^END LC_CTYPE/) {
					push @lines, $_;
				}
			}
			close($fin);

			# The encoding name is part of the digest so that
			# identical text in different encodings stays distinct.
			my $uhex = sha1_hex(join("\n", @lines) . $enc);
			$languages{$l}{$f}{data}{$c}{$enc} = $uhex;
			$hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;

			$outname = "$TYPE.draft/$actfile.$enc.src";
			open($fout, '>', $outname)
			    or die("Cannot create $outname: $!\n");
			print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			print $fout @lines;
			close($fout)
			    or die("Cannot write $outname: $!\n");
		}
	}
	}
	}
}
432
433
# Write the LC_COLLATE sources for every selected locale into "$TYPE.draft".
#
# For each language/family/country that passes @filter and the optional
# "definitions" restriction, the source is looked up in this order:
# "$CLDRDIR/posix/<locale>", "$ETCDIR/<locale>", then the family's
# "fallback" locale in "$CLDRDIR/posix".  The comment_char and escape_char
# lines and the LC_COLLATE section (with runs of spaces squeezed to one)
# are written below a "do not edit" header; the result is then copied for
# every other encoding of the locale.  The SHA-1 of the collected lines is
# stored in %languages and %hashtable.
#
# Unreadable inputs are reported on STDERR and skipped, as before.  Output
# files that cannot be created, flushed or copied are now fatal instead of
# being silently lost.
sub transform_collation {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file;
		$file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;
		my $actfile = $file;

		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		my $fin;
		if (! -f $filename || !open($fin, '<', $filename)) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines;
		while (<$fin>) {
			if ((/^comment_char\s/) || (/^escape_char\s/)){
				push @lines, $_;
			}
			if (/^LC_COLLATE/../^END LC_COLLATE/) {
				$_ =~ s/[ ]+/ /g;
				push @lines, $_;
			}
		}
		close($fin);

		# The lines still end in "\n"; the extra join separator only
		# influences the digest and is kept so digests do not change.
		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(my $fout, '>', $outname)
		    or die("Cannot create $outname: $!\n");
		print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
		print $fout @lines;
		close($fout)
		    or die("Cannot write $outname: $!\n");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			copy ($outname, "$TYPE.draft/$actfile.$enc.src")
			    or die("Cannot copy $outname to " .
				"$TYPE.draft/$actfile.$enc.src: $!\n");
			$languages{$l}{$f}{data}{$c}{$enc} = $shex;
			$hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
		}
	}
	}
	}
}
503
# Read the raw value of every field named in %keys for every selected
# locale and append it to $values{language}{country}{key}.
#
# The source file is looked up in this order: "$CLDRDIR/posix/<locale>",
# "$ETCDIR/<locale>", then the family's "fallback" locale in
# "$CLDRDIR/posix".  A field starts on the line beginning with its key and
# continues on following lines for as long as a line ends in "/".  Inside
# "<...>" symbol names, underscores are replaced by spaces.
#
# Dies with "_ in data" if an underscore remains after that replacement.
# (The previous loop never terminated in that situation, so the check
# after it could not be reached.)  Unreadable inputs are reported on
# STDERR and skipped.
sub get_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);

		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file;
		$file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		my $fin;
		if (! -f $filename || !open($fin, '<', $filename)) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		chomp(@lines);
		close($fin);

		my $continue = 0;
		foreach my $k (keys(%keys)) {
			# $line aliases the element of @lines, so the edits
			# below are made to the stored lines themselves.
			foreach my $line (@lines) {
				$line =~ s/\r//;
				next if (!$continue && $line !~ /^$k\s/);
				if ($continue) {
					$line =~ s/^\s+//;
				} else {
					$line =~ s/^$k\s+//;
				}

				$values{$l}{$c}{$k} = ""
					if (!defined $values{$l}{$c}{$k});

				# A trailing "/" continues the value on the
				# next line.
				$continue = ($line =~ /\/$/);
				$line =~ s/\/$// if ($continue);

				# Replace one underscore per pass until no
				# symbol name contains one any more.
				1 while ($line =~
				    s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
				die "_ in data - $line" if ($line =~ /_/);
				$values{$l}{$c}{$k} .= $line;

				last if (!$continue);
			}
		}
	}
	}
	}
}
571
# Convert one symbolic character name to its byte string in an encoding.
#
#   $e  encoding name
#   $s  character name (the text between "<" and ">")
#
# For "UTF-8" the hex byte string comes straight from %utf8map.  For any
# other encoding the name is first resolved to a Unicode code through
# %ucd (directly, then through %utf8aliases, then through a {ucc} entry in
# %translations) and that code is looked up in %convertors; a {hex} entry
# in %translations supplies the bytes directly, and a {unicode} entry
# names a substitute character to convert instead.
#
# Returns the bytes as a string.  Dies when the name cannot be resolved.
# A hex string of odd or zero length is reported on STDERR and the text
# "length = N" is returned, as before.
#
# Changes from the previous version:
#   - codes of any whole number of bytes are packed; four-byte codes used
#     to fall through to the "length = N" path;
#   - the alias fallback for a {unicode} substitute is now reachable (it
#     used to test the already-defined code of the original name);
#   - undefined aliases are no longer used as hash keys.
sub decodecldr {
	my $e = shift;
	my $s = shift;

	my $v = undef;

	if ($e eq "UTF-8") {
		#
		# Conversion to UTF-8 can be done from the Unicode name to
		# the UTF-8 character code.
		#
		$v = $utf8map{$s};
		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	} else {
		#
		# Conversion to these encodings can be done from the Unicode
		# name to Unicode code to the encodings code.
		#
		my $ucc = undef;
		$ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
		$ucc = $ucd{name2code}{$utf8aliases{$s}}
			if (!defined $ucc
			 && $utf8aliases{$s}
			 && defined $ucd{name2code}{$utf8aliases{$s}});

		if (!defined $ucc) {
			if (defined $translations{$e}{$s}{hex}) {
				$v = $translations{$e}{$s}{hex};
				$ucc = 0;
			} elsif (defined $translations{$e}{$s}{ucc}) {
				$ucc = $translations{$e}{$s}{ucc};
			}
		}

		die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
		$v = $convertors{$e}{$ucc} if (!defined $v);

		$v = $translations{$e}{$s}{hex}
			if (!defined $v && defined $translations{$e}{$s}{hex});

		if (!defined $v && defined $translations{$e}{$s}{unicode}) {
			# Convert the substitute character named by the
			# translation table instead of $s itself.
			my $ucn = $translations{$e}{$s}{unicode};
			my $altcode = $ucd{name2code}{$ucn};
			$altcode = $ucd{name2code}{$utf8aliases{$ucn}}
				if (!defined $altcode
				 && defined $utf8aliases{$ucn});
			$v = $convertors{$e}{$altcode} if (defined $altcode);
		}

		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	}

	# Two hex digits per byte, for any number of bytes.
	my $digits = length($v);
	if ($digits >= 2 && $digits % 2 == 0) {
		return pack("C*", map { hex($_) } unpack("(a2)*", $v));
	}
	print STDERR "Cannot convert $e $s\n";
	return "length = " . $digits;

}
634
# Look up the %translations entry for character name $v in encoding $enc.
# Returns the entry, or undef when there is none.
sub translate {
	my ($enc, $v) = @_;

	my $entry = $translations{$enc}{$v};
	return $entry;
}
642
# Helper for print_fields: strip one leading and one trailing double quote
# from $text and replace every "<NAME>" in it with decodecldr($enc, NAME).
#
# The text is scanned once from left to right, so bytes produced by a
# replacement are never scanned again (a decoded "<" used to be taken for
# the start of another symbol).  Should decodecldr() return undef, the
# symbol is left in place, a message naming $key is written to STDERR and
# $$okay_ref is cleared.
sub _expand_symbols {
	my ($enc, $key, $text, $okay_ref) = @_;

	$text =~ s/^"//;
	$text =~ s/"$//;
	$text =~ s{<(.*?)>}{
		my $cm = $1;
		my $rv = decodecldr($enc, $cm);
		if (!defined $rv) {
			print STDERR
"Could not convert $key ($cm) from $DEFENCODING to $enc\n";
			$$okay_ref = 0;
			$rv = "<$cm>";
		}
		$rv;
	}ge;
	return $text;
}

# Write "$TYPE.draft/<locale>.<encoding>.src" for every selected locale
# and every encoding of that locale.
#
# Fields are emitted in the order of %keys, each preceded by a comment
# taken from %DESC.  A key format selects how the value is written:
#   "i", "ai"  as is
#   "s"        one line, symbols expanded
#   "as"       split on ";", one line per element, symbols expanded
#   ">key"     use the value and format of another key
#   "<callback<key<fmt"
#              run %callback{callback} on the value of <key>, store the
#              result as this key's value and write it using <fmt>
# A missing value is written as the text "undef".
#
# The SHA-1 of the generated text is stored in %languages and %hashtable.
# The file is first written as ".new" and then renamed to ".src", or to
# ".failed" if a symbol could not be converted.  Failures to create,
# flush or rename the file are fatal.
sub print_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
				print "Skipping ${l}_" .
				    ($f eq "x" ? "" : "${f}_") .
				    "${c} - not read\n";
				next;
			}
			my $file = $l;
			$file .= "_" . $f if ($f ne "x");
			$file .= "_" . $c;
			print "Writing to $file in $enc\n";

			if ($enc ne $DEFENCODING &&
			    !defined $convertors{$enc}) {
				print "Failed! Cannot convert to $enc.\n";
				next;
			}

			my $draft = "$TYPE.draft/$file.$enc";
			open(my $fout, '>', "$draft.new")
			    or die("Cannot create $draft.new: $!\n");
			my $okay = 1;
			my $output = "";
			print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			foreach my $k (keys(%keys)) {
				my $fmt = $keys{$k};

				die("Unknown $k in \%DESC")
					if (!defined $DESC{$k});

				$output .= "#\n# $DESC{$k}\n";

				# Replace one row with another
				if ($fmt =~ /^>/) {
					$k = substr($fmt, 1);
					$fmt = $keys{$k};
				}

				# Callback function
				if ($fmt =~ /^\</) {
					$callback{data}{c} = $c;
					$callback{data}{k} = $k;
					$callback{data}{l} = $l;
					$callback{data}{e} = $enc;
					my @a = split(/\</, substr($fmt, 1));
					my $rv = $callback{$a[0]}->(
					    $values{$l}{$c}{$a[1]});
					$values{$l}{$c}{$k} = $rv;
					$fmt = $a[2];
					$callback{data} = undef;
				}

				my $v = $values{$l}{$c}{$k};
				$v = "undef" if (!defined $v);

				if ($fmt eq "i" || $fmt eq "ai") {
					$output .= "$v\n";
					next;
				}
				if ($fmt eq "s") {
					$output .= _expand_symbols($enc, $k,
					    $v, \$okay) . "\n";
					next;
				}
				if ($fmt eq "as") {
					foreach my $item (split(/;/, $v)) {
						$output .= _expand_symbols(
						    $enc, $k, $item,
						    \$okay) . "\n";
					}
					next;
				}

				die("$k is '$fmt'");

			}

			my $digest = sha1_hex($output);
			$languages{$l}{$f}{data}{$c}{$enc} = $digest;
			$hashtable{$digest}{"${l}_${f}_${c}.$enc"} = 1;
			print $fout "$output# EOF\n";
			close($fout)
			    or die("Cannot write $draft.new: $!\n");

			my $final = $okay ? "$draft.src" : "$draft.failed";
			rename("$draft.new", $final)
			    or die("Cannot rename $draft.new to $final: $!\n");
		}
	}
	}
	}
}
792
#
# make_makefile()
#
# Writes "$TYPE.draft/Makefile" for the locale category selected by $TYPE.
#
# Inputs (file-level state):
#   @filter      - when non-empty, no Makefile is generated at all.
#   $TYPE        - category being generated (colldef, ctypedef, ...).
#   %FILESNAMES  - maps $TYPE to the installed file name (LC_*).
#   %hashtable   - digest of generated output => set of locale names
#                  ("<lang>_<family>_<country>.<encoding>") that produced it.
#   %languages   - per-locale data; entries that become SAME links are
#                  undef'ed here so they are not listed in LOCALES as well.
#
# Output: the Makefile, consisting of a fixed header with the suffix rule,
# SAME+= lines for locales whose output is identical, LOCALES+= lines for
# the remaining locales, legacy SAME+= links, and a fixed trailer.
#
# Dies if the Makefile cannot be created or written out.
#
sub make_makefile {
        return if ($#filter > -1);
        print "Creating Makefile for $TYPE\n";
        my $SRCOUT;
        my $SRCOUT2;
        # Only ctypedef has an extra rule section; default to "" so the
        # trailer heredoc does not interpolate undef (warning under -w).
        my $SRCOUT3 = "";
        my $MAPLOC;
        if ($TYPE eq "colldef") {
                $SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
                        "\t-f \${MAPLOC}/map.UTF-8 " .
                        "\${.OBJDIR}/\${.IMPSRC:T:R}";
                $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
                                "locale/etc/final-maps\n";
                $SRCOUT2 = "LC_COLLATE";
        }
        elsif ($TYPE eq "ctypedef") {
                $SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
                        "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:C/^.*\\.//} " .
                        "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
                        " || true";
                $SRCOUT2 = "LC_CTYPE";
                $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
                                "locale/etc/final-maps\n";
                $SRCOUT3 = "## SYMPAIRS\n\n" .
                        ".for PAIR in \${SYMPAIRS}\n" .
                        "\${PAIR:C/^.*://:S/src\$/LC_CTYPE/}: " .
                        "\${PAIR:C/:.*//}\n" .
                        "\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
                        "\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
                        "\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
                        " || true\n" .
                        ".endfor\n\n";
        }
        else {
                $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
                $SRCOUT2 = "out";
                $MAPLOC = "";
        }

        my $makefile = "$TYPE.draft/Makefile";
        open(my $fout, ">", $makefile)
            or die("Cannot create $makefile: $!\n");

        # Tabs are spelled as \t so the generated Makefile does not depend
        # on how this script's own whitespace is stored; the recipe line
        # in particular has to start with a tab.
        print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR=\t\${SHAREDIR}/locale
FILESNAME=\t$FILESNAMES{$TYPE}
.SUFFIXES:\t.src .${SRCOUT2}
${MAPLOC}
.src.${SRCOUT2}:
\t$SRCOUT

## PLACEHOLDER

EOF

        foreach my $hash (keys(%hashtable)) {
                # For colldef, weight LOCALES to UTF-8
                #     Sort as upper-case and reverse to achieve it
                #     Make en_US, ru_RU, and ca_AD preferred
                my @files;
                if ($TYPE eq "colldef") {
                        @files = sort {
                                if ($a eq 'en_x_US.UTF-8' ||
                                    $a eq 'ru_x_RU.UTF-8' ||
                                    $a eq 'ca_x_AD.UTF-8') { return -1; }
                                elsif ($b eq 'en_x_US.UTF-8' ||
                                       $b eq 'ru_x_RU.UTF-8' ||
                                       $b eq 'ca_x_AD.UTF-8') { return 1; }
                                else { return uc($b) cmp uc($a); }
                                } keys(%{$hashtable{$hash}});
                } elsif ($TYPE eq "ctypedef") {
                        # Preference: en_US.UTF-8, then any en_US, then
                        # en_GB.ISO8859-15 / ru_RU, then reverse order.
                        # Both operands are tested with the same anchored
                        # patterns so the comparison is symmetric.
                        @files = sort {
                                if ($a eq 'en_x_US.UTF-8') { return -1; }
                                elsif ($b eq 'en_x_US.UTF-8') { return 1; }
                                if ($a =~ /^en_x_US/) { return -1; }
                                elsif ($b =~ /^en_x_US/) { return 1; }

                                if ($a =~ /^en_x_GB\.ISO8859-15/ ||
                                    $a =~ /^ru_x_RU/) { return -1; }
                                elsif ($b =~ /^en_x_GB\.ISO8859-15/ ||
                                       $b =~ /^ru_x_RU/) { return 1; }
                                else { return uc($b) cmp uc($a); }

                                } keys(%{$hashtable{$hash}});
                } else {
                        # Names containing _Comm_ sort last, en_US.UTF-8
                        # first, everything else in reverse order.
                        @files = sort {
                                if ($a =~ /_Comm_/ ||
                                    $b eq 'en_x_US.UTF-8') { return 1; }
                                elsif ($b =~ /_Comm_/ ||
                                       $a eq 'en_x_US.UTF-8') { return -1; }
                                else { return uc($b) cmp uc($a); }
                                } keys(%{$hashtable{$hash}});
                }

                # A single locale for this digest needs no links.
                next if ($#files <= 0);

                # The first (preferred) name is the real file; all other
                # names with the same digest become links to it.
                my $link = shift(@files);
                $link =~ s/_x_/_/;      # strip family if none there
                foreach my $file (@files) {
                        # <lang>_<family>_<country>.<encoding>
                        my @parts = split(/_/, $file);
                        my @tail = split(/\./, $parts[-1]);
                        $file =~ s/_x_/_/;
                        print {$fout} "SAME+=\t\t$link:$file\n";
                        # Mark as linked so it is left out of LOCALES below.
                        undef($languages{$parts[0]}{$parts[1]}{data}{$tail[0]}{$tail[1]});
                }
        }

        foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
        foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                # @filter is empty whenever execution gets here (see the
                # early return above); the check is kept as a safeguard.
                next if ($#filter == 2 && ($filter[0] ne $l
                    || $filter[1] ne $f || $filter[2] ne $c));
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
                if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
                 && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
                        print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
                            "${c} - not read\n";
                        next;
                }

                # Locale name without encoding; family "x" means "none".
                my $file = $l . "_";
                $file .= $f . "_" if ($f ne "x");
                $file .= $c;

                foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                        # undef entries were emitted as SAME links above.
                        next if (!defined $languages{$l}{$f}{data}{$c}{$e});
                        print {$fout} "LOCALES+=\t$file.$e\n";
                }

                if (defined $languages{$l}{$f}{nc_link}) {
                        foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                                print {$fout} "SAME+=\t\t$file.$e:$languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
                        }
                }

                if (defined $languages{$l}{$f}{e_link}) {
                        # e_link is a blank-separated list of "from:to"
                        # encoding pairs.
                        foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
                                my @pair = split(/:/, $el);
                                print {$fout} "SAME+=\t\t$file.$pair[0]:$file.$pair[1]\t# legacy (same charset)\n";
                        }
                }

        }
        }
        }

        print {$fout} <<EOF;

FILES=\t\t\${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES=\t\${FILES}

.for f in \${SAME}
SYMLINKS+=\t../\${f:C/:.*\$//}/\${FILESNAME} \\
\t\t\${LOCALEDIR}/\${f:C/^.*://}/\${FILESNAME}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

${SRCOUT3}.include <bsd.prog.mk>
EOF

        # Buffered write errors only surface at close time.
        close($fout) or die("Cannot write $makefile: $!\n");
}