9aa26664fa8248ff8282c29442640235bdf60a2a
[dragonfly.git] / tools / tools / locale / tools / cldr2def.pl
1 #!/usr/local/bin/perl -wC
2
3 use strict;
4 use File::Copy;
5 use XML::Parser;
6 use Tie::IxHash;
7 use Data::Dumper;
8 use Getopt::Long;
9 use Digest::SHA qw(sha1_hex);
10 require "charmaps.pm";
11
12
# Command-line handling.  The parsed option values are kept in the
# file-level variables declared here and read by the subroutines below.
my $DEFENCODING = "UTF-8";      # encoding of the *.src / *.cm input files
my @filter = ();                # filled from --lc by get_languages()

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $doonly = undef;

# Show the usage text when too few arguments were given and also when
# GetOptions() rejects the command line: it returns false after reporting
# the offending option, and continuing would run with unset options.
my $result = ($#ARGV >= 2) && GetOptions (
                "cldr=s"        => \$CLDRDIR,
                "unidata=s"     => \$UNIDATADIR,
                "etc=s"         => \$ETCDIR,
                "type=s"        => \$TYPE,
                "lc=s"          => \$doonly
            );

if (!$result) {
        print "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";
        exit(1);
}
34
# Shared state filled in by the loader subroutines:
#   %convertors   {encoding}{unicode code} -> code in that encoding
#   %ucd          {code2name}/{name2code} tables from UnicodeData.txt
#   %values       {language}{country}{key} -> raw field text
#   %hashtable    {sha1}{locale.encoding} -> 1
my (%convertors, %ucd, %values, %hashtable);
my (%languages, %translations, %encodings, %alternativemonths);
get_languages();

# %utf8map: symbol name -> hex string; %utf8aliases: symbol name -> the
# preceding symbol name that maps to the same hex string.
my (%utf8map, %utf8aliases);
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# Both hashes are tied to Tie::IxHash; %keys is iterated with keys() when
# the fields are read and written.
my %keys;
foreach my $ordered (\%keys, \%hashtable) {
        tie(%{$ordered}, "Tie::IxHash");
}
55
# Value of FILESNAME written to the generated Makefile for each --type.
my %FILESNAMES = (
        monetdef        => "LC_MONETARY",
        timedef         => "LC_TIME",
        msgdef          => "LC_MESSAGES",
        numericdef      => "LC_NUMERIC",
        colldef         => "LC_COLLATE",
        ctypedef        => "LC_CTYPE",
);

# Callbacks named in the "<callback<source<format" entries of %keys.
# The "data" slot is filled by print_fields() around each callback call.
my %callback = (
        "mdorder"       => \&callback_mdorder,
        "altmon"        => \&callback_altmon,
        "cformat"       => \&callback_cformat,
        "data"          => undef,
);

# Comment text written above each field in the generated files.
my %DESC = (
        # Fields that are described by their own name
        # (numericdef, monetdef, msgdef and part of timedef).
        (map { ($_ => $_) } qw(
                decimal_point thousands_sep grouping
                currency_symbol mon_decimal_point mon_thousands_sep
                mon_grouping positive_sign negative_sign
                int_frac_digits frac_digits
                p_cs_precedes p_sep_by_space
                n_cs_precedes n_sep_by_space
                p_sign_posn n_sign_posn
                yesexpr noexpr yesstr nostr
                c_fmt md_order
        )),

        # monetdef
        int_curr_symbol => "int_curr_symbol (last character always SPACE)",

        # timedef
        abmon           => "Short month names",
        mon             => "Long month names (as in a date)",
        abday           => "Short weekday names",
        day             => "Long weekday names",
        t_fmt           => "X_fmt",
        d_fmt           => "x_fmt",
        am_pm           => "AM/PM",
        d_t_fmt         => "date_fmt",
        altmon          => "Long month names (without case ending)",
        t_fmt_ampm      => "ampm_fmt",
);
117
# Run the generator selected by --type.
#
# For the field-based types %keys lists, in output order, the fields to
# read and how print_fields() writes each of them:
#   "s"   a string, "as" a ;-separated list of strings,
#   "i" / "ai" a value copied as-is,
#   "<callback<source<format" the result of calling $callback{callback}
#   on the value of field "source", written as "format".
if (!defined $TYPE) {
        print STDERR "$0: no --type=<type> given\n";
        exit(1);
}

if ($TYPE eq "colldef") {
        transform_collation();
        make_makefile();
}
elsif ($TYPE eq "ctypedef") {
        transform_ctypes();
        make_makefile();
}
elsif ($TYPE eq "numericdef") {
        %keys = (
            "decimal_point"     => "s",
            "thousands_sep"     => "s",
            "grouping"          => "ai",
        );
        get_fields();
        print_fields();
        make_makefile();
}
elsif ($TYPE eq "monetdef") {
        %keys = (
            "int_curr_symbol"   => "s",
            "currency_symbol"   => "s",
            "mon_decimal_point" => "s",
            "mon_thousands_sep" => "s",
            "mon_grouping"      => "ai",
            "positive_sign"     => "s",
            "negative_sign"     => "s",
            "int_frac_digits"   => "i",
            "frac_digits"       => "i",
            "p_cs_precedes"     => "i",
            "p_sep_by_space"    => "i",
            "n_cs_precedes"     => "i",
            "n_sep_by_space"    => "i",
            "p_sign_posn"       => "i",
            "n_sign_posn"       => "i"
        );
        get_fields();
        print_fields();
        make_makefile();
}
elsif ($TYPE eq "msgdef") {
        %keys = (
            "yesexpr"           => "s",
            "noexpr"            => "s",
            "yesstr"            => "s",
            "nostr"             => "s"
        );
        get_fields();
        print_fields();
        make_makefile();
}
elsif ($TYPE eq "timedef") {
        # "d_fmt" used to be listed twice; the second entry only stored
        # the same value again, so a single entry is equivalent.
        %keys = (
            "abmon"             => "as",
            "mon"               => "as",
            "abday"             => "as",
            "day"               => "as",
            "t_fmt"             => "s",
            "d_fmt"             => "s",
            "c_fmt"             => "<cformat<d_t_fmt<s",
            "am_pm"             => "as",
            "d_t_fmt"           => "s",
            "altmon"            => "<altmon<mon<as",
            "md_order"          => "<mdorder<d_fmt<s",
            "t_fmt_ampm"        => "s",
        );
        get_fields();
        print_fields();
        make_makefile();
}
else {
        # Previously an unknown type fell through every test and the
        # script exited successfully without producing anything.
        print STDERR "$0: unknown type '$TYPE' (expected one of: " .
            join(", ", sort keys(%FILESNAMES)) . ")\n";
        exit(1);
}
194
# Callback "cformat" (used for the c_fmt row of %keys): return the given
# format with the first " %Z" and the first " %z" removed.
#
# Returns undef for an undefined argument, like callback_mdorder() does;
# print_fields() passes undef when the source field was not read, and the
# substitutions would otherwise warn about an uninitialized value.
sub callback_cformat {
        my $s = shift;
        return undef if (!defined $s);
        $s =~ s/ %Z//;
        $s =~ s/ %z//;
        return $s;
}
201
# Callback "mdorder" (used for the md_order row of %keys): keep only the
# letters "d" and "m" of the given string, in their original order.
# An undefined argument yields undef.
sub callback_mdorder {
        my ($fmt) = @_;

        if (defined $fmt) {
                # Delete every character that is not "d" or "m".
                $fmt =~ tr/dm//cd;
                return $fmt;
        }
        return undef;
}
208
# Callback "altmon": when %alternativemonths has an entry for the
# language/country currently stored in $callback{data}, return that entry
# with the whitespace around each ;-separated name removed.  Otherwise
# return the argument (the "mon" value) unchanged.
sub callback_altmon {
        my ($months) = @_;

        my $alt = $alternativemonths{$callback{data}{l}}{$callback{data}{c}};
        return $months if (!defined $alt);

        my @cleaned = map {
                s/^\s+//;
                s/\s+$//;
                $_;
        } split(";", $alt);

        return join(";", @cleaned);
}
228
229 ############################
230
# Load UnicodeData.txt from the given directory into %ucd:
#   $ucd{code2name}{code} = name   (first field  -> second field)
#   $ucd{name2code}{name} = code   (second field -> first field)
# Dies when the file cannot be opened.
sub get_unidata {
        my $directory = shift;
        my $path = "$directory/UnicodeData.txt";

        # Three-argument open on a lexical handle; report the reason.
        open(my $fin, "<", $path)
            or die("Cannot open $path: $!");

        while (my $l = <$fin>) {
                chomp($l);
                my @a = split(/;/, $l);

                # A line without both fields (e.g. a blank line) would
                # only create an entry under an empty key.
                next if ($#a < 1);

                $ucd{code2name}{"$a[0]"} = $a[1];       # Unicode name
                $ucd{name2code}{"$a[1]"} = $a[0];       # Unicode code
        }
        close($fin);
}
247
# Read the CHARMAP section of the given charmap file into %utf8map and
# %utf8aliases.
#
# Each "<NAME> value" line stores value (with every "\x" removed) under
# NAME (with "_" replaced by a space).  When a line has the same value as
# the line before it, the earlier name is recorded in %utf8aliases.
# If the file cannot be opened a message is printed and both maps are
# left untouched.
sub get_utf8map {
        my $file = shift;

        my $fin;
        if (!open($fin, "<", $file)) {
                print STDERR "Cannot open $file: $!\n";
                return;
        }

        my $prev_k = undef;
        my $prev_v = "";
        my $incharmap = 0;
        while (my $l = <$fin>) {
                chomp($l);
                $l =~ s/\r//;
                next if ($l =~ /^\#/);
                next if ($l eq "");

                if ($l eq "CHARMAP") {
                        $incharmap = 1;
                        next;
                }

                next if (!$incharmap);
                last if ($l eq "END CHARMAP");

                # Only use the captures of a successful match; a line in
                # another format must not be processed with whatever the
                # capture variables currently hold.
                next if ($l !~ /^<([^\s]+)>\s+(.*)/);
                my $k = $1;
                my $v = $2;
                $k =~ s/_/ /g;          # unicode char string
                $v =~ s/\\x//g;         # UTF-8 char code
                $utf8map{$k} = $v;

                $utf8aliases{$k} = $prev_k if ($prev_v eq $v);

                $prev_v = $v;
                $prev_k = $k;
        }
        close($fin);
}
285
# For every encoding named in %encodings read "$dir/<encoding>.TXT" and
# fill %convertors{encoding}{unicode code} = local code (both upper-cased,
# without a leading "0x").  Encodings whose file cannot be opened are
# reported and skipped; the others are marked with 1 in %encodings.
sub get_encodings {
        my ($dir) = @_;

        foreach my $e (sort(keys(%encodings))) {
                unless (open(FIN, "$dir/$e.TXT")) {
                        print "Cannot open charmap for $e\n";
                        next;
                }
                $encodings{$e} = 1;

                while (defined(my $row = <FIN>)) {
                        chomp($row);
                        $row =~ s/\r//;
                        next if ($row =~ /^\#/ || $row eq "");

                        # First column: code in this encoding,
                        # second column: Unicode code.
                        my ($local, $unicode) = split(" ", $row);
                        next if (!defined $unicode);

                        $local =~ s/^0[xX]//;
                        $unicode =~ s/^0[xX]//;
                        $convertors{$e}{uc($unicode)} = uc($local);
                }
                close(FIN);
        }
}
311
# Load the locale description returned by get_xmldata($ETCDIR) into
# %languages, %translations, %alternativemonths and %encodings, and turn
# the optional --lc value into @filter.
#
# @filter holds three components that the per-locale loops compare with
# their language, middle and country keys.  A two-part value "la_CC" gets
# "x" as its middle component; a three-part value is used as given.
sub get_languages {
        my %data = get_xmldata($ETCDIR);
        %languages = %{$data{L}};
        %translations = %{$data{T}};
        %alternativemonths = %{$data{AM}};
        %encodings = %{$data{E}};

        return if (!defined $doonly);

        my @a = split(/_/, $doonly);
        if ($#a == 1) {
                @filter = ($a[0], "x", $a[1]);
        } elsif ($#a == 2) {
                @filter = @a;
        } else {
                # Any other shape leaves @filter empty, which makes every
                # locale get processed; say so instead of doing it
                # silently.
                print STDERR "Ignoring --lc=$doonly: expected two or " .
                    "three parts separated by '_'\n";
        }

        print Dumper(@filter);
        return;
}
335
# For every selected locale, copy the comment_char / escape_char lines
# and the LC_CTYPE section of its UTF-8 source file into
# "$TYPE.draft/<locale>.UTF-8.src", then copy that file once for each
# other encoding of the locale.
#
# The SHA-1 of the extracted lines is stored in %languages and
# %hashtable; for the other encodings the encoding name is appended
# before hashing.  A source file that is missing or unreadable is
# reported and the locale is skipped; failing to create the draft file
# is fatal.
sub transform_ctypes {
        foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
        foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if ($#filter == 2 && ($filter[0] ne $l
                    || $filter[1] ne $f || $filter[2] ne $c));
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
                my $file;
                $file = $l . "_";
                $file .= $f . "_" if ($f ne "x");
                $file .= $c;
                my $actfile = $file;

                # Look in the CLDR tree, then in the etc directory, then
                # for the configured fallback locale in the CLDR tree.
                my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
                $filename = "$ETCDIR/$file.$DEFENCODING.src"
                    if (! -f $filename);
                if (! -f $filename
                 && defined $languages{$l}{$f}{fallback}) {
                        $file = $languages{$l}{$f}{fallback};
                        $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
                }
                $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
                    if (! -f $filename);
                if (! -f $filename) {
                        print STDERR
                            "Cannot open $file.$DEFENCODING.src or fallback\n";
                        next;
                }

                my $fin;
                if (!open($fin, "<", $filename)) {
                        print STDERR "Cannot open $filename: $!\n";
                        next;
                }
                print "Reading from $filename for ${l}_${f}_${c}\n";
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
                my @lines;
                my $shex;
                my $uhex;

                # Track the section with a per-file flag: the ".." range
                # operator keeps its state between files, so a file
                # without an END line would leak into the next one.
                my $insection = 0;
                while (my $line = <$fin>) {
                        if ($line =~ /^comment_char\s/
                         || $line =~ /^escape_char\s/) {
                                push @lines, $line;
                        }
                        $insection = 1 if ($line =~ /^LC_CTYPE/);
                        if ($insection) {
                                push @lines, $line;
                                $insection = 0
                                    if ($line =~ /^END LC_CTYPE/);
                        }
                }
                close($fin);

                $shex = sha1_hex(join("\n", @lines));
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
                $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

                my $draft = "$TYPE.draft/$actfile.$DEFENCODING.src";
                open(my $fout, ">", $draft)
                    or die("Cannot create $draft: $!");
                print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
                print $fout @lines;
                close($fout)
                    or die("Cannot write $draft: $!");

                foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                        next if ($enc eq $DEFENCODING);
                        copy($draft, "$TYPE.draft/$actfile.$enc.src")
                            or print STDERR "Cannot copy $draft to " .
                                "$TYPE.draft/$actfile.$enc.src: $!\n";
                        $uhex = sha1_hex(join("\n", @lines) . $enc);
                        $languages{$l}{$f}{data}{$c}{$enc} = $uhex;
                        $hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
                }
        }
        }
        }
}
406
407
# For every selected locale, copy the comment_char / escape_char lines
# and the LC_COLLATE section (with runs of spaces squeezed to one space)
# of its UTF-8 source file into "$TYPE.draft/<locale>.UTF-8.src", then
# copy that file once for each other encoding of the locale.
#
# The SHA-1 of the extracted lines is stored in %languages and
# %hashtable; the other encodings are recorded under the same hash.
# A source file that is missing or unreadable is reported and the locale
# is skipped; failing to create the draft file is fatal.
sub transform_collation {
        foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
        foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if ($#filter == 2 && ($filter[0] ne $l
                    || $filter[1] ne $f || $filter[2] ne $c));
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
                my $file;
                $file = $l . "_";
                $file .= $f . "_" if ($f ne "x");
                $file .= $c;
                my $actfile = $file;

                # Look in the CLDR tree, then in the etc directory, then
                # for the configured fallback locale in the CLDR tree.
                my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
                $filename = "$ETCDIR/$file.$DEFENCODING.src"
                    if (! -f $filename);
                if (! -f $filename
                 && defined $languages{$l}{$f}{fallback}) {
                        $file = $languages{$l}{$f}{fallback};
                        $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
                }
                $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
                    if (! -f $filename);
                if (! -f $filename) {
                        print STDERR
                            "Cannot open $file.$DEFENCODING.src or fallback\n";
                        next;
                }

                my $fin;
                if (!open($fin, "<", $filename)) {
                        print STDERR "Cannot open $filename: $!\n";
                        next;
                }
                print "Reading from $filename for ${l}_${f}_${c}\n";
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
                my @lines;
                my $shex;

                # Track the section with a per-file flag: the ".." range
                # operator keeps its state between files, so a file
                # without an END line would leak into the next one.
                my $insection = 0;
                while (my $line = <$fin>) {
                        if ($line =~ /^comment_char\s/
                         || $line =~ /^escape_char\s/) {
                                push @lines, $line;
                        }
                        $insection = 1 if ($line =~ /^LC_COLLATE/);
                        if ($insection) {
                                my $last = ($line =~ /^END LC_COLLATE/);
                                $line =~ s/[ ]+/ /g;
                                push @lines, $line;
                                $insection = 0 if ($last);
                        }
                }
                close($fin);

                $shex = sha1_hex(join("\n", @lines));
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
                $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

                my $draft = "$TYPE.draft/$actfile.$DEFENCODING.src";
                open(my $fout, ">", $draft)
                    or die("Cannot create $draft: $!");
                print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
                print $fout @lines;
                close($fout)
                    or die("Cannot write $draft: $!");

                foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                        next if ($enc eq $DEFENCODING);
                        copy($draft, "$TYPE.draft/$actfile.$enc.src")
                            or print STDERR "Cannot copy $draft to " .
                                "$TYPE.draft/$actfile.$enc.src: $!\n";
                        $languages{$l}{$f}{data}{$c}{$enc} = $shex;
                        $hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
                }
        }
        }
        }
}
477
# For every selected locale read its UTF-8 source file and collect, for
# each key of %keys, the text that follows the key into
# $values{language}{country}{key}.
#
# A value line ending in "/" is continued on the next line.  Inside
# <...> symbol names every "_" is replaced by a space; an underscore that
# cannot be rewritten that way is fatal.
#
# NOTE(review): %values is indexed by language and country only, not by
# the middle key $f, so two entries that differ only in $f append to the
# same value - confirm whether that is intended.
sub get_fields {
        foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
        foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if ($#filter == 2 && ($filter[0] ne $l
                    || $filter[1] ne $f || $filter[2] ne $c));
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);

                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
                my $file;
                $file = $l . "_";
                $file .= $f . "_" if ($f ne "x");
                $file .= $c;

                # Look in the CLDR tree, then in the etc directory, then
                # for the configured fallback locale in the CLDR tree.
                my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
                $filename = "$ETCDIR/$file.$DEFENCODING.src"
                    if (! -f $filename);
                if (! -f $filename
                 && defined $languages{$l}{$f}{fallback}) {
                        $file = $languages{$l}{$f}{fallback};
                        $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
                }
                $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
                    if (! -f $filename);
                if (! -f $filename) {
                        print STDERR
                            "Cannot open $file.$DEFENCODING.src or fallback\n";
                        next;
                }

                my $fin;
                if (!open($fin, "<", $filename)) {
                        print STDERR "Cannot open $filename: $!\n";
                        next;
                }
                print "Reading from $filename for ${l}_${f}_${c}\n";
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
                my @lines = <$fin>;
                chomp(@lines);
                close($fin);

                foreach my $k (keys(%keys)) {
                        # Start every key outside a continuation, so a
                        # file ending in "/" cannot make the next key
                        # swallow the first line of the file.
                        my $continue = 0;

                        # $line aliases the element of @lines: the edits
                        # below are kept for the following keys.
                        foreach my $line (@lines) {
                                $line =~ s/\r//;
                                next if (!$continue && $line !~ /^$k\s/);
                                if ($continue) {
                                        $line =~ s/^\s+//;
                                } else {
                                        $line =~ s/^$k\s+//;
                                }

                                $values{$l}{$c}{$k} = ""
                                        if (!defined $values{$l}{$c}{$k});

                                $continue = ($line =~ /\/$/);
                                $line =~ s/\/$// if ($continue);

                                # Repeat until no symbol name contains an
                                # underscore.  Looping on the substitution
                                # itself ends when nothing can be
                                # rewritten, so the check below is
                                # reached instead of spinning forever.
                                1 while ($line =~
                                    s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
                                die "_ in data - $line" if ($line =~ /_/);
                                $values{$l}{$c}{$k} .= $line;

                                last if (!$continue);
                        }
                }
        }
        }
        }
}
545
# Convert the symbol name $s to its byte sequence in encoding $e.
#
# For "UTF-8" the hex string comes straight from %utf8map.  For any other
# encoding the name is mapped to a Unicode code through %ucd (directly or
# via its %utf8aliases entry) and from there through %convertors;
# %translations{$e}{$s} may supply a hex value, a Unicode code ("ucc") or
# a replacement symbol name ("unicode").
#
# Returns the bytes for a hex string of one to four bytes.  Dies when no
# mapping is found.  For any other length a message is printed and the
# text "length = N" is returned.
sub decodecldr {
        my $e = shift;
        my $s = shift;

        my $v = undef;

        if ($e eq "UTF-8") {
                #
                # Conversion to UTF-8 can be done from the Unicode name to
                # the UTF-8 character code.
                #
                $v = $utf8map{$s};
                die "Cannot convert $s in $e (charmap)" if (!defined $v);
        } else {
                #
                # Conversion to these encodings can be done from the Unicode
                # name to Unicode code to the encodings code.
                #
                my $ucc = undef;
                $ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
                $ucc = $ucd{name2code}{$utf8aliases{$s}}
                        if (!defined $ucc
                         && $utf8aliases{$s}
                         && defined $ucd{name2code}{$utf8aliases{$s}});

                if (!defined $ucc) {
                        if (defined $translations{$e}{$s}{hex}) {
                                $v = $translations{$e}{$s}{hex};
                                $ucc = 0;
                        } elsif (defined $translations{$e}{$s}{ucc}) {
                                $ucc = $translations{$e}{$s}{ucc};
                        }
                }

                die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
                $v = $convertors{$e}{$ucc} if (!defined $v);

                $v = $translations{$e}{$s}{hex}
                        if (!defined $v && defined $translations{$e}{$s}{hex});

                if (!defined $v && defined $translations{$e}{$s}{unicode}) {
                        #
                        # Look the replacement name up on its own.  $ucc is
                        # always defined here, so reusing it made the alias
                        # fallback unreachable.
                        #
                        my $ucn = $translations{$e}{$s}{unicode};
                        my $altucc = $ucd{name2code}{$ucn};
                        $altucc = $ucd{name2code}{$utf8aliases{$ucn}}
                                if (!defined $altucc && $utf8aliases{$ucn});
                        $v = $convertors{$e}{$altucc} if (defined $altucc);
                }

                die "Cannot convert $s in $e (charmap)" if (!defined $v);
        }

        # Two hex digits per byte; up to four bytes are accepted so that
        # four-byte codes are converted as well.
        my $len = length($v);
        if ($len == 2 || $len == 4 || $len == 6 || $len == 8) {
                my @bytes = map { hex(substr($v, 2 * $_, 2)) }
                    (0 .. ($len / 2 - 1));
                return pack("C*", @bytes);
        }

        print STDERR "Cannot convert $e $s\n";
        return "length = " . $len;
}
608
# Return the %translations entry for symbol $v in encoding $enc, or undef
# when there is none.
sub translate {
        my ($enc, $v) = @_;

        my $entry = $translations{$enc}{$v};
        return $entry;
}
616
# Helper for print_fields(): strip one leading and one trailing double
# quote from $text and replace every <symbol> in it by the result of
# decodecldr($enc, symbol).
#
# The text is scanned once from left to right, so decoded output is never
# searched for further symbols.  If decodecldr() returns undef the symbol
# is kept as it was, a message is printed and $$okay is set to 0.
sub _decode_cldr_symbols {
        my ($enc, $k, $text, $okay) = @_;

        $text =~ s/^"//;
        $text =~ s/"$//;

        my $out = "";
        my $done = 0;           # offset of the first unprocessed char
        while ($text =~ /<(.*?)>/g) {
                my $cm = $1;
                my ($from, $to) = ($-[0], $+[0]);

                $out .= substr($text, $done, $from - $done);
                $done = $to;

                my $rv = decodecldr($enc, $cm);
                if (!defined $rv) {
                        print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
                        $$okay = 0;
                        $rv = "<$cm>";
                }
                $out .= $rv;
        }
        $out .= substr($text, $done);

        return $out;
}

# Write "$TYPE.draft/<locale>.<encoding>.src" for every selected locale
# and each of its encodings, using the values collected by get_fields().
#
# Every key of %keys is written as a "# <description>" comment followed
# by its value, formatted according to the key's %keys entry ("i", "ai",
# "s", "as", optionally produced by a callback).  The file is first
# written as ".new" and then renamed to ".src", or to ".failed" when a
# symbol could not be converted.  The SHA-1 of the generated text is
# recorded in %languages and %hashtable.
#
# NOTE(review): %values is indexed by language and country only, not by
# the middle key $f - confirm that this matches how it is filled.
sub print_fields {
        foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
        foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if ($#filter == 2 && ($filter[0] ne $l
                    || $filter[1] ne $f || $filter[2] ne $c));
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
                foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                        if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
                                print "Skipping ${l}_" .
                                    ($f eq "x" ? "" : "${f}_") .
                                    "${c} - not read\n";
                                next;
                        }
                        my $file = $l;
                        $file .= "_" . $f if ($f ne "x");
                        $file .= "_" . $c;
                        print "Writing to $file in $enc\n";

                        if ($enc ne $DEFENCODING &&
                            !defined $convertors{$enc}) {
                                print "Failed! Cannot convert to $enc.\n";
                                next;
                        }

                        my $newfile = "$TYPE.draft/$file.$enc.new";
                        open(my $fout, ">", $newfile)
                            or die("Cannot create $newfile: $!");
                        my $okay = 1;
                        my $output = "";
                        print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
                        foreach my $k (keys(%keys)) {
                                my $fmt = $keys{$k};

                                die("Unknown $k in \%DESC")
                                        if (!defined $DESC{$k});

                                $output .= "#\n# $DESC{$k}\n";

                                # Replace one row with another
                                if ($fmt =~ /^>/) {
                                        $k = substr($fmt, 1);
                                        $fmt = $keys{$k};
                                }

                                # Callback function: "<name<source<format"
                                if ($fmt =~ /^\</) {
                                        $callback{data}{c} = $c;
                                        $callback{data}{k} = $k;
                                        $callback{data}{l} = $l;
                                        $callback{data}{e} = $enc;
                                        my @a = split(/\</, substr($fmt, 1));
                                        my $rv = $callback{$a[0]}->(
                                            $values{$l}{$c}{$a[1]});
                                        $values{$l}{$c}{$k} = $rv;
                                        $fmt = $a[2];
                                        $callback{data} = undef;
                                }

                                my $v = $values{$l}{$c}{$k};
                                $v = "undef" if (!defined $v);

                                if ($fmt eq "i" || $fmt eq "ai") {
                                        $output .= "$v\n";
                                        next;
                                }
                                if ($fmt eq "s") {
                                        $output .= _decode_cldr_symbols(
                                            $enc, $k, $v, \$okay) . "\n";
                                        next;
                                }
                                if ($fmt eq "as") {
                                        # One output line per list item.
                                        foreach my $item (split(/;/, $v)) {
                                                $output .=
                                                    _decode_cldr_symbols(
                                                    $enc, $k, $item,
                                                    \$okay) . "\n";
                                        }
                                        next;
                                }

                                die("$k is '$fmt'");

                        }

                        $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
                        $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
                        print $fout "$output# EOF\n";
                        close($fout)
                            or die("Cannot write $newfile: $!");

                        my $final = "$TYPE.draft/$file.$enc." .
                            ($okay ? "src" : "failed");
                        rename($newfile, $final)
                            or print STDERR
                                "Cannot rename $newfile to $final: $!\n";
                }
        }
        }
        }
}
766
#
# make_makefile - write "$TYPE.draft/Makefile" for the generated sources.
#
# Takes no arguments.  Works entirely on file-level state:
#   $TYPE         definition type being generated (e.g. "colldef")
#   @filter       when non-empty the Makefile is not generated at all
#   %FILESNAMES   maps $TYPE to the LC_* file name
#   %hashtable    sha1 of a generated source => { "<l>_<f>_<c>.<enc>" => 1 }
#   %languages    $languages{l}{f}{data}{c}{enc} plus optional per-family
#                 {definitions}, {nc_link} and {e_link} entries
#   $DEFENCODING  encoding whose data value "0" marks a locale to be skipped
#
# Side effects: creates/overwrites "$TYPE.draft/Makefile", prints progress to
# STDOUT, and undefines the %languages data entry of every locale that is
# emitted as a SAME link (so that it is not listed in LOCALES as well).
# Dies if the Makefile cannot be created or completely written.
#
sub make_makefile {
        return if ($#filter > -1);
        print "Creating Makefile for $TYPE\n";

        my $SRCOUT;
        my $SRCOUT2;
        # Stays empty for types that need no character maps; initialised so
        # that interpolating it below does not warn about an undefined value.
        my $MAPLOC = "";
        # Predicate telling whether a "<l>_<f>_<c>.<enc>" name should be
        # favoured as the link target among identical sources; undefined for
        # types without such a preference.
        my $is_preferred;

        if ($TYPE eq "colldef") {
                $SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
                        "\t-f \${MAPLOC}/map.UTF-8 " .
                        "\${.OBJDIR}/\${.IMPSRC:T:R}";
                $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
                                "locale/etc/final-maps\n";
                $SRCOUT2 = "LC_COLLATE";

                # Make en_US, ru_RU, and ca_AD (UTF-8) preferred.
                my %preferred = map { $_ => 1 }
                    qw(en_x_US.UTF-8 ru_x_RU.UTF-8 ca_x_AD.UTF-8);
                $is_preferred = sub { exists $preferred{$_[0]} };
        }
        elsif ($TYPE eq "ctypedef") {
                $SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
                        "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:C/^.*\\.//} " .
                        "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
                        " || true";
                $SRCOUT2 = "LC_CTYPE";
                $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
                                "locale/etc/final-maps\n";

                # Any encoding of en_US or ru_RU is preferred.
                $is_preferred = sub { $_[0] =~ /^(?:en_x_US|ru_x_RU)/ };
        }
        else {
                $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
                $SRCOUT2 = "out";
        }

        my $makefile = "$TYPE.draft/Makefile";
        open(my $fout, '>', $makefile)
            or die("Cannot create $makefile: $!\n");

        # The rule's command line must start with a tab for make to accept
        # it, hence the explicit \t rather than literal indentation.
        print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR=      /usr/share/locale
FILESNAME=      $FILESNAMES{$TYPE}
.SUFFIXES:      .src .${SRCOUT2}
${MAPLOC}
.src.${SRCOUT2}:
\t$SRCOUT

## PLACEHOLDER

EOF

        # Locales whose generated source hashed identically are installed
        # once; the remaining ones become SAME links to the first name.
        foreach my $hash (keys(%hashtable)) {
                my @files = keys(%{$hashtable{$hash}});
                if (defined $is_preferred) {
                        # Preferred names first; within each group sort as
                        # upper-case and reverse, which weights towards
                        # UTF-8.  Ranking both operands keeps the comparator
                        # consistent even when several preferred names share
                        # one hash.
                        @files = sort {
                                my $rank_a = $is_preferred->($a) ? 0 : 1;
                                my $rank_b = $is_preferred->($b) ? 0 : 1;
                                $rank_a <=> $rank_b or uc($b) cmp uc($a);
                        } @files;
                } else {
                        @files = sort(@files);
                }

                # A single name has nothing to be linked to.
                next if ($#files < 1);

                my $link = shift(@files);
                $link =~ s/_x_/_/;      # strip family if none there
                foreach my $file (@files) {
                        # Names are "<l>_<f>_<c>.<enc>"; the limits keep the
                        # country and encoding parts in one piece.
                        my ($l, $f, $rest) = split(/_/, $file, 3);
                        my ($c, $e) = split(/\./, $rest, 2);
                        (my $name = $file) =~ s/_x_/_/;
                        print {$fout} "SAME+=\t\t$link:$name\n";
                        # Keep the key but drop the value so the LOCALES
                        # loop below skips this encoding.
                        undef($languages{$l}{$f}{data}{$c}{$e});
                }
        }

        foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
        foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if ($#filter == 2 && ($filter[0] ne $l
                    || $filter[1] ne $f || $filter[2] ne $c));
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);

                # Locale name without encoding; family "x" is left out.
                my $file = $l . "_";
                $file .= $f . "_" if ($f ne "x");
                $file .= $c;

                my $encodings = $languages{$l}{$f}{data}{$c};
                if (defined $encodings->{$DEFENCODING}
                 && $encodings->{$DEFENCODING} eq "0") {
                        print "Skipping $file - not read\n";
                        next;
                }

                # Encodings undefined above were already emitted as SAME.
                foreach my $e (sort keys(%{$encodings})) {
                        next if (!defined $encodings->{$e});
                        print {$fout} "LOCALES+=\t$file.$e\n";
                }

                if (defined $languages{$l}{$f}{nc_link}) {
                        my $nc_link = $languages{$l}{$f}{nc_link};
                        foreach my $e (sort keys(%{$encodings})) {
                                print {$fout} "SAME+=\t\t$file.$e:$nc_link.$e\t# legacy (lang/country change)\n";
                        }
                }

                if (defined $languages{$l}{$f}{e_link}) {
                        # e_link is a blank-separated list of "<enc>:<enc>".
                        foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
                                my ($from, $to) = split(/:/, $el);
                                print {$fout} "SAME+=\t\t$file.$from:$file.$to\t# legacy (same charset)\n";
                        }
                }

        }
        }
        }

        print {$fout} <<EOF;

FILES=          \${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES=     \${FILES}

.for f in \${SAME}
SYMLINKS+=      ../\${f:C/:.*\$//}/\${FILESNAME} \${LOCALEDIR}/\${f:C/^.*://}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

.include <bsd.prog.mk>
EOF

        # Buffered write errors only surface when the handle is closed.
        close($fout) or die("Cannot write $makefile: $!\n");
}