1 #!/usr/bin/env perl -wC
12 use Digest::SHA qw(sha1_hex);
# Command-line synopsis shown to the user; $0 expands to the script name.
# NOTE(review): the routine that contains this print is not visible in this
# excerpt.
16 print "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --input=<inputfile> --output=<outputfile>\n";
# Value of --unidata; starts out undef and is filled in by GetOptions below.
23 my $UNIDATADIR = undef;
# Bind each long option to the variable that receives its value. The "=s"
# suffix declares that the option takes a string argument.
29 my $result = GetOptions (
30 "cldr=s" => \$CLDRDIR,
31 "unidata=s" => \$UNIDATADIR,
35 "output=s" => \$OUTPUT,
# Main sequence: process the Unicode data directory, then the UTF-8 charmap
# shipped under the CLDR directory, then run the conversion on the
# input/output paths taken from the command line.
41 get_unidata($UNIDATADIR);
42 get_utf8map("$CLDRDIR/posix/UTF-8.cm");
43 convert($INPUT, $OUTPUT);
45 ############################
# Fragment that reads UnicodeData.txt (presumably the body of get_unidata;
# the sub header is not visible in this excerpt).
# The first argument is the directory that holds UnicodeData.txt.
48 my $directory = shift;
# Open the data file on the bareword handle FIN and abort if that fails.
# NOTE(review): this is a two-argument open on a global handle, the die
# message does not include $!, and the statement ends in a stray second
# semicolon.
50 open(FIN, "$directory/UnicodeData.txt")
51 or die("Cannot open $directory/UnicodeData.txt");;
# Walk the lines of the file (how @lines is filled is not visible here) and
# index every record in both directions in %ucd.
56 foreach my $l (@lines) {
# Records are ';'-separated; only the first two fields are used below.
57 my @a = split(/;/, $l);
59 $ucd{code2name}{"$a[0]"} = $a[1]; # Unicode name
60 $ucd{name2code}{"$a[1]"} = $a[0]; # Unicode code
# Charmap parsing loop (presumably the body of get_utf8map; the sub header
# and the code that fills @lines are not visible in this excerpt).
75 foreach my $l (@lines) {
# Skip lines that start with '#'.
77 next if ($l =~ /^\#/);
# A line consisting solely of "CHARMAP" opens the mapping section.
# NOTE(review): the body of this branch is not visible; presumably it sets
# $incharmap.
80 if ($l eq "CHARMAP") {
# Ignore everything before the mapping section has started ...
85 next if (!$incharmap);
# ... and stop at the line that closes it.
86 last if ($l eq "END CHARMAP");
# Match an entry of the form "<NAME>  value": capture 1 is the text between
# the angle brackets (no whitespace allowed), capture 2 is the rest of the
# line after the separating whitespace.
# NOTE(review): the match result is not checked here; if a line does not
# match, $1/$2 would still hold values from an earlier match - confirm how
# $k and $v are assigned.
88 $l =~ /^<([^\s]+)>\s+(.*)/;
# Normalise the key: underscores become spaces.
91 $k =~ s/_/ /g; # unicode char string
# Drop every "\x" marker so that only the hex digits remain.
92 $v =~ s/\\x//g; # UTF-8 char code
# When this entry has the same value as the previous one, record this key as
# an alias of the previous key.
95 $utf8aliases{$k} = $prev_k if ($prev_v eq $v);
# Lookup-and-pack fragment (presumably the body of decode_cldr; the sub
# header and the assignment of $s are not visible in this excerpt).
# Look the name up in the charmap table first, then fall back to the alias
# table; abort the whole run if neither knows the name.
105 my $v = $utf8map{$s};
106 $v = $utf8aliases{$s} if (!defined $v);
107 die "Cannot convert $s" if (!defined $v);
# $v is treated as a string of hex digits, two per byte. Values of one, two
# or three bytes are turned into the corresponding raw byte string.
109 return pack("C", hex($v)) if (length($v) == 2);
110 return pack("CC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)))
111 if (length($v) == 4);
112 return pack("CCC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)),
113 hex(substr($v, 4, 2))) if (length($v) == 6);
# Any other length is reported on STDERR, and the text "length = N" is
# returned in place of the bytes.
# NOTE(review): eight hex digits (a four-byte sequence) also end up here, so
# a caller that concatenates the return value would embed this diagnostic
# text in its result - confirm whether four-byte values can occur and whether
# this fallback is intended.
114 print STDERR "Cannot convert $s\n";
115 return "length = " . length($v);
# Conversion loop fragment (presumably part of convert; the sub header, the
# opening of FIN and the closing braces are not visible in this excerpt).
# Disabled debugging aid that would dump the charmap table.
125 # print Dumper(%utf8map);
# Read the input one line at a time from the FIN handle.
128 while (defined ($l = <FIN>)) {
# As long as the line still contains a "<...>" token, replace the leftmost
# one (both .*? groups are non-greedy) with the result of decode_cldr() on
# the text between the angle brackets, leaving the text before and after it
# untouched. The loop ends once no "<...>" token remains.
136 while ($l =~ /^(.*?)<(.*?)>(.*)$/) {
137 $l = $1 . decode_cldr($2) . $3;