#! /usr/bin/perl -w
use strict;

# Convert cppucnid.tab to cppucnid.h.  We use two arrays of length
# 65536 to represent the table, since this is nice and simple.  The
# first array holds the tags indicating which ranges are valid in
# which contexts.  The second array holds the language name associated
# with each element.
our(@tags, @names);
@tags = ("") x 65536;
@names = ("") x 65536;

# Array mapping tag numbers to standard #defines
our @stds;

# Current standard and language
our($curstd, $curlang);

# First block of the file is a template to be saved for later.
# It ends at a line consisting solely of '%%'.
our @template;
my $saw_separator = 0;
while (<>) {
    chomp;
    if ($_ eq '%%') {
        $saw_separator = 1;
        last;
    }
    push @template, $_;
}

# If the separator never appeared, the loop above consumed all of the
# input.  A further <> would then start over with an empty @ARGV and
# read STDIN, so stop with a diagnostic instead of hanging or quietly
# emitting an empty table.
die "no '%%' separator found between template and UCN tables\n"
    unless $saw_separator;

# Second block of the file is the UCN tables.
# The format looks like this:
#
# [std]
#
# ; language
# xxxx-xxxx xxxx xxxx-xxxx ....
#
# with comment lines starting with #.
while (<>) {
    chomp;
    next if /^#/;        # comment line
    next if /^\s*$/;     # blank line
    if (/^\[(.+)\]$/) {  # [std] header selects the current standard
        $curstd = $1;
        next;
    }
    if (/^; (.+)$/) {    # "; language" header selects the current language
        $curlang = $1;
        next;
    }
    # Anything else is a whitespace-separated list of ranges.
    process_range(split);
}

# Print out the template, inserting as requested.
# $\ makes every plain print below end its line; printf is unaffected
# by $\, which is why print_table writes its own "\n".
$\ = "\n";
for (@template) {
    if ($_ eq "[dne]") {
        print("/* Automatically generated from cppucnid.tab, do not edit */");
    }
    elsif ($_ eq "[table]") {
        print_table();
    }
    else {
        print;
    }
}

# Print the table body: one initializer line for each maximal run of
# consecutive code points that share both tag and language name.  Runs
# whose tag is empty are skipped.  The language name is added as a
# trailing C comment only when it differs from the name of the
# previously printed entry.
sub print_table {
    my($lo, $hi);
    my $prevname = "";

    for ($lo = 0; $lo <= $#tags; $lo = $hi) {
        # Advance $hi past every element identical to element $lo.
        $hi = $lo;
        $hi++ while $hi <= $#tags
            && $tags[$hi] eq $tags[$lo]
            && $names[$hi] eq $names[$lo];

        # Range from $lo to $hi-1.
        # Don't make entries for ranges that are not valid idchars.
        next if ($tags[$lo] eq "");

        my $tag = $tags[$lo];
        # Tags beginning with C99 get a leading space (presumably to
        # line up columns in the generated table).
        $tag = " ".$tag if $tag =~ /^C99/;

        if ($names[$lo] eq $prevname) {
            printf(" { 0x%04x, 0x%04x, %-11s },\n",
                   $lo, $hi-1, $tag);
        } else {
            printf(" { 0x%04x, 0x%04x, %-11s }, /* %s */\n",
                   $lo, $hi-1, $tag, $names[$lo]);
        }
        $prevname = $names[$lo];
    }
}

# The line is a list of four-digit hexadecimal numbers or
# pairs of such numbers.  Each is a valid identifier character
# from the given language, under the given standard.
# process_range(LIST)
#
# Record every code point named in LIST as tagged with the current
# standard ($curstd) and belonging to the current language ($curlang).
# Each element of LIST is either "xxxx" (one code point) or "xxxx-xxxx"
# (an inclusive range), where x is a lowercase hexadecimal digit.
# Elements in any other form are reported with warn and skipped.
sub process_range {
    for my $range (@_) {
        if ($range =~ /^[0-9a-f]{4}$/) {
            _mark_codepoint(hex($range));
        }
        elsif ($range =~ /^ ([0-9a-f]{4}) - ([0-9a-f]{4}) $/x) {
            my ($start, $end) = (hex($1), hex($2));
            # A reversed range marks nothing; say so rather than
            # dropping it silently.
            warn "empty range $range (start exceeds end)"
                if $start > $end;
            for my $i ($start .. $end) {
                _mark_codepoint($i);
            }
        }
        else {
            warn "malformed range expression $range";
        }
    }
}

# Tag code point $i with $curstd and attribute it to $curlang.
#
# If the code point already carries a tag, the current standard is
# prepended to it with "|".  If it already belongs to a different
# language, a warning is issued and the existing name is kept.
sub _mark_codepoint {
    my ($i) = @_;

    $tags[$i] = $tags[$i] eq "" ? $curstd : $curstd . "|" . $tags[$i];

    if ($names[$i] ne "" && $names[$i] ne $curlang) {
        # The tag is a string built from standard names, so it must be
        # formatted with %s; %d would print 0 and trigger a
        # "isn't numeric" warning under -w.
        warn sprintf("language overlap: %s/%s at %x (tag %s)",
                     $names[$i], $curlang, $i, $tags[$i]);
        return;
    }

    $names[$i] = $curlang;
    return;
}