contrib/groff/src/utils/afmtodit/afmtodit.pl

   1 #! /usr/bin/perl -w
   2 # -*- Perl -*-
   3 # Copyright (C) 1989-2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
   4 #               2009
   5 # Free Software Foundation, Inc.
   6 #      Written by James Clark (jjc@jclark.com)
   7 #
   8 # This file is part of groff.
   9 #
  10 # groff is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU General Public License as published by the Free
  12 # Software Foundation, either version 3 of the License, or
  13 # (at your option) any later version.
  14 #
  15 # groff is distributed in the hope that it will be useful, but WITHOUT ANY
  16 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
  17 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  18 # for more details.
  19 #
  20 # You should have received a copy of the GNU General Public License
  21 # along with this program. If not, see <http://www.gnu.org/licenses/>.
  22
  23 use strict;
  24
  25 @afmtodit.tables@
  26
  27 my $prog = $0;
  28 $prog =~ s@.*/@@;
  29
  30 my $groff_sys_fontdir = "@FONTDIR@";
  31
  32 use Getopt::Std;
  33 getopts('a:cd:e:f:i:kmnsvx');
  34
  35 our ($opt_a, $opt_c, $opt_d, $opt_e, $opt_f, $opt_i,
  36      $opt_k, $opt_m, $opt_n, $opt_s, $opt_v, $opt_x);
  37
  38 if ($opt_v) {
  39     print "GNU afmtodit (groff) version @VERSION@\n";
  40     exit 0;
  41 }
  42
  43 if ($#ARGV != 2) {
  44     die "usage: $prog [-ckmnsvx] [-a angle] [-d DESC] [-e encoding]\n" .
  45         "       [-f name] [-i n] afmfile mapfile font\n";
  46 }
  47
  48 my $afm = $ARGV[0];
  49 my $map = $ARGV[1];
  50 my $font = $ARGV[2];
  51 my $desc = $opt_d || "DESC";
  52 my $sys_map = $groff_sys_fontdir . "/devps/generate/" . $map;
  53 my $sys_desc = $groff_sys_fontdir . "/devps/" . $desc;
  54
  55 # read the afm file
  56
  57 my $psname;
  58 my ($notice, $version, $fullname, $familyname, @comments);
  59 my $italic_angle = 0;
  60 my (@kern1, @kern2, @kernx);
  61 my (%italic_correction, %left_italic_correction);
  62 my %subscript_correction;
  63 # my %ligs
  64 my %ligatures;
  65 my (@encoding, %in_encoding);
  66 my (%width, %height, %depth);
  67 my (%left_side_bearing, %right_side_bearing);
  68
  69 open(AFM, $afm) || die "$prog: can't open \`$ARGV[0]': $!\n";
  70
  71 while (<AFM>) {
  72     chomp;
  73     s/\x0D$//;
  74     my @field = split(' ');
  75     next if $#field < 0;
  76     if ($field[0] eq "FontName") {
  77         $psname = $field[1];
  78         if($opt_f) {
  79             $psname = $opt_f;
  80         }
  81     }
  82     elsif($field[0] eq "Notice") {
  83         $notice = $_;
  84     }
  85     elsif($field[0] eq "Version") {
  86         $version = $_;
  87     }
  88     elsif($field[0] eq "FullName") {
  89         $fullname = $_;
  90     }
  91     elsif($field[0] eq "FamilyName") {
  92         $familyname = $_;
  93     }
  94     elsif($field[0] eq "Comment") {
  95         push(@comments, $_);
  96     }
  97     elsif($field[0] eq "ItalicAngle") {
  98         $italic_angle = -$field[1];
  99     }
 100     elsif ($field[0] eq "KPX") {
 101         if ($#field == 3) {
 102             push(@kern1, $field[1]);
 103             push(@kern2, $field[2]);
 104             push(@kernx, $field[3]);
 105         }
 106     }
 107     elsif ($field[0] eq "italicCorrection") {
 108         $italic_correction{$field[1]} = $field[2];
 109     }
 110     elsif ($field[0] eq "leftItalicCorrection") {
 111         $left_italic_correction{$field[1]} = $field[2];
 112     }
 113     elsif ($field[0] eq "subscriptCorrection") {
 114         $subscript_correction{$field[1]} = $field[2];
 115     }
 116     elsif ($field[0] eq "StartCharMetrics") {
 117         while (<AFM>) {
 118             @field = split(' ');
 119             next if $#field < 0;
 120             last if ($field[0] eq "EndCharMetrics");
 121             if ($field[0] eq "C") {
 122                 my $w;
 123                 my $wx = 0;
 124                 my $n = "";
 125 #               %ligs = ();
 126                 my $lly = 0;
 127                 my $ury = 0;
 128                 my $llx = 0;
 129                 my $urx = 0;
 130                 my $c = $field[1];
 131                 my $i = 2;
 132                 while ($i <= $#field) {
 133                     if ($field[$i] eq "WX") {
 134                         $w = $field[$i + 1];
 135                         $i += 2;
 136                     }
 137                     elsif ($field[$i] eq "N") {
 138                         $n = $field[$i + 1];
 139                         $i += 2;
 140                     }
 141                     elsif ($field[$i] eq "B") {
 142                         $llx = $field[$i + 1];
 143                         $lly = $field[$i + 2];
 144                         $urx = $field[$i + 3];
 145                         $ury = $field[$i + 4];
 146                         $i += 5;
 147                     }
 148 #                   elsif ($field[$i] eq "L") {
 149 #                       $ligs{$field[$i + 2]} = $field[$i + 1];
 150 #                       $i += 3;
 151 #                   }
 152                     else {
 153                         while ($i <= $#field && $field[$i] ne ";") {
 154                             $i++;
 155                         }
 156                         $i++;
 157                     }
 158                 }
 159                 if (!$opt_e && $c != -1) {
 160                     $encoding[$c] = $n;
 161                     $in_encoding{$n} = 1;
 162                 }
 163                 $width{$n} = $w;
 164                 $height{$n} = $ury;
 165                 $depth{$n} = -$lly;
 166                 $left_side_bearing{$n} = -$llx;
 167                 $right_side_bearing{$n} = $urx - $w;
 168 #               while ((my $lig, my $glyph2) = each %ligs) {
 169 #                   $ligatures{$lig} = $n . " " . $glyph2;
 170 #               }
 171             }
 172         }
 173     }
 174 }
 175 close(AFM);
 176
 177 # read the DESC file
 178
 179 my ($sizescale, $resolution, $unitwidth);
 180 $sizescale = 1;
 181
 182 open(DESC, $desc) || open(DESC, $sys_desc) ||
 183     die "$prog: can't open \`$desc' or \`$sys_desc': $!\n";
 184 while (<DESC>) {
 185     next if /^#/;
 186     chop;
 187     my @field = split(' ');
 188     next if $#field < 0;
 189     last if $field[0] eq "charset";
 190     if ($field[0] eq "res") {
 191         $resolution = $field[1];
 192     }
 193     elsif ($field[0] eq "unitwidth") {
 194         $unitwidth = $field[1];
 195     }
 196     elsif ($field[0] eq "sizescale") {
 197         $sizescale = $field[1];
 198     }
 199 }
 200 close(DESC);
 201
 202 if ($opt_e) {
 203     # read the encoding file
 204
 205     my $sys_opt_e = $groff_sys_fontdir . "/devps/" . $opt_e;
 206     open(ENCODING, $opt_e) || open(ENCODING, $sys_opt_e) ||
 207         die "$prog: can't open \`$opt_e' or \`$sys_opt_e': $!\n";
 208     while (<ENCODING>) {
 209         next if /^#/;
 210         chop;
 211         my @field = split(' ');
 212         next if $#field < 0;
 213         if ($#field == 1) {
 214             if ($field[1] >= 0 && defined $width{$field[0]}) {
 215                 $encoding[$field[1]] = $field[0];
 216                 $in_encoding{$field[0]} = 1;
 217             }
 218         }
 219     }
 220     close(ENCODING);
 221 }
 222
 223 # read the map file
 224
 225 my (%nmap, %map);
 226
 227 open(MAP, $map) || open(MAP, $sys_map) ||
 228     die "$prog: can't open \`$map' or \`$sys_map': $!\n";
 229 while (<MAP>) {
 230     next if /^#/;
 231     chop;
 232     my @field = split(' ');
 233     next if $#field < 0;
 234     if ($#field == 1) {
 235         if ($field[1] eq "space") {
 236             # The PostScript character "space" is automatically mapped
 237             # to the groff character "space"; this is for grops.
 238             warn "you are not allowed to map to " .
 239                  "the groff character \`space'";
 240         }
 241         elsif ($field[0] eq "space") {
 242             warn "you are not allowed to map " .
 243                  "the PostScript character \`space'";
 244         }
 245         else {
 246             $nmap{$field[0]} += 0;
 247             $map{$field[0], $nmap{$field[0]}} = $field[1];
 248             $nmap{$field[0]} += 1;
 249
 250             # There is more than one way to make a PS glyph name;
 251             # let us try Unicode names with both `uni' and `u' prefixes.
 252             my $utmp = $AGL_to_unicode{$field[0]};
 253             if (defined $utmp && $utmp =~ /^[0-9A-F]{4}$/) {
 254                 foreach my $unicodepsname ("uni" . $utmp, "u" . $utmp) {
 255                     $nmap{$unicodepsname} += 0;
 256                     $map{$unicodepsname, $nmap{$unicodepsname}} = $field[1];
 257                     $nmap{$unicodepsname} += 1;
 258                 }
 259             }
 260         }
 261     }
 262 }
 263 close(MAP);
 264
 265 $italic_angle = $opt_a if $opt_a;
 266
 267
 268 if (!$opt_x) {
 269     my %mapped;
 270     my $i = ($#encoding > 256) ? ($#encoding + 1) : 256;
 271     while (my $ch = each %width) {
 272         # add unencoded characters
 273         if (!$in_encoding{$ch}) {
 274             $encoding[$i] = $ch;
 275             $i++;
 276         }
 277         if ($nmap{$ch}) {
 278             for (my $j = 0; $j < $nmap{$ch}; $j++) {
 279                 if (defined $mapped{$map{$ch, $j}}) {
 280                     warn "both $mapped{$map{$ch, $j}} and $ch " .
 281                          "map to $map{$ch, $j}";
 282                 }
 283                 else {
 284                     $mapped{$map{$ch, $j}} = $ch;
 285                 }
 286             }
 287         }
 288         else {
 289             my $u = "";         # the resulting groff glyph name
 290             my $ucomp = "";     # Unicode string before decomposition
 291             my $utmp = "";      # temporary value
 292             my $component = "";
 293             my $nv = 0;
 294
 295             # Step 1:
 296             #   Drop all characters from the glyph name starting with the
 297             #   first occurrence of a period (U+002E FULL STOP), if any.
 298             #   ?? We avoid mapping of glyphs with periods, since they are
 299             #   likely to be variant glyphs, leading to a `many ps glyphs --
 300             #   one groff glyph' conflict.
 301             #
 302             #   If multiple glyphs in the font represent the same character
 303             #   in the Unicode standard, as do `A' and `A.swash', for example,
 304             #   they can be differentiated by using the same base name with
 305             #   different suffixes.  This suffix (the part of glyph name that
 306             #   follows the first period) does not participate in the
 307             #   computation of a character sequence.  It can be used by font
 308             #   designers to indicate some characteristics of the glyph.  The
 309             #   suffix may contain periods or any other permitted characters.
 310             #   Small cap A, for example, could be named `uni0041.sc' or
 311             #   `A.sc'.
 312
 313             next if $ch =~ /\./;
 314
 315             # Step 2:
 316             #  Split the remaining string into a sequence of components,
 317             #  using the underscore character (U+005F LOW LINE) as the
 318             #  delimiter.
 319
 320             while ($ch =~ /([^_]+)/g) {
 321                 $component = $1;
 322
 323                 # Step 3:
 324                 #   Map each component to a character string according to the
 325                 #   procedure below:
 326                 #
 327                 #   * If the component is in the Adobe Glyph List, then map
 328                 #     it to the corresponding character in that list.
 329
 330                 $utmp = $AGL_to_unicode{$component};
 331                 if ($utmp) {
 332                     $utmp = "U+" . $utmp;
 333                 }
 334
 335                 #   * Otherwise, if the component is of the form `uni'
 336                 #     (U+0075 U+006E U+0069) followed by a sequence of
 337                 #     uppercase hexadecimal digits (0 .. 9, A .. F, i.e.,
 338                 #     U+0030 .. U+0039, U+0041 .. U+0046), the length of
 339                 #     that sequence is a multiple of four, and each group of
 340                 #     four digits represents a number in the set {0x0000 ..
 341                 #     0xD7FF, 0xE000 .. 0xFFFF}, then interpret each such
 342                 #     number as a Unicode scalar value and map the component
 343                 #     to the string made of those scalar values.
 344
 345                 elsif ($component =~ /^uni([0-9A-F]{4})+$/) {
 346                     while ($component =~ /([0-9A-F]{4})/g) {
 347                         $nv = hex("0x" . $1);
 348                         if ($nv <= 0xD7FF || $nv >= 0xE000) {
 349                             $utmp .= "U+" . $1;
 350                         }
 351                         else {
 352                             $utmp = "";
 353                             last;
 354                         }
 355                     }
 356                 }
 357
 358                 #   * Otherwise, if the component is of the form `u' (U+0075)
 359                 #     followed by a sequence of four to six uppercase
 360                 #     hexadecimal digits {0 .. 9, A .. F} (U+0030 .. U+0039,
 361                 #     U+0041 .. U+0046), and those digits represent a number
 362                 #     in {0x0000 .. 0xD7FF, 0xE000 .. 0x10FFFF}, then
 363                 #     interpret this number as a Unicode scalar value and map
 364                 #     the component to the string made of this scalar value.
 365
 366                 elsif ($component =~ /^u([0-9A-F]{4,6})$/) {
 367                     $nv = hex("0x" . $1);
 368                     if ($nv <= 0xD7FF || ($nv >= 0xE000 && $nv <= 0x10FFFF)) {
 369                         $utmp = "U+" . $1;
 370                     }
 371                 }
 372
 373                 # Finally, concatenate those strings; the result is the
 374                 # character string to which the glyph name is mapped.
 375
 376                 $ucomp .= $utmp if $utmp;
 377             }
 378
 379             # Unicode decomposition
 380             while ($ucomp =~ /([0-9A-F]{4,6})/g) {
 381                 $component = $1;
 382                 $utmp = $unicode_decomposed{$component};
 383                 $u .= "_" . ($utmp ? $utmp : $component);
 384             }
 385             $u =~ s/^_/u/;
 386             if ($u) {
 387                 if (defined $mapped{$u}) {
 388                     warn "both $mapped{$u} and $ch map to $u";
 389                 }
 390                 else {
 391                     $mapped{$u} = $ch;
 392                 }
 393                 $nmap{$ch} += 1;
 394                 $map{$ch, "0"} = $u;
 395             }
 396         }
 397     }
 398 }
 399
 400 # Check explicitly for groff's standard ligatures -- many afm files don't
 401 # have proper `L' entries.
 402
 403 my %default_ligatures = (
 404   "fi", "f i",
 405   "fl", "f l",
 406   "ff", "f f",
 407   "ffi", "ff i",
 408   "ffl", "ff l",
 409 );
 410
 411 while (my ($lig, $components) = each %default_ligatures) {
 412     if (defined $width{$lig} && !defined $ligatures{$lig}) {
 413         $ligatures{$lig} = $components;
 414     }
 415 }
 416
 417 # print it all out
 418
 419 open(FONT, ">$font") || die "$prog: can't open \`$font' for output: $!\n";
 420 select(FONT);
 421
 422 print("# This file has been generated with " .
 423       "GNU afmtodit (groff) version @VERSION@\n");
 424 print("#\n");
 425 print("#   $fullname\n") if defined $fullname;
 426 print("#   $version\n") if defined $version;
 427 print("#   $familyname\n") if defined $familyname;
 428
 429 if ($opt_c) {
 430     print("#\n");
 431     if (defined $notice || @comments) {
 432         print("# The original AFM file contains the following comments:\n");
 433         print("#\n");
 434         print("#   $notice\n") if defined $notice;
 435         foreach my $comment (@comments) {
 436             print("#   $comment\n");
 437         }
 438     }
 439     else {
 440         print("# The original AFM file contains no comments.\n");
 441     }
 442 }
 443
 444 print("\n");
 445
 446 print("name $font\n");
 447 print("internalname $psname\n") if $psname;
 448 print("special\n") if $opt_s;
 449 printf("slant %g\n", $italic_angle) if $italic_angle != 0;
 450 printf("spacewidth %d\n", conv($width{"space"})) if defined $width{"space"};
 451
 452 if ($opt_e) {
 453     my $e = $opt_e;
 454     $e =~ s@.*/@@;
 455     print("encoding $e\n");
 456 }
 457
 458 if (!$opt_n && %ligatures) {
 459     print("ligatures");
 460     while (my $lig = each %ligatures) {
 461         print(" $lig");
 462     }
 463     print(" 0\n");
 464 }
 465
 466 if (!$opt_k && $#kern1 >= 0) {
 467     print("\n");
 468     print("kernpairs\n");
 469
 470     for (my $i = 0; $i <= $#kern1; $i++) {
 471         my $c1 = $kern1[$i];
 472         my $c2 = $kern2[$i];
 473         if (defined $nmap{$c1} && $nmap{$c1} != 0
 474             && defined $nmap{$c2} && $nmap{$c2} != 0) {
 475             for (my $j = 0; $j < $nmap{$c1}; $j++) {
 476                 for (my $k = 0; $k < $nmap{$c2}; $k++) {
 477                     if ($kernx[$i] != 0) {
 478                         printf("%s %s %d\n",
 479                                $map{$c1, $j},
 480                                $map{$c2, $k},
 481                                conv($kernx[$i]));
 482                     }
 483                 }
 484             }
 485         }
 486     }
 487 }
 488
 489 my ($asc_boundary, $desc_boundary, $xheight, $slant);
 490
 491 # characters not shorter than asc_boundary are considered to have ascenders
 492
 493 $asc_boundary = 0;
 494 $asc_boundary = $height{"t"} if defined $height{"t"};
 495 $asc_boundary -= 1;
 496
 497 # likewise for descenders
 498
 499 $desc_boundary = 0;
 500 $desc_boundary = $depth{"g"} if defined $depth{"g"};
 501 $desc_boundary = $depth{"j"} if defined $depth{"g"} && $depth{"j"} < $desc_boundary;
 502 $desc_boundary = $depth{"p"} if defined $depth{"p"} && $depth{"p"} < $desc_boundary;
 503 $desc_boundary = $depth{"q"} if defined $depth{"q"} && $depth{"q"} < $desc_boundary;
 504 $desc_boundary = $depth{"y"} if defined $depth{"y"} && $depth{"y"} < $desc_boundary;
 505 $desc_boundary -= 1;
 506
 507 if (defined $height{"x"}) {
 508     $xheight = $height{"x"};
 509 }
 510 elsif (defined $height{"alpha"}) {
 511     $xheight = $height{"alpha"};
 512 }
 513 else {
 514     $xheight = 450;
 515 }
 516
 517 $italic_angle = $italic_angle*3.14159265358979323846/180.0;
 518 $slant = sin($italic_angle)/cos($italic_angle);
 519 $slant = 0 if $slant < 0;
 520
 521 print("\n");
 522 print("charset\n");
 523 for (my $i = 0; $i <= $#encoding; $i++) {
 524     my $ch = $encoding[$i];
 525     if (defined $ch && $ch ne "" && $ch ne "space") {
 526         $map{$ch, "0"} = "---" if !defined $nmap{$ch} || $nmap{$ch} == 0;
 527         my $type = 0;
 528         my $h = $height{$ch};
 529         $h = 0 if $h < 0;
 530         my $d = $depth{$ch};
 531         $d = 0 if $d < 0;
 532         $type = 1 if $d >= $desc_boundary;
 533         $type += 2 if $h >= $asc_boundary;
 534         printf("%s\t%d", $map{$ch, "0"}, conv($width{$ch}));
 535         my $italic_correction = 0;
 536         my $left_math_fit = 0;
 537         my $subscript_correction = 0;
 538         if (defined $opt_i) {
 539             $italic_correction = $right_side_bearing{$ch} + $opt_i;
 540             $italic_correction = 0 if $italic_correction < 0;
 541             $subscript_correction = $slant * $xheight * .8;
 542             $subscript_correction = $italic_correction if
 543                 $subscript_correction > $italic_correction;
 544             $left_math_fit = $left_side_bearing{$ch} + $opt_i;
 545             if (defined $opt_m) {
 546                 $left_math_fit = 0 if $left_math_fit < 0;
 547             }
 548         }
 549         if (defined $italic_correction{$ch}) {
 550             $italic_correction = $italic_correction{$ch};
 551         }
 552         if (defined $left_italic_correction{$ch}) {
 553             $left_math_fit = $left_italic_correction{$ch};
 554         }
 555         if (defined $subscript_correction{$ch}) {
 556             $subscript_correction = $subscript_correction{$ch};
 557         }
 558         if ($subscript_correction != 0) {
 559             printf(",%d,%d", conv($h), conv($d));
 560             printf(",%d,%d,%d", conv($italic_correction),
 561                    conv($left_math_fit),
 562                    conv($subscript_correction));
 563         }
 564         elsif ($left_math_fit != 0) {
 565             printf(",%d,%d", conv($h), conv($d));
 566             printf(",%d,%d", conv($italic_correction),
 567                    conv($left_math_fit));
 568         }
 569         elsif ($italic_correction != 0) {
 570             printf(",%d,%d", conv($h), conv($d));
 571             printf(",%d", conv($italic_correction));
 572         }
 573         elsif ($d != 0) {
 574             printf(",%d,%d", conv($h), conv($d));
 575         }
 576         else {
 577             # always put the height in to stop groff guessing
 578             printf(",%d", conv($h));
 579         }
 580         printf("\t%d", $type);
 581         printf("\t%d\t%s\n", $i, $ch);
 582         if (defined $nmap{$ch}) {
 583             for (my $j = 1; $j < $nmap{$ch}; $j++) {
 584                 printf("%s\t\"\n", $map{$ch, $j});
 585             }
 586         }
 587     }
 588     if (defined $ch && $ch eq "space" && defined $width{"space"}) {
 589         printf("space\t%d\t0\t%d\tspace\n", conv($width{"space"}), $i);
 590     }
 591 }
 592
 593 sub conv {
 594     $_[0]*$unitwidth*$resolution/(72*1000*$sizescale) + ($_[0] < 0 ? -.5 : .5);
 595 }
 596
 597 # eof