src/utils/afmtodit/afmtodit.pl

   1 #! /usr/bin/perl -w
   2 # -*- Perl -*-
   3 # Copyright (C) 1989-2014  Free Software Foundation, Inc.
   4 #      Written by James Clark (jjc@jclark.com)
   5 #
   6 # This file is part of groff.
   7 #
   8 # groff is free software; you can redistribute it and/or modify it under
   9 # the terms of the GNU General Public License as published by the Free
  10 # Software Foundation, either version 3 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # groff is distributed in the hope that it will be useful, but WITHOUT ANY
  14 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 # for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
  20
  21 use strict;
  22
  23 @afmtodit.tables@
  24
  25 my $prog = $0;
  26 $prog =~ s@.*/@@;
  27
  28 my $groff_sys_fontdir = "@FONTDIR@";
  29
  30 use Getopt::Std;
  31 getopts('a:cd:e:f:i:kmno:svx');
  32
  33 our ($opt_a, $opt_c, $opt_d, $opt_e, $opt_f, $opt_i, $opt_k,
  34      $opt_m, $opt_n, $opt_o, $opt_s, $opt_v, $opt_x);
  35
  36 if ($opt_v) {
  37     print "GNU afmtodit (groff) version @VERSION@\n";
  38     exit 0;
  39 }
  40
  41 if ($#ARGV != 2) {
  42     die "usage: $prog [-ckmnsvx] [-a angle] [-d DESC] [-e encoding]\n" .
  43         "       [-f name] [-i n] [-o outfile] afmfile mapfile font\n";
  44 }
  45
  46 my $afm = $ARGV[0];
  47 my $map = $ARGV[1];
  48 my $font = $ARGV[2];
  49 my $outfile = $opt_o || $font;
  50 my $desc = $opt_d || "DESC";
  51 my $sys_map = $groff_sys_fontdir . "/devps/generate/" . $map;
  52 my $sys_desc = $groff_sys_fontdir . "/devps/" . $desc;
  53
  54 # read the afm file
  55
  56 my $psname;
  57 my ($notice, $version, $fullname, $familyname, @comments);
  58 my $italic_angle = 0;
  59 my (@kern1, @kern2, @kernx);
  60 my (%italic_correction, %left_italic_correction);
  61 my %subscript_correction;
  62 # my %ligs
  63 my %ligatures;
  64 my (@encoding, %in_encoding);
  65 my (%width, %height, %depth);
  66 my (%left_side_bearing, %right_side_bearing);
  67
  68 open(AFM, $afm) || die "$prog: can't open \`$ARGV[0]': $!\n";
  69
  70 while (<AFM>) {
  71     chomp;
  72     s/\x0D$//;
  73     my @field = split(' ');
  74     next if $#field < 0;
  75     if ($field[0] eq "FontName") {
  76         $psname = $field[1];
  77         if($opt_f) {
  78             $psname = $opt_f;
  79         }
  80     }
  81     elsif($field[0] eq "Notice") {
  82         $notice = $_;
  83     }
  84     elsif($field[0] eq "Version") {
  85         $version = $_;
  86     }
  87     elsif($field[0] eq "FullName") {
  88         $fullname = $_;
  89     }
  90     elsif($field[0] eq "FamilyName") {
  91         $familyname = $_;
  92     }
  93     elsif($field[0] eq "Comment") {
  94         push(@comments, $_);
  95     }
  96     elsif($field[0] eq "ItalicAngle") {
  97         $italic_angle = -$field[1];
  98     }
  99     elsif ($field[0] eq "KPX") {
 100         if ($#field == 3) {
 101             push(@kern1, $field[1]);
 102             push(@kern2, $field[2]);
 103             push(@kernx, $field[3]);
 104         }
 105     }
 106     elsif ($field[0] eq "italicCorrection") {
 107         $italic_correction{$field[1]} = $field[2];
 108     }
 109     elsif ($field[0] eq "leftItalicCorrection") {
 110         $left_italic_correction{$field[1]} = $field[2];
 111     }
 112     elsif ($field[0] eq "subscriptCorrection") {
 113         $subscript_correction{$field[1]} = $field[2];
 114     }
 115     elsif ($field[0] eq "StartCharMetrics") {
 116         while (<AFM>) {
 117             @field = split(' ');
 118             next if $#field < 0;
 119             last if ($field[0] eq "EndCharMetrics");
 120             if ($field[0] eq "C") {
 121                 my $w;
 122                 my $wx = 0;
 123                 my $n = "";
 124 #               %ligs = ();
 125                 my $lly = 0;
 126                 my $ury = 0;
 127                 my $llx = 0;
 128                 my $urx = 0;
 129                 my $c = $field[1];
 130                 my $i = 2;
 131                 while ($i <= $#field) {
 132                     if ($field[$i] eq "WX") {
 133                         $w = $field[$i + 1];
 134                         $i += 2;
 135                     }
 136                     elsif ($field[$i] eq "N") {
 137                         $n = $field[$i + 1];
 138                         $i += 2;
 139                     }
 140                     elsif ($field[$i] eq "B") {
 141                         $llx = $field[$i + 1];
 142                         $lly = $field[$i + 2];
 143                         $urx = $field[$i + 3];
 144                         $ury = $field[$i + 4];
 145                         $i += 5;
 146                     }
 147 #                   elsif ($field[$i] eq "L") {
 148 #                       $ligs{$field[$i + 2]} = $field[$i + 1];
 149 #                       $i += 3;
 150 #                   }
 151                     else {
 152                         while ($i <= $#field && $field[$i] ne ";") {
 153                             $i++;
 154                         }
 155                         $i++;
 156                     }
 157                 }
 158                 if (!$opt_e && $c != -1) {
 159                     $encoding[$c] = $n;
 160                     $in_encoding{$n} = 1;
 161                 }
 162                 $width{$n} = $w;
 163                 $height{$n} = $ury;
 164                 $depth{$n} = -$lly;
 165                 $left_side_bearing{$n} = -$llx;
 166                 $right_side_bearing{$n} = $urx - $w;
 167 #               while ((my $lig, my $glyph2) = each %ligs) {
 168 #                   $ligatures{$lig} = $n . " " . $glyph2;
 169 #               }
 170             }
 171         }
 172     }
 173 }
 174 close(AFM);
 175
 176 # read the DESC file
 177
 178 my ($sizescale, $resolution, $unitwidth);
 179 $sizescale = 1;
 180
 181 open(DESC, $desc) || open(DESC, $sys_desc) ||
 182     die "$prog: can't open \`$desc' or \`$sys_desc': $!\n";
 183 while (<DESC>) {
 184     next if /^#/;
 185     chop;
 186     my @field = split(' ');
 187     next if $#field < 0;
 188     last if $field[0] eq "charset";
 189     if ($field[0] eq "res") {
 190         $resolution = $field[1];
 191     }
 192     elsif ($field[0] eq "unitwidth") {
 193         $unitwidth = $field[1];
 194     }
 195     elsif ($field[0] eq "sizescale") {
 196         $sizescale = $field[1];
 197     }
 198 }
 199 close(DESC);
 200
 201 if ($opt_e) {
 202     # read the encoding file
 203
 204     my $sys_opt_e = $groff_sys_fontdir . "/devps/" . $opt_e;
 205     open(ENCODING, $opt_e) || open(ENCODING, $sys_opt_e) ||
 206         die "$prog: can't open \`$opt_e' or \`$sys_opt_e': $!\n";
 207     while (<ENCODING>) {
 208         next if /^#/;
 209         chop;
 210         my @field = split(' ');
 211         next if $#field < 0;
 212         if ($#field == 1) {
 213             if ($field[1] >= 0 && defined $width{$field[0]}) {
 214                 $encoding[$field[1]] = $field[0];
 215                 $in_encoding{$field[0]} = 1;
 216             }
 217         }
 218     }
 219     close(ENCODING);
 220 }
 221
 222 # read the map file
 223
 224 my (%nmap, %map);
 225
 226 open(MAP, $map) || open(MAP, $sys_map) ||
 227     die "$prog: can't open \`$map' or \`$sys_map': $!\n";
 228 while (<MAP>) {
 229     next if /^#/;
 230     chop;
 231     my @field = split(' ');
 232     next if $#field < 0;
 233     if ($#field == 1) {
 234         if ($field[1] eq "space") {
 235             # The PostScript character "space" is automatically mapped
 236             # to the groff character "space"; this is for grops.
 237             warn "you are not allowed to map to " .
 238                  "the groff character \`space'";
 239         }
 240         elsif ($field[0] eq "space") {
 241             warn "you are not allowed to map " .
 242                  "the PostScript character \`space'";
 243         }
 244         else {
 245             $nmap{$field[0]} += 0;
 246             $map{$field[0], $nmap{$field[0]}} = $field[1];
 247             $nmap{$field[0]} += 1;
 248
 249             # There is more than one way to make a PS glyph name;
 250             # let us try Unicode names with both `uni' and `u' prefixes.
 251             my $utmp = $AGL_to_unicode{$field[0]};
 252             if (defined $utmp && $utmp =~ /^[0-9A-F]{4}$/) {
 253                 foreach my $unicodepsname ("uni" . $utmp, "u" . $utmp) {
 254                     $nmap{$unicodepsname} += 0;
 255                     $map{$unicodepsname, $nmap{$unicodepsname}} = $field[1];
 256                     $nmap{$unicodepsname} += 1;
 257                 }
 258             }
 259         }
 260     }
 261 }
 262 close(MAP);
 263
 264 $italic_angle = $opt_a if $opt_a;
 265
 266
 267 if (!$opt_x) {
 268     my %mapped;
 269     my $i = ($#encoding > 256) ? ($#encoding + 1) : 256;
 270     while (my $ch = each %width) {
 271         # add unencoded characters
 272         if (!$in_encoding{$ch}) {
 273             $encoding[$i] = $ch;
 274             $i++;
 275         }
 276         if ($nmap{$ch}) {
 277             for (my $j = 0; $j < $nmap{$ch}; $j++) {
 278                 if (defined $mapped{$map{$ch, $j}}) {
 279                     warn "both $mapped{$map{$ch, $j}} and $ch " .
 280                          "map to $map{$ch, $j}";
 281                 }
 282                 else {
 283                     $mapped{$map{$ch, $j}} = $ch;
 284                 }
 285             }
 286         }
 287         else {
 288             my $u = "";         # the resulting groff glyph name
 289             my $ucomp = "";     # Unicode string before decomposition
 290             my $utmp = "";      # temporary value
 291             my $component = "";
 292             my $nv = 0;
 293
 294             # Step 1:
 295             #   Drop all characters from the glyph name starting with the
 296             #   first occurrence of a period (U+002E FULL STOP), if any.
 297             #   ?? We avoid mapping of glyphs with periods, since they are
 298             #   likely to be variant glyphs, leading to a `many ps glyphs --
 299             #   one groff glyph' conflict.
 300             #
 301             #   If multiple glyphs in the font represent the same character
 302             #   in the Unicode standard, as do `A' and `A.swash', for example,
 303             #   they can be differentiated by using the same base name with
 304             #   different suffixes.  This suffix (the part of glyph name that
 305             #   follows the first period) does not participate in the
 306             #   computation of a character sequence.  It can be used by font
 307             #   designers to indicate some characteristics of the glyph.  The
 308             #   suffix may contain periods or any other permitted characters.
 309             #   Small cap A, for example, could be named `uni0041.sc' or
 310             #   `A.sc'.
 311
 312             next if $ch =~ /\./;
 313
 314             # Step 2:
 315             #  Split the remaining string into a sequence of components,
 316             #  using the underscore character (U+005F LOW LINE) as the
 317             #  delimiter.
 318
 319             while ($ch =~ /([^_]+)/g) {
 320                 $component = $1;
 321
 322                 # Step 3:
 323                 #   Map each component to a character string according to the
 324                 #   procedure below:
 325                 #
 326                 #   * If the component is in the Adobe Glyph List, then map
 327                 #     it to the corresponding character in that list.
 328
 329                 $utmp = $AGL_to_unicode{$component};
 330                 if ($utmp) {
 331                     $utmp = "U+" . $utmp;
 332                 }
 333
 334                 #   * Otherwise, if the component is of the form `uni'
 335                 #     (U+0075 U+006E U+0069) followed by a sequence of
 336                 #     uppercase hexadecimal digits (0 .. 9, A .. F, i.e.,
 337                 #     U+0030 .. U+0039, U+0041 .. U+0046), the length of
 338                 #     that sequence is a multiple of four, and each group of
 339                 #     four digits represents a number in the set {0x0000 ..
 340                 #     0xD7FF, 0xE000 .. 0xFFFF}, then interpret each such
 341                 #     number as a Unicode scalar value and map the component
 342                 #     to the string made of those scalar values.
 343
 344                 elsif ($component =~ /^uni([0-9A-F]{4})+$/) {
 345                     while ($component =~ /([0-9A-F]{4})/g) {
 346                         $nv = hex("0x" . $1);
 347                         if ($nv <= 0xD7FF || $nv >= 0xE000) {
 348                             $utmp .= "U+" . $1;
 349                         }
 350                         else {
 351                             $utmp = "";
 352                             last;
 353                         }
 354                     }
 355                 }
 356
 357                 #   * Otherwise, if the component is of the form `u' (U+0075)
 358                 #     followed by a sequence of four to six uppercase
 359                 #     hexadecimal digits {0 .. 9, A .. F} (U+0030 .. U+0039,
 360                 #     U+0041 .. U+0046), and those digits represent a number
 361                 #     in {0x0000 .. 0xD7FF, 0xE000 .. 0x10FFFF}, then
 362                 #     interpret this number as a Unicode scalar value and map
 363                 #     the component to the string made of this scalar value.
 364
 365                 elsif ($component =~ /^u([0-9A-F]{4,6})$/) {
 366                     $nv = hex("0x" . $1);
 367                     if ($nv <= 0xD7FF || ($nv >= 0xE000 && $nv <= 0x10FFFF)) {
 368                         $utmp = "U+" . $1;
 369                     }
 370                 }
 371
 372                 # Finally, concatenate those strings; the result is the
 373                 # character string to which the glyph name is mapped.
 374
 375                 $ucomp .= $utmp if $utmp;
 376             }
 377
 378             # Unicode decomposition
 379             while ($ucomp =~ /([0-9A-F]{4,6})/g) {
 380                 $component = $1;
 381                 $utmp = $unicode_decomposed{$component};
 382                 $u .= "_" . ($utmp ? $utmp : $component);
 383             }
 384             $u =~ s/^_/u/;
 385             if ($u) {
 386                 if (defined $mapped{$u}) {
 387                     warn "both $mapped{$u} and $ch map to $u";
 388                 }
 389                 else {
 390                     $mapped{$u} = $ch;
 391                 }
 392                 $nmap{$ch} += 1;
 393                 $map{$ch, "0"} = $u;
 394             }
 395         }
 396     }
 397 }
 398
 399 # Check explicitly for groff's standard ligatures -- many afm files don't
 400 # have proper `L' entries.
 401
 402 my %default_ligatures = (
 403   "fi", "f i",
 404   "fl", "f l",
 405   "ff", "f f",
 406   "ffi", "ff i",
 407   "ffl", "ff l",
 408 );
 409
 410 while (my ($lig, $components) = each %default_ligatures) {
 411     if (defined $width{$lig} && !defined $ligatures{$lig}) {
 412         $ligatures{$lig} = $components;
 413     }
 414 }
 415
 416 # print it all out
 417
 418 open(FONT, ">$outfile") || die "$prog: can't open \`$outfile' for output: $!\n";
 419 select(FONT);
 420
 421 print("# This file has been generated with " .
 422       "GNU afmtodit (groff) version @VERSION@\n");
 423 print("#\n");
 424 print("#   $fullname\n") if defined $fullname;
 425 print("#   $version\n") if defined $version;
 426 print("#   $familyname\n") if defined $familyname;
 427
 428 if ($opt_c) {
 429     print("#\n");
 430     if (defined $notice || @comments) {
 431         print("# The original AFM file contains the following comments:\n");
 432         print("#\n");
 433         print("#   $notice\n") if defined $notice;
 434         foreach my $comment (@comments) {
 435             print("#   $comment\n");
 436         }
 437     }
 438     else {
 439         print("# The original AFM file contains no comments.\n");
 440     }
 441 }
 442
 443 print("\n");
 444
 445 print("name $font\n");
 446 print("internalname $psname\n") if $psname;
 447 print("special\n") if $opt_s;
 448 printf("slant %g\n", $italic_angle) if $italic_angle != 0;
 449 printf("spacewidth %d\n", conv($width{"space"})) if defined $width{"space"};
 450
 451 if ($opt_e) {
 452     my $e = $opt_e;
 453     $e =~ s@.*/@@;
 454     print("encoding $e\n");
 455 }
 456
 457 if (!$opt_n && %ligatures) {
 458     print("ligatures");
 459     while (my $lig = each %ligatures) {
 460         print(" $lig");
 461     }
 462     print(" 0\n");
 463 }
 464
 465 if (!$opt_k && $#kern1 >= 0) {
 466     print("\n");
 467     print("kernpairs\n");
 468
 469     for (my $i = 0; $i <= $#kern1; $i++) {
 470         my $c1 = $kern1[$i];
 471         my $c2 = $kern2[$i];
 472         if (defined $nmap{$c1} && $nmap{$c1} != 0
 473             && defined $nmap{$c2} && $nmap{$c2} != 0) {
 474             for (my $j = 0; $j < $nmap{$c1}; $j++) {
 475                 for (my $k = 0; $k < $nmap{$c2}; $k++) {
 476                     if ($kernx[$i] != 0) {
 477                         printf("%s %s %d\n",
 478                                $map{$c1, $j},
 479                                $map{$c2, $k},
 480                                conv($kernx[$i]));
 481                     }
 482                 }
 483             }
 484         }
 485     }
 486 }
 487
 488 my ($asc_boundary, $desc_boundary, $xheight, $slant);
 489
 490 # characters not shorter than asc_boundary are considered to have ascenders
 491
 492 $asc_boundary = 0;
 493 $asc_boundary = $height{"t"} if defined $height{"t"};
 494 $asc_boundary -= 1;
 495
 496 # likewise for descenders
 497
 498 $desc_boundary = 0;
 499 $desc_boundary = $depth{"g"} if defined $depth{"g"};
 500 $desc_boundary = $depth{"j"} if defined $depth{"g"} && $depth{"j"} < $desc_boundary;
 501 $desc_boundary = $depth{"p"} if defined $depth{"p"} && $depth{"p"} < $desc_boundary;
 502 $desc_boundary = $depth{"q"} if defined $depth{"q"} && $depth{"q"} < $desc_boundary;
 503 $desc_boundary = $depth{"y"} if defined $depth{"y"} && $depth{"y"} < $desc_boundary;
 504 $desc_boundary -= 1;
 505
 506 if (defined $height{"x"}) {
 507     $xheight = $height{"x"};
 508 }
 509 elsif (defined $height{"alpha"}) {
 510     $xheight = $height{"alpha"};
 511 }
 512 else {
 513     $xheight = 450;
 514 }
 515
 516 $italic_angle = $italic_angle*3.14159265358979323846/180.0;
 517 $slant = sin($italic_angle)/cos($italic_angle);
 518 $slant = 0 if $slant < 0;
 519
 520 print("\n");
 521 print("charset\n");
 522 for (my $i = 0; $i <= $#encoding; $i++) {
 523     my $ch = $encoding[$i];
 524     if (defined $ch && $ch ne "" && $ch ne "space") {
 525         $map{$ch, "0"} = "---" if !defined $nmap{$ch} || $nmap{$ch} == 0;
 526         my $type = 0;
 527         my $h = $height{$ch};
 528         $h = 0 if $h < 0;
 529         my $d = $depth{$ch};
 530         $d = 0 if $d < 0;
 531         $type = 1 if $d >= $desc_boundary;
 532         $type += 2 if $h >= $asc_boundary;
 533         printf("%s\t%d", $map{$ch, "0"}, conv($width{$ch}));
 534         my $italic_correction = 0;
 535         my $left_math_fit = 0;
 536         my $subscript_correction = 0;
 537         if (defined $opt_i) {
 538             $italic_correction = $right_side_bearing{$ch} + $opt_i;
 539             $italic_correction = 0 if $italic_correction < 0;
 540             $subscript_correction = $slant * $xheight * .8;
 541             $subscript_correction = $italic_correction if
 542                 $subscript_correction > $italic_correction;
 543             $left_math_fit = $left_side_bearing{$ch} + $opt_i;
 544             if (defined $opt_m) {
 545                 $left_math_fit = 0 if $left_math_fit < 0;
 546             }
 547         }
 548         if (defined $italic_correction{$ch}) {
 549             $italic_correction = $italic_correction{$ch};
 550         }
 551         if (defined $left_italic_correction{$ch}) {
 552             $left_math_fit = $left_italic_correction{$ch};
 553         }
 554         if (defined $subscript_correction{$ch}) {
 555             $subscript_correction = $subscript_correction{$ch};
 556         }
 557         if ($subscript_correction != 0) {
 558             printf(",%d,%d", conv($h), conv($d));
 559             printf(",%d,%d,%d", conv($italic_correction),
 560                    conv($left_math_fit),
 561                    conv($subscript_correction));
 562         }
 563         elsif ($left_math_fit != 0) {
 564             printf(",%d,%d", conv($h), conv($d));
 565             printf(",%d,%d", conv($italic_correction),
 566                    conv($left_math_fit));
 567         }
 568         elsif ($italic_correction != 0) {
 569             printf(",%d,%d", conv($h), conv($d));
 570             printf(",%d", conv($italic_correction));
 571         }
 572         elsif ($d != 0) {
 573             printf(",%d,%d", conv($h), conv($d));
 574         }
 575         else {
 576             # always put the height in to stop groff guessing
 577             printf(",%d", conv($h));
 578         }
 579         printf("\t%d", $type);
 580         printf("\t%d\t%s\n", $i, $ch);
 581         if (defined $nmap{$ch}) {
 582             for (my $j = 1; $j < $nmap{$ch}; $j++) {
 583                 printf("%s\t\"\n", $map{$ch, $j});
 584             }
 585         }
 586     }
 587     if (defined $ch && $ch eq "space" && defined $width{"space"}) {
 588         printf("space\t%d\t0\t%d\tspace\n", conv($width{"space"}), $i);
 589     }
 590 }
 591
 592 sub conv {
 593     $_[0]*$unitwidth*$resolution/(72*1000*$sizescale) + ($_[0] < 0 ? -.5 : .5);
 594 }
 595
 596 # eof