doc/genps.pl

   1 #!/usr/bin/perl
   2 #
   3 # Format the documentation as PostScript
   4 #
   5
   6 require 'psfonts.ph';           # The fonts we want to use
   7 require 'pswidth.ph';           # PostScript string width
   8
   9 use Fcntl;
  10
  11 #
  12 # PostScript configurables; these values are also available to the
  13 # PostScript code itself
  14 #
  15 %psconf = (
  16            pagewidth => 595,    # Page width in PostScript points
  17            pageheight => 792,   # Page height in PostScript points
  18            lmarg => 100,        # Left margin in PostScript points
  19            rmarg => 50,         # Right margin in PostScript points
  20            topmarg => 100,      # Top margin in PostScript points
  21            botmarg => 100,      # Bottom margin in PostScript points
  22            plmarg => 50,        # Page number position relative to left margin
  23            prmarg => 0,         # Page number position relative to right margin
  24            pymarg => 50,        # Page number position relative to bot margin
  25            startcopyright => 75, # How much above the bottom margin is the
  26                                  # copyright notice stuff
  27            bulladj => 12,       # How much to indent a bullet paragraph
  28            tocind => 12,        # TOC indentation per level
  29            tocpnz => 24,        # Width of TOC page number only zone
  30            tocdots => 8,        # Spacing between TOC dots
  31            idxspace => 24,      # Minimum space between index title and pg#
  32            idxindent => 24,     # How much to indent a subindex entry
  33            idxgutter => 24,     # Space between index columns
  34            idxcolumns => 2,     # Number of index columns
  35            );
  36
  37 %psbool = (
  38            colorlinks => 0,     # Set links in blue rather than black
  39            );
  40
  41 # Known paper sizes
  42 %papersizes = (
  43                'a5'     => [421, 595], # ISO half paper size
  44                'b5'     => [501, 709], # ISO small paper size
  45                'a4'     => [595, 842], # ISO standard paper size
  46                'letter' => [612, 792], # US common paper size
  47                'pa4'    => [595, 792], # Compromise ("portable a4")
  48                'b4'     => [709,1002], # ISO intermediate paper size
  49                'legal'  => [612,1008], # US intermediate paper size
  50                'a3'     => [842,1190], # ISO double paper size
  51                '11x17'  => [792,1224], # US double paper size
  52                );
  53
  54 #
  55 # Parse the command line
  56 #
  57 undef $input;
  58 while ( $arg = shift(@ARGV) ) {
  59     if ( $arg =~ /^\-(|no\-)(.*)$/ ) {
  60         $parm = $2;
  61         $true = ($1 eq '') ? 1 : 0;
  62         if ( $true && defined($papersizes{$parm}) ) {
  63             $psconf{pagewidth}  = $papersizes{$parm}->[0];
  64             $psconf{pageheight} = $papersizes{$parm}->[1];
  65         } elsif ( defined($psbool{$parm}) ) {
  66             $psbool{$parm} = $true;
  67         } elsif ( $true && defined($psconf{$parm}) ) {
  68             $psconf{$parm} = shift(@ARGV);
  69         } else {
  70             die "$0: Unknown option: $arg\n";
  71         }
  72     } else {
  73         $input = $arg;
  74     }
  75 }
  76
  77 #
  78 # Document formatting parameters
  79 #
  80 $paraskip = 6;                  # Space between paragraphs
  81 $chapstart = 30;                # Space before a chapter heading
  82 $chapskip = 24;                 # Space after a chapter heading
  83 $tocskip = 6;                   # Space between TOC entries
  84
  85 # Configure post-paragraph skips for each kind of paragraph
  86 %skiparray = ('chap' => $chapskip, 'appn' => $chapstart,
  87               'head' => $paraskip, 'subh' => $paraskip,
  88               'norm' => $paraskip, 'bull' => $paraskip,
  89               'code' => $paraskip, 'toc0' => $tocskip,
  90               'toc1' => $tocskip,  'toc2' => $tocskip);
  91
  92 #
  93 # Custom encoding vector.  This is basically the same as
  94 # ISOLatin1Encoding (a level 2 feature, so we dont want to use it),
  95 # but with a few extra characters thrown in.  It is basically a
  96 # modified Windows 1252 codepage, minus, for now, the euro sign (\200
  97 # is reserved for euro.)
  98 #
  99 @NASMEncoding =
 100 (
 101  (undef)x32,
 102  'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
 103  'ampersand', 'quoteright', 'parenleft',
 104  'parenright', 'asterisk', 'plus', 'comma', 'minus',
 105  'period', 'slash', 'zero', 'one', 'two', 'three',
 106  'four', 'five', 'six', 'seven', 'eight', 'nine',
 107  'colon', 'semicolon', 'less', 'equal', 'greater',
 108  'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
 109  'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q',
 110  'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
 111  'bracketleft', 'backslash', 'bracketright',
 112  'asciicircum', 'underscore', 'quoteleft', 'a', 'b',
 113  'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
 114  'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 115  'w', 'x', 'y', 'z', 'braceleft', 'bar',
 116  'braceright', 'asciitilde', undef,
 117  undef, 'macron', 'quotesinglbase', 'florin',
 118  'quotedblbase', 'ellipsis', 'dagger', 'dbldagger',
 119  'circumflex', 'perthousand', 'Scaron', 'guilsinglleft',
 120  'OE', 'hungarumlaut', 'Zcaron', 'caron',
 121  'ogonek', 'grave', 'quotesingle', 'quotedblleft',
 122  'quotedblright', 'bullet', 'endash', 'emdash',
 123  'tilde', 'trademark', 'scaron', 'guilsignlright',
 124  'oe', 'ring', 'zcaron', 'Ydieresis',
 125  'space', 'exclamdown', 'cent', 'sterling',
 126  'currency', 'yen', 'brokenbar', 'section',
 127  'dieresis', 'copyright', 'ordfeminine',
 128  'guillemotleft', 'logicalnot', 'hyphen',
 129  'registered', 'macron', 'degree', 'plusminus',
 130  'twosuperior', 'threesuperior', 'acute', 'mu',
 131  'paragraph', 'periodcentered', 'cedilla',
 132  'onesuperior', 'ordmasculine', 'guillemotright',
 133  'onequarter', 'onehalf', 'threequarters',
 134  'questiondown', 'Agrave', 'Aacute', 'Acircumflex',
 135  'Atilde', 'Adieresis', 'Aring', 'AE', 'Ccedilla',
 136  'Egrave', 'Eacute', 'Ecircumflex', 'Edieresis',
 137  'Igrave', 'Iacute', 'Icircumflex', 'Idieresis',
 138  'Eth', 'Ntilde', 'Ograve', 'Oacute', 'Ocircumflex',
 139  'Otilde', 'Odieresis', 'multiply', 'Oslash',
 140  'Ugrave', 'Uacute', 'Ucircumflex', 'Udieresis',
 141  'Yacute', 'Thorn', 'germandbls', 'agrave', 'aacute',
 142  'acircumflex', 'atilde', 'adieresis', 'aring', 'ae',
 143  'ccedilla', 'egrave', 'eacute', 'ecircumflex',
 144  'edieresis', 'igrave', 'iacute', 'icircumflex',
 145  'idieresis', 'eth', 'ntilde', 'ograve', 'oacute',
 146  'ocircumflex', 'otilde', 'odieresis', 'divide',
 147  'oslash', 'ugrave', 'uacute', 'ucircumflex',
 148  'udieresis', 'yacute', 'thorn', 'ydieresis'
 149 );
 150
 151 $emdash    = "\227";
 152 $endash    = "\226";
 153 $bullet    = "\225";
 154 $copyright = "\251";
 155
 156 #
 157 # First, format the stuff coming from the front end into
 158 # a cleaner representation
 159 #
 160 if ( defined($input) ) {
 161     sysopen(PARAS, $input, O_RDONLY) or
 162         die "$0: cannot open $input: $!\n";
 163 } else {
 164     open(PARAS, "<&STDIN") or die "$0: $!\n";
 165 }
 166 while ( defined($line = <PARAS>) ) {
 167     chomp $line;
 168     $data = <PARAS>;
 169     chomp $data;
 170     if ( $line =~ /^meta :(.*)$/ ) {
 171         $metakey = $1;
 172         $metadata{$metakey} = $data;
 173     } elsif ( $line =~ /^indx :(.*)$/ ) {
 174         $ixentry = $1;
 175         push(@ixentries, $ixentry);
 176         $ixterms{$ixentry} = [split(/\037/, $data)];
 177         # Look for commas.  This is easier done on the string
 178         # representation, so do it now.
 179         if ( $data =~ /^(.*)\,\037sp\037/ ) {
 180             $ixprefix = $1;
 181             $ixprefix =~ s/\037n $//; # Discard possible font change at end
 182             $ixhasprefix{$ixentry} = $ixprefix;
 183             if ( !$ixprefixes{$ixprefix} ) {
 184                 $ixcommafirst{$ixentry}++;
 185             }
 186             $ixprefixes{$ixprefix}++;
 187         } else {
 188             # A complete term can also be used as a prefix
 189             $ixprefixes{$data}++;
 190         }
 191     } else {
 192         push(@ptypes, $line);
 193         push(@paras, [split(/\037/, $data)]);
 194     }
 195 }
 196 close(PARAS);
 197
 198 #
 199 # Convert an integer to a chosen base
 200 #
 201 sub int2base($$) {
 202     my($i,$b) = @_;
 203     my($s) = '';
 204     my($n) = '';
 205     my($z) = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
 206     return '0' if ($i == 0);
 207     if ( $i < 0 ) { $n = '-'; $i = -$i; }
 208     while ( $i ) {
 209         $s = substr($z,$i%$b,1) . $s;
 210         $i = int($i/$b);
 211     }
 212     return $n.$s;
 213 }
 214
 215 #
 216 # Convert a string to a rendering array
 217 #
 218 sub string2array($)
 219 {
 220     my($s) = @_;
 221     my(@a) = ();
 222
 223     $s =~ s/ \- / $endash /g;   # Replace " - " with en dash
 224
 225     while ( $s =~ /^(\s+|\S+)(.*)$/ ) {
 226         push(@a, [0,$1]);
 227         $s = $2;
 228     }
 229
 230     return @a;
 231 }
 232
 233 #
 234 # Take a crossreference name and generate the PostScript name for it.
 235 #
 236 # This hack produces a somewhat smaller PDF...
 237 #%ps_xref_list = ();
 238 #$ps_xref_next = 0;
 239 #sub ps_xref($) {
 240 #    my($s) = @_;
 241 #    my $q = $ps_xref_list{$s};
 242 #    return $q if ( defined($ps_xref_list{$s}) );
 243 #    $q = 'X'.int2base($ps_xref_next++, 52);
 244 #    $ps_xref_list{$s} = $q;
 245 #    return $q;
 246 #}
 247
 248 # Somewhat bigger PDF, but one which obeys # URLs
 249 sub ps_xref($) {
 250     return @_[0];
 251 }
 252
 253 #
 254 # Flow lines according to a particular font set and width
 255 #
 256 # A "font set" is represented as an array containing
 257 # arrays of pairs: [<size>, <metricref>]
 258 #
 259 # Each line is represented as:
 260 # [ [type,first|last,aux,fontset,page,ypos,optional col],
 261 #   [rendering array] ]
 262 #
 263 # A space character may be "squeezed" by up to this much
 264 # (as a fraction of the normal width of a space.)
 265 #
 266 $ps_space_squeeze = 0.00;       # Min space width 100%
 267 sub ps_flow_lines($$$@) {
 268     my($wid, $fontset, $type, @data) = @_;
 269     my($fonts) = $$fontset{fonts};
 270     my($e);
 271     my($w)  = 0;                # Width of current line
 272     my($sw) = 0;                # Width of current line due to spaces
 273     my(@l)  = ();               # Current line
 274     my(@ls) = ();               # Accumulated output lines
 275     my(@xd) = ();               # Metadata that goes with subsequent text
 276     my $hasmarker = 0;          # Line has -6 marker
 277     my $pastmarker = 0;         # -6 marker found
 278
 279     # If there is a -6 marker anywhere in the paragraph,
 280     # *each line* output needs to have a -6 marker
 281     foreach $e ( @data ) {
 282         $hasmarker = 1 if ( $$e[0] == -6 );
 283     }
 284
 285     $w = 0;
 286     foreach $e ( @data ) {
 287         if ( $$e[0] < 0 ) {
 288             # Type is metadata.  Zero width.
 289             if ( $$e[0] == -6 ) {
 290                 $pastmarker = 1;
 291             }
 292             if ( $$e[0] == -1 || $$e[0] == -6 ) {
 293                 # -1 (end anchor) or -6 (marker) goes with the preceeding
 294                 # text, otherwise with the subsequent text
 295                 push(@l, $e);
 296             } else {
 297                 push(@xd, $e);
 298             }
 299         } else {
 300             my $ew = ps_width($$e[1], $fontset->{fonts}->[$$e[0]][1],
 301                               \@NASMEncoding) *
 302                 ($fontset->{fonts}->[$$e[0]][0]/1000);
 303             my $sp = $$e[1];
 304             $sp =~ tr/[^ ]//d;  # Delete nonspaces
 305             my $esw = ps_width($sp, $fontset->{fonts}->[$$e[0]][1],
 306                                \@NASMEncoding) *
 307                 ($fontset->{fonts}->[$$e[0]][0]/1000);
 308
 309             if ( ($w+$ew) - $ps_space_squeeze*($sw+$esw) > $wid ) {
 310                 # Begin new line
 311                 # Search backwards for previous space chunk
 312                 my $lx = scalar(@l)-1;
 313                 my @rm = ();
 314                 while ( $lx >= 0 ) {
 315                     while ( $lx >= 0 && $l[$lx]->[0] < 0 ) {
 316                         # Skip metadata
 317                         $pastmarker = 0 if ( $l[$lx]->[0] == -6 );
 318                         $lx--;
 319                     };
 320                     if ( $lx >= 0 ) {
 321                         if ( $l[$lx]->[1] eq ' ' ) {
 322                             splice(@l, $lx, 1);
 323                             @rm = splice(@l, $lx);
 324                             last; # Found place to break
 325                         } else {
 326                             $lx--;
 327                         }
 328                     }
 329                 }
 330
 331                 # Now @l contains the stuff to remain on the old line
 332                 # If we broke the line inside a link, then split the link
 333                 # into two.
 334                 my $lkref = undef;
 335                 foreach my $lc ( @l ) {
 336                     if ( $$lc[0] == -2 || $$lc[0] == -3 || $lc[0] == -7 ) {
 337                         $lkref = $lc;
 338                     } elsif ( $$lc[0] == -1 ) {
 339                         undef $lkref;
 340                     }
 341                 }
 342
 343                 if ( defined($lkref) ) {
 344                     push(@l, [-1,undef]); # Terminate old reference
 345                     unshift(@rm, $lkref); # Duplicate reference on new line
 346                 }
 347
 348                 if ( $hasmarker ) {
 349                     if ( $pastmarker ) {
 350                         unshift(@rm,[-6,undef]); # New line starts with marker
 351                     } else {
 352                         push(@l,[-6,undef]); # Old line ends with marker
 353                     }
 354                 }
 355
 356                 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]);
 357                 @l = @rm;
 358
 359                 $w = $sw = 0;
 360                 # Compute the width of the remainder array
 361                 for my $le ( @l ) {
 362                     if ( $$le[0] >= 0 ) {
 363                         my $xew = ps_width($$le[1],
 364                                            $fontset->{fonts}->[$$le[0]][1],
 365                                            \@NASMEncoding) *
 366                             ($fontset->{fonts}->[$$le[0]][0]/1000);
 367                         my $xsp = $$le[1];
 368                         $xsp =~ tr/[^ ]//d;     # Delete nonspaces
 369                         my $xsw = ps_width($xsp,
 370                                            $fontset->{fonts}->[$$le[0]][1],
 371                                            \@NASMEncoding) *
 372                             ($fontset->{fonts}->[$$le[0]][0]/1000);
 373                         $w += $xew;  $sw += $xsw;
 374                     }
 375                 }
 376             }
 377             push(@l, @xd);      # Accumulated metadata
 378             @xd = ();
 379             if ( $$e[1] ne '' ) {
 380                 push(@l, $e);
 381                 $w += $ew; $sw += $esw;
 382             }
 383         }
 384     }
 385     push(@l,@xd);
 386     if ( scalar(@l) ) {
 387         push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]); # Final line
 388     }
 389
 390     # Mark the first line as first and the last line as last
 391     if ( scalar(@ls) ) {
 392         $ls[0]->[0]->[1] |= 1;     # First in para
 393         $ls[-1]->[0]->[1] |= 2;    # Last in para
 394     }
 395     return @ls;
 396 }
 397
 398 #
 399 # Once we have broken things into lines, having multiple chunks
 400 # with the same font index is no longer meaningful.  Merge
 401 # adjacent chunks to keep down the size of the whole file.
 402 #
 403 sub ps_merge_chunks(@) {
 404     my(@ci) = @_;
 405     my($c, $lc);
 406     my(@co, $eco);
 407
 408     undef $lc;
 409     @co = ();
 410     $eco = -1;                  # Index of the last entry in @co
 411     foreach $c ( @ci ) {
 412         if ( defined($lc) && $$c[0] == $lc && $$c[0] >= 0 ) {
 413             $co[$eco]->[1] .= $$c[1];
 414         } else {
 415             push(@co, $c);  $eco++;
 416             $lc = $$c[0];
 417         }
 418     }
 419     return @co;
 420 }
 421
 422 #
 423 # Convert paragraphs to rendering arrays.  Each
 424 # element in the array contains (font, string),
 425 # where font can be one of:
 426 # -1 end link
 427 # -2 begin crossref
 428 # -3 begin weblink
 429 # -4 index item anchor
 430 # -5 crossref anchor
 431 # -6 left/right marker (used in the index)
 432 # -7 page link (used in the index)
 433 #  0 normal
 434 #  1 empatic (italic)
 435 #  2 code (fixed spacing)
 436 #
 437
 438 sub mkparaarray($@) {
 439     my($ptype, @chunks) = @_;
 440
 441     my @para = ();
 442     my $in_e = 0;
 443     my $chunk;
 444
 445     if ( $ptype =~ /^code/ ) {
 446         foreach $chunk ( @chunks ) {
 447             push(@para, [2, $chunk]);
 448         }
 449     } else {
 450         foreach $chunk ( @chunks ) {
 451             my $type = substr($chunk,0,2);
 452             my $text = substr($chunk,2);
 453
 454             if ( $type eq 'sp' ) {
 455                 push(@para, [$in_e?1:0, ' ']);
 456             } elsif ( $type eq 'da' ) {
 457                 push(@para, [$in_e?1:0, $endash]);
 458             } elsif ( $type eq 'n ' ) {
 459                 push(@para, [0, $text]);
 460                 $in_e = 0;
 461             } elsif ( $type =~ '^e' ) {
 462                 push(@para, [1, $text]);
 463                 $in_e = ($type eq 'es' || $type eq 'e ');
 464             } elsif ( $type eq 'c ' ) {
 465                 push(@para, [2, $text]);
 466                 $in_e = 0;
 467             } elsif ( $type eq 'x ' ) {
 468                 push(@para, [-2, ps_xref($text)]);
 469             } elsif ( $type eq 'xe' ) {
 470                 push(@para, [-1, undef]);
 471             } elsif ( $type eq 'wc' || $type eq 'w ' ) {
 472                 $text =~ /\<(.*)\>(.*)$/;
 473                 my $link = $1; $text = $2;
 474                 push(@para, [-3, $link]);
 475                 push(@para, [($type eq 'wc') ? 2:0, $text]);
 476                 push(@para, [-1, undef]);
 477                 $in_e = 0;
 478             } elsif ( $type eq 'i ' ) {
 479                 push(@para, [-4, $text]);
 480             } else {
 481                 die "Unexpected paragraph chunk: $chunk";
 482             }
 483         }
 484     }
 485     return @para;
 486 }
 487
 488 $npara = scalar(@paras);
 489 for ( $i = 0 ; $i < $npara ; $i++ ) {
 490     $paras[$i] = [mkparaarray($ptypes[$i], @{$paras[$i]})];
 491 }
 492
 493 #
 494 # This converts a rendering array to a simple string
 495 #
 496 sub ps_arraytostr(@) {
 497     my $s = '';
 498     my $c;
 499     foreach $c ( @_ ) {
 500         $s .= $$c[1] if ( $$c[0] >= 0 );
 501     }
 502     return $s;
 503 }
 504
 505 #
 506 # This generates a duplicate of a paragraph
 507 #
 508 sub ps_dup_para(@) {
 509     my(@i) = @_;
 510     my(@o) = ();
 511     my($c);
 512
 513     foreach $c ( @i ) {
 514         my @cc = @{$c};
 515         push(@o, [@cc]);
 516     }
 517     return @o;
 518 }
 519
 520 #
 521 # This generates a duplicate of a paragraph, stripping anchor
 522 # tags (-4 and -5)
 523 #
 524 sub ps_dup_para_noanchor(@) {
 525     my(@i) = @_;
 526     my(@o) = ();
 527     my($c);
 528
 529     foreach $c ( @i ) {
 530         my @cc = @{$c};
 531         push(@o, [@cc]) unless ( $cc[0] == -4 || $cc[0] == -5 );
 532     }
 533     return @o;
 534 }
 535
 536 #
 537 # Scan for header paragraphs and fix up their contents;
 538 # also generate table of contents and PDF bookmarks.
 539 #
 540 @tocparas = ([[-5, 'contents'], [0,'Contents']]);
 541 @tocptypes = ('chap');
 542 @bookmarks = (['title', 0, 'Title'], ['contents', 0, 'Contents']);
 543 %bookref = ();
 544 for ( $i = 0 ; $i < $npara ; $i++ ) {
 545     my $xtype = $ptypes[$i];
 546     my $ptype = substr($xtype,0,4);
 547     my $str;
 548     my $book;
 549
 550     if ( $ptype eq 'chap' || $ptype eq 'appn' ) {
 551         unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
 552             die "Bad para";
 553         }
 554         my $secn = $1;
 555         my $sech = $2;
 556         my $xref = ps_xref($sech);
 557         my $chap = ($ptype eq 'chap')?'Chapter':'Appendix';
 558
 559         $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
 560         push(@bookmarks, $book);
 561         $bookref{$secn} = $book;
 562
 563         push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
 564         push(@tocptypes, 'toc0'.' :'.$sech.':'.$chap.' '.$secn.':');
 565
 566         unshift(@{$paras[$i]},
 567                 [-5, $xref], [0,$chap.' '.$secn.':'], [0, ' ']);
 568     } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
 569         unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
 570             die "Bad para";
 571         }
 572         my $secn = $1;
 573         my $sech = $2;
 574         my $xref = ps_xref($sech);
 575         my $pref;
 576         $pref = $secn; $pref =~ s/\.[^\.]+$//; # Find parent node
 577
 578         $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
 579         push(@bookmarks, $book);
 580         $bookref{$secn} = $book;
 581         $bookref{$pref}->[1]--; # Adjust count for parent node
 582
 583         push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
 584         push(@tocptypes,
 585              (($ptype eq 'subh') ? 'toc2':'toc1').' :'.$sech.':'.$secn);
 586
 587         unshift(@{$paras[$i]}, [-5, $xref]);
 588     }
 589 }
 590
 591 #
 592 # Add TOC to beginning of paragraph list
 593 #
 594 unshift(@paras,  @tocparas);  undef @tocparas;
 595 unshift(@ptypes, @tocptypes); undef @tocptypes;
 596
 597 #
 598 # Add copyright notice to the beginning
 599 #
 600 unshift(@paras,
 601         [[0, $copyright], [0, ' '], [0,$metadata{'year'}],
 602          [0, ' '], string2array($metadata{'author'})],
 603         [string2array($metadata{'license'})]);
 604 unshift(@ptypes, 'norm', 'norm');
 605
 606 $npara = scalar(@paras);
 607
 608 #
 609 # No lines generated, yet.
 610 #
 611 @pslines    = ();
 612
 613 #
 614 # Line Auxilliary Information Types
 615 #
 616 $AuxStr     = 1;                # String
 617 $AuxPage    = 2;                # Page number (from xref)
 618 $AuxPageStr = 3;                # Page number as a PostScript string
 619 $AuxXRef    = 4;                # Cross reference as a name
 620 $AuxNum     = 5;                # Number
 621
 622 #
 623 # Break or convert paragraphs into lines, and push them
 624 # onto the @pslines array.
 625 #
 626 sub ps_break_lines($$) {
 627     my ($paras,$ptypes) = @_;
 628
 629     my $linewidth  = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
 630     my $bullwidth  = $linewidth-$psconf{bulladj};
 631     my $indxwidth  = ($linewidth-$psconf{idxgutter})/$psconf{idxcolumns}
 632                      -$psconf{idxspace};
 633
 634     my $npara = scalar(@{$paras});
 635     my $i;
 636
 637     for ( $i = 0 ; $i < $npara ; $i++ ) {
 638         my $xtype = $ptypes->[$i];
 639         my $ptype = substr($xtype,0,4);
 640         my @data = @{$paras->[$i]};
 641         my @ls = ();
 642         if ( $ptype eq 'code' ) {
 643             my $p;
 644             # Code paragraph; each chunk is a line
 645             foreach $p ( @data ) {
 646                 push(@ls, [[$ptype,0,undef,\%BodyFont,0,0],[$p]]);
 647             }
 648             $ls[0]->[0]->[1] |= 1;           # First in para
 649             $ls[-1]->[0]->[1] |= 2;      # Last in para
 650         } elsif ( $ptype eq 'chap' || $ptype eq 'appn' ) {
 651             # Chapters are flowed normally, but in an unusual font
 652             @ls = ps_flow_lines($linewidth, \%ChapFont, $ptype, @data);
 653         } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
 654             unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
 655                 die "Bad para";
 656             }
 657             my $secn = $1;
 658             my $sech = $2;
 659             my $font = ($ptype eq 'head') ? \%HeadFont : \%SubhFont;
 660             @ls = ps_flow_lines($linewidth, $font, $ptype, @data);
 661             # We need the heading number as auxillary data
 662             $ls[0]->[0]->[2] = [[$AuxStr,$secn]];
 663         } elsif ( $ptype eq 'norm' ) {
 664             @ls = ps_flow_lines($linewidth, \%BodyFont, $ptype, @data);
 665         } elsif ( $ptype eq 'bull' ) {
 666             @ls = ps_flow_lines($bullwidth, \%BodyFont, $ptype, @data);
 667         } elsif ( $ptype =~ /^toc/ ) {
 668             unless ( $xtype =~/^\S+ :([^:]*):(.*)$/ ) {
 669                 die "Bad para";
 670             }
 671             my $xref = $1;
 672             my $refname = $2.' ';
 673             my $ntoc = substr($ptype,3,1)+0;
 674             my $refwidth = ps_width($refname, $BodyFont{fonts}->[0][1],
 675                                     \@NASMEncoding) *
 676                 ($BodyFont{fonts}->[0][0]/1000);
 677
 678             @ls = ps_flow_lines($linewidth-$ntoc*$psconf{tocind}-
 679                                 $psconf{tocpnz}-$refwidth,
 680                                 \%BodyFont, $ptype, @data);
 681
 682             # Auxilliary data: for the first line, the cross reference symbol
 683             # and the reference name; for all lines but the first, the
 684             # reference width; and for the last line, the page number
 685             # as a string.
 686             my $nl = scalar(@ls);
 687             $ls[0]->[0]->[2] = [[$AuxStr,$refname], [$AuxXRef,$xref]];
 688             for ( $j = 1 ; $j < $nl ; $j++ ) {
 689                 $ls[$j]->[0]->[2] = [[$AuxNum,$refwidth]];
 690             }
 691             push(@{$ls[$nl-1]->[0]->[2]}, [$AuxPageStr,$xref]);
 692         } elsif ( $ptype =~ /^idx/ ) {
 693             my $lvl = substr($ptype,3,1)+0;
 694
 695             @ls = ps_flow_lines($indxwidth-$lvl*$psconf{idxindent},
 696                                 \%BodyFont, $ptype, @data);
 697         } else {
 698             die "Unknown para type: $ptype";
 699         }
 700         # Merge adjacent identical chunks
 701         foreach $l ( @ls ) {
 702             @{$$l[1]} = ps_merge_chunks(@{$$l[1]});
 703         }
 704         push(@pslines,@ls);
 705     }
 706 }
 707
 708 # Break the main body text into lines.
 709 ps_break_lines(\@paras, \@ptypes);
 710
 711 #
 712 # Break lines in to pages
 713 #
 714
 715 # Where to start on page 2, the copyright page
 716 $curpage = 2;                   # Start on page 2
 717 $curypos = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg}-
 718     $psconf{startcopyright};
 719 undef $columnstart;             # Not outputting columnar text
 720 undef $curcolumn;               # Current column
 721 $nlines = scalar(@pslines);
 722
 723 #
 724 # This formats lines inside the global @pslines array into pages,
 725 # updating the page and y-coordinate entries.  Start at the
 726 # $startline position in @pslines and go to but not including
 727 # $endline.  The global variables $curpage, $curypos, $columnstart
 728 # and $curcolumn are updated appropriately.
 729 #
 730 sub ps_break_pages($$) {
 731     my($startline, $endline) = @_;
 732
 733     # Paragraph types which should never be broken
 734     my $nobreakregexp = "^(chap|appn|head|subh|toc.|idx.)\$";
 735     # Paragraph types which are heading (meaning they should not be broken
 736     # immediately after)
 737     my $nobreakafter = "^(chap|appn|head|subh)\$";
 738     # Paragraph types which should never be broken *before*
 739     my $nobreakbefore = "^idx[1-9]\$";
 740     # Paragraph types which are set in columnar format
 741     my $columnregexp = "^idx.\$";
 742
 743     my $upageheight = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg};
 744
 745     my $i;
 746
 747     for ( $i = $startline ; $i < $endline ; $i++ ) {
 748         my $linfo = $pslines[$i]->[0];
 749         if ( ($$linfo[0] eq 'chap' || $$linfo[0] eq 'appn' )
 750              && ($$linfo[1] & 1) ) {
 751             # First line of a new chapter heading.  Start a new page.
 752             undef $columnstart;
 753             $curpage++ if ( $curypos > 0 || defined($columnstart) );
 754             $curypos = $chapstart;
 755         } elsif ( defined($columnstart) && $$linfo[0] !~ /$columnregexp/o ) {
 756             undef $columnstart;
 757             $curpage++;
 758             $curypos = 0;
 759         }
 760
 761         if ( $$linfo[0] =~ /$columnregexp/o && !defined($columnstart) ) {
 762             $columnstart = $curypos;
 763             $curcolumn = 0;
 764         }
 765
 766         # Adjust position by the appropriate leading
 767         $curypos += $$linfo[3]->{leading};
 768
 769         # Record the page and y-position
 770         $$linfo[4] = $curpage;
 771         $$linfo[5] = $curypos;
 772         $$linfo[6] = $curcolumn if ( defined($columnstart) );
 773
 774         if ( $curypos > $upageheight ) {
 775             # We need to break the page before this line.
 776             my $broken = 0;             # No place found yet
 777             while ( !$broken && $pslines[$i]->[0]->[4] == $curpage ) {
 778                 my $linfo = $pslines[$i]->[0];
 779                 my $pinfo = $pslines[$i-1]->[0];
 780
 781                 if ( $$linfo[1] == 2 ) {
 782                     # This would be an orphan, don't break.
 783                 } elsif ( $$linfo[1] & 1 ) {
 784                     # Sole line or start of paragraph.  Break unless
 785                     # the previous line was part of a heading.
 786                     $broken = 1 if ( $$pinfo[0] !~ /$nobreakafter/o &&
 787                                      $$linfo[0] !~ /$nobreakbefore/o );
 788                 } else {
 789                     # Middle of paragraph.  Break unless we're in a
 790                     # no-break paragraph, or the previous line would
 791                     # end up being a widow.
 792                     $broken = 1 if ( $$linfo[0] !~ /$nobreakregexp/o &&
 793                                      $$pinfo[1] != 1 );
 794                 }
 795                 $i--;
 796             }
 797             die "Nowhere to break page $curpage\n" if ( !$broken );
 798             # Now $i should point to line immediately before the break, i.e.
 799             # the next paragraph should be the first on the new page
 800             if ( defined($columnstart) &&
 801                  ++$curcolumn < $psconf{idxcolumns} ) {
 802                 # We're actually breaking text into columns, not pages
 803                 $curypos = $columnstart;
 804             } else {
 805                 undef $columnstart;
 806                 $curpage++;
 807                 $curypos = 0;
 808             }
 809             next;
 810         }
 811
 812         # Add end of paragraph skip
 813         if ( $$linfo[1] & 2 ) {
 814             $curypos += $skiparray{$$linfo[0]};
 815         }
 816     }
 817 }
 818
 819 ps_break_pages(0,$nlines);      # Break the main text body into pages
 820
 821 #
 822 # Find the page number of all the indices
 823 #
 824 %ps_xref_page   = ();           # Crossref anchor pages
 825 %ps_index_pages = ();           # Index item pages
 826 $nlines = scalar(@pslines);
 827 for ( $i = 0 ; $i < $nlines ; $i++ ) {
 828     my $linfo = $pslines[$i]->[0];
 829     foreach my $c ( @{$pslines[$i]->[1]} ) {
 830         if ( $$c[0] == -4 ) {
 831             if ( !defined($ps_index_pages{$$c[1]}) ) {
 832                 $ps_index_pages{$$c[1]} = [];
 833             } elsif ( $ps_index_pages{$$c[1]}->[-1] eq $$linfo[4] ) {
 834                 # Pages are emitted in order; if this is a duplicated
 835                 # entry it will be the last one
 836                 next;           # Duplicate
 837             }
 838             push(@{$ps_index_pages{$$c[1]}}, $$linfo[4]);
 839         } elsif ( $$c[0] == -5 ) {
 840             $ps_xref_page{$$c[1]} = $$linfo[4];
 841         }
 842     }
 843 }
 844
 845 #
 846 # Emit index paragraphs
 847 #
 848 $startofindex = scalar(@pslines);
 849 @ixparas = ([[-5,'index'],[0,'Index']]);
 850 @ixptypes = ('chap');
 851
 852 foreach $k ( @ixentries ) {
 853     my $n,$i;
 854     my $ixptype = 'idx0';
 855     my $prefix = $ixhasprefix{$k};
 856     my @ixpara = mkparaarray($ixptype,@{$ixterms{$k}});
 857     my $commapos = undef;
 858
 859     if ( defined($prefix) && $ixprefixes{$prefix} > 1 ) {
 860         # This entry has a "hanging comma"
 861         for ( $i = 0 ; $i < scalar(@ixpara)-1 ; $i++ ) {
 862             if ( substr($ixpara[$i]->[1],-1,1) eq ',' &&
 863                  $ixpara[$i+1]->[1] eq ' ' ) {
 864                 $commapos = $i;
 865                 last;
 866             }
 867         }
 868     }
 869     if ( defined($commapos) ) {
 870         if ( $ixcommafirst{$k} ) {
 871             # This is the first entry; generate the
 872             # "hanging comma" entry
 873             my @precomma = splice(@ixpara,0,$commapos);
 874             if ( $ixpara[0]->[1] eq ',' ) {
 875                 shift(@ixpara); # Discard lone comma
 876             } else {
 877                 # Discard attached comma
 878                 $ixpara[0]->[1] =~ s/\,$//;
 879                 push(@precomma,shift(@ixpara));
 880             }
 881             push(@precomma, [-6,undef]);
 882             push(@ixparas, [@precomma]);
 883             push(@ixptypes, $ixptype);
 884             shift(@ixpara);     # Remove space
 885         } else {
 886             splice(@ixpara,0,$commapos+2);
 887         }
 888         $ixptype = 'idx1';
 889     }
 890
 891     push(@ixpara, [-6,undef]);  # Left/right marker
 892     $i = 1;  $n = scalar(@{$ps_index_pages{$k}});
 893     foreach $p ( @{$ps_index_pages{$k}} ) {
 894         if ( $i++ == $n ) {
 895             push(@ixpara,[-7,$p],[0,"$p"],[-1,undef]);
 896         } else {
 897             push(@ixpara,[-7,$p],[0,"$p,"],[-1,undef],[0,' ']);
 898         }
 899     }
 900
 901     push(@ixparas, [@ixpara]);
 902     push(@ixptypes, $ixptype);
 903 }
 904
 905 #
 906 # Flow index paragraphs into lines
 907 #
 908 ps_break_lines(\@ixparas, \@ixptypes);
 909
 910 #
 911 # Format index into pages
 912 #
 913 $nlines = scalar(@pslines);
 914 ps_break_pages($startofindex, $nlines);
 915
 916 #
 917 # Push index onto bookmark list
 918 #
 919 push(@bookmarks, ['index', 0, 'Index']);
 920
 921 # Get the list of fonts used
 922 %ps_all_fonts = ();
 923 foreach $fset ( @AllFonts ) {
 924     foreach $font ( @{$fset->{fonts}} ) {
 925         $ps_all_fonts{$font->[1]->{name}}++;
 926     }
 927 }
 928
 929 # Emit the PostScript DSC header
 930 print "%!PS-Adobe-3.0\n";
 931 print "%%Pages: $curpage\n";
 932 print "%%BoundingBox: 0 0 ", $psconf{pagewidth}, ' ', $psconf{pageheight}, "\n";
 933 print "%%Creator: (NASM psflow.pl)\n";
 934 print "%%DocumentData: Clean7Bit\n";
 935 print "%%DocumentFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
 936 print "%%DocumentNeededFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
 937 print "%%Orientation: Portrait\n";
 938 print "%%PageOrder: Ascend\n";
 939 print "%%EndComments\n";
 940 print "%%BeginProlog\n";
 941
 942 # Emit the configurables as PostScript tokens
 943 foreach $c ( keys(%psconf) ) {
 944     print "/$c ", $psconf{$c}, " def\n";
 945 }
 946 foreach $c ( keys(%psbool) ) {
 947     print "/$c ", ($psbool{$c}?'true':'false'), " def\n";
 948 }
 949
 950 # Emit custom encoding vector
 951 $zstr = '/NASMEncoding [ ';
 952 foreach $c ( @NASMEncoding ) {
 953     my $z = '/'.(defined($c)?$c:'.notdef ').' ';
 954     if ( length($zstr)+length($z) > 72 ) {
 955         print $zstr,"\n";
 956         $zstr = ' ';
 957     }
 958     $zstr .= $z;
 959 }
 960 print $zstr, "] def\n";
 961
 962 # Font recoding routine
 963 # newname fontname --
 964 print "/nasmenc {\n";
 965 print "  findfont dup length dict begin\n";
 966 print "    { 1 index /FID ne {def}{pop pop} ifelse } forall\n";
 967 print "    /Encoding NASMEncoding def\n";
 968 print "    currentdict\n";
 969 print "  end\n";
 970 print "  definefont pop\n";
 971 print "} def\n";
 972
 973 # Emit fontset definitions
 974 foreach $fset ( @AllFonts ) {
 975     my $i = 0;
 976     my @zfonts = ();
 977     my %allfonts = ();
 978     foreach $font ( @{$fset->{fonts}} ) {
 979         $allfonts{$font->[1]->{name}}++;
 980     }
 981     foreach $font ( keys(%allfonts) ) {
 982         print '/',$font,'-NASM /',$font," nasmenc\n";
 983     }
 984     foreach $font ( @{$fset->{fonts}} ) {
 985         print '/', $fset->{name}, $i, ' ',
 986         '/', $font->[1]->{name}, '-NASM findfont ',
 987         $font->[0], " scalefont def\n";
 988         push(@zfonts, $fset->{name}.$i);
 989         $i++;
 990     }
 991     print '/', $fset->{name}, ' [', join(' ',@zfonts), "] def\n";
 992 }
 993
 994 # Emit the canned PostScript prologue
 995 open(PSHEAD, "< head.ps");
 996 while ( defined($line = <PSHEAD>) ) {
 997     print $line;
 998 }
 999 close(PSHEAD);
1000 print "%%EndProlog\n";
1001
1002 # Generate a PostScript string
1003 sub ps_string($) {
1004     my ($s) = @_;
1005     my ($i,$c);
1006     my ($o) = '(';
1007     my ($l) = length($s);
1008     for ( $i = 0 ; $i < $l ; $i++ ) {
1009         $c = substr($s,$i,1);
1010         if ( ord($c) < 32 || ord($c) > 126 ) {
1011             $o .= sprintf("\\%03o", ord($c));
1012         } elsif ( $c eq '(' || $c eq ')' || $c eq "\\" ) {
1013             $o .= "\\".$c;
1014         } else {
1015             $o .= $c;
1016         }
1017     }
1018     return $o.')';
1019 }
1020
1021 # Generate PDF bookmarks
1022 print "%%BeginSetup\n";
1023 foreach $b ( @bookmarks ) {
1024     print '[/Title ', ps_string($b->[2]), "\n";
1025     print '/Count ', $b->[1], ' ' if ( $b->[1] );
1026     print '/Dest /',$b->[0]," /OUT pdfmark\n";
1027 }
1028
1029 # Ask the PostScript interpreter for the proper size media
1030 print "setpagesize\n";
1031 print "%%EndSetup\n";
1032
1033 # Start a PostScript page
1034 sub ps_start_page() {
1035     $ps_page++;
1036     print "%%Page: $ps_page $ps_page\n";
1037     print "%%BeginPageSetup\n";
1038     print "save\n";
1039     print "%%EndPageSetup\n";
1040     print '/', $ps_page, " pa\n";
1041 }
1042
1043 # End a PostScript page
1044 sub ps_end_page($) {
1045     my($pn) = @_;
1046     if ( $pn ) {
1047         print "($ps_page)", (($ps_page & 1) ? 'pageodd' : 'pageeven'), "\n";
1048     }
1049     print "restore showpage\n";
1050 }
1051
1052 $ps_page = 0;
1053
1054 # Title page
1055 ps_start_page();
1056 $title = $metadata{'title'};
1057 $title =~ s/ \- / $emdash /;
1058 $pstitle = ps_string($title);
1059
1060 # Print title
1061 print "lmarg pageheight 2 mul 3 div moveto\n";
1062 print "tfont0 setfont\n";
1063 print "/title linkdest ${pstitle} show\n";
1064 print "lmarg pageheight 2 mul 3 div 10 sub moveto\n";
1065 print "0 setlinecap 3 setlinewidth\n";
1066 print "pagewidth lmarg sub rmarg sub 0 rlineto stroke\n";
1067
1068 # Print logo, if there is one
1069 # FIX: To be 100% correct, this should look for DocumentNeeded*
1070 # and DocumentFonts in the header of the EPSF and add those to the
1071 # global header.
1072 if ( defined($metadata{epslogo}) &&
1073      sysopen(EPS, $metadata{epslogo}, O_RDONLY) ) {
1074     my @eps = ();
1075     my ($bbllx,$bblly,$bburx,$bbury) = (undef,undef,undef,undef);
1076     my $line;
1077     my $scale = 1;
1078     my $maxwidth = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
1079     my $maxheight = $psconf{pageheight}/3-40;
1080     my $width, $height;
1081     my $x, $y;
1082
1083     while ( defined($line = <EPS>) ) {
1084         last if ( $line =~ /^%%EOF/ );
1085         if ( !defined($bbllx) &&
1086              $line =~ /^\%\%BoundingBox\:\s*([0-9\.]+)\s+([0-9\.]+)\s+([0-9\.]+)\s+([0-9\.]+)/i ) {
1087             $bbllx = $1+0; $bblly = $2+0;
1088             $bburx = $3+0; $bbury = $4+0;
1089         }
1090         push(@eps,$line);
1091     }
1092     close(EPS);
1093
1094     if ( defined($bbllx) ) {
1095         $width = $bburx-$bbllx;
1096         $height = $bbury-$bblly;
1097
1098         if ( $width > $maxwidth ) {
1099             $scale = $maxwidth/$width;
1100         }
1101         if ( $height*$scale > $maxheight ) {
1102             $scale = $maxheight/$height;
1103         }
1104
1105         $x = ($psconf{pagewidth}-$width*$scale)/2;
1106         $y = ($psconf{pageheight}-$height*$scale)/2;
1107
1108         print "BeginEPSF\n";
1109         print $x, ' ', $y, " translate\n";
1110         print $scale, " dup scale\n" unless ( $scale == 1 );
1111         print -$bbllx, ' ', -$bblly, " translate\n";
1112         print "$bbllx $bblly moveto\n";
1113         print "$bburx $bblly lineto\n";
1114         print "$bburx $bbury lineto\n";
1115         print "$bbllx $bbury lineto\n";
1116         print "$bbllx $bblly lineto clip newpath\n";
1117         print "%%BeginDocument: ",ps_string($metadata{epslogo}),"\n";
1118         print @eps;
1119         print "%%EndDocument\n";
1120         print "EndEPSF\n";
1121     }
1122 }
1123 ps_end_page(0);
1124
1125 # Emit the rest of the document (page 2 and on)
1126 $curpage = 2;
1127 ps_start_page();
1128 foreach $line ( @pslines ) {
1129     my $linfo = $line->[0];
1130
1131     if ( $$linfo[4] != $curpage ) {
1132         ps_end_page($curpage > 2);
1133         ps_start_page();
1134         $curpage = $$linfo[4];
1135     }
1136
1137     print '[';
1138     my $curfont = 0;
1139     foreach my $c ( @{$line->[1]} ) {
1140         if ( $$c[0] >= 0 ) {
1141             if ( $curfont != $$c[0] ) {
1142                 print ($curfont = $$c[0]);
1143             }
1144             print ps_string($$c[1]);
1145         } elsif ( $$c[0] == -1 ) {
1146             print '{el}';       # End link
1147         } elsif ( $$c[0] == -2 ) {
1148             print '{/',$$c[1],' xl}'; # xref link
1149         } elsif ( $$c[0] == -3 ) {
1150             print '{',ps_string($$c[1]),'wl}'; # web link
1151         } elsif ( $$c[0] == -4 ) {
1152             # Index anchor -- ignore
1153         } elsif ( $$c[0] == -5 ) {
1154             print '{/',$$c[1],' xa}'; #xref anchor
1155         } elsif ( $$c[0] == -6 ) {
1156             print '][';         # Start a new array
1157             $curfont = 0;
1158         } elsif ( $$c[0] == -7 ) {
1159             print '{/',$$c[1],' pl}'; # page link
1160         } else {
1161             die "Unknown annotation";
1162         }
1163     }
1164     print ']';
1165     if ( defined($$linfo[2]) ) {
1166         foreach my $x ( @{$$linfo[2]} ) {
1167             if ( $$x[0] == $AuxStr ) {
1168                 print ps_string($$x[1]);
1169             } elsif ( $$x[0] == $AuxPage ) {
1170                 print $ps_xref_page{$$x[1]},' ';
1171             } elsif ( $$x[0] == $AuxPageStr ) {
1172                 print ps_string($ps_xref_page{$$x[1]});
1173             } elsif ( $$x[0] == $AuxXRef ) {
1174                 print '/',ps_xref($$x[1]),' ';
1175             } elsif ( $$x[0] == $AuxNum ) {
1176                 print $$x[1],' ';
1177             } else {
1178                 die "Unknown auxilliary data type";
1179             }
1180         }
1181     }
1182     print ($psconf{pageheight}-$psconf{topmarg}-$$linfo[5]);
1183     print ' ', $$linfo[6] if ( defined($$linfo[6]) );
1184     print ' ', $$linfo[0].$$linfo[1], "\n";
1185 }
1186
1187 ps_end_page(1);
1188 print "%%EOF\n";