doc/genps.pl

   1 #!/usr/bin/perl
   2 #
   3 # Format the documentation as PostScript
   4 #
   5
   6 require 'psfonts.ph';           # The fonts we want to use
   7 require 'pswidth.ph';           # PostScript string width
   8
   9 use Fcntl;
  10
  11 #
  12 # PostScript configurables; these values are also available to the
  13 # PostScript code itself
  14 #
  15 %psconf = (
  16            pagewidth => 595,    # Page width in PostScript points
  17            pageheight => 792,   # Page height in PostScript points
  18            lmarg => 100,        # Left margin in PostScript points
  19            rmarg => 50,         # Right margin in PostScript points
  20            topmarg => 100,      # Top margin in PostScript points
  21            botmarg => 100,      # Bottom margin in PostScript points
  22            plmarg => 50,        # Page number position relative to left margin
  23            prmarg => 0,         # Page number position relative to right margin
  24            pymarg => 50,        # Page number position relative to bot margin
  25            bulladj => 12,       # How much to indent a bullet paragraph
  26            tocind => 12,        # TOC indentation per level
  27            tocpnz => 24,        # Width of TOC page number only zone
  28            tocdots => 8,        # Spacing between TOC dots
  29            idxspace => 24,      # Minimum space between index title and pg#
  30            idxindent => 32,     # How much to indent a subindex entry
  31            idxgutter => 24,     # Space between index columns
  32            idxcolumns => 2,     # Number of index columns
  33            );
  34
  35 %psbool = (
  36            colorlinks => 0,     # Set links in blue rather than black
  37            );
  38
  39 # Known paper sizes
  40 %papersizes = (
  41                'a4'     => [595, 842], # ISO standard paper size
  42                'letter' => [612, 792], # US common paper size
  43                'pa4'    => [595, 792], # Compromise ("portable a4")
  44                'b4'     => [709,1002], # ISO intermediate paper size
  45                'legal'  => [612,1008], # US intermediate paper size
  46                'a3'     => [842,1190], # ISO double paper size
  47                '11x17'  => [792,1224], # US double paper size
  48                );
  49
  50 #
  51 # Parse the command line
  52 #
  53 undef $input;
  54 while ( $arg = shift(@ARGV) ) {
  55     if ( $arg =~ /^\-(|no\-)/ ) {
  56         $parm = $';
  57         $true = ($1 eq '') ? 1 : 0;
  58         if ( $true && defined($papersizes{$parm}) ) {
  59             $psconf{pagewidth}  = $papersizes{$parm}->[0];
  60             $psconf{pageheight} = $papersizes{$parm}->[1];
  61         } elsif ( defined($psbool{$parm}) ) {
  62             $psbool{$parm} = $true;
  63         } elsif ( $true && defined($psconf{$parm}) ) {
  64             $psconf{$parm} = shift(@ARGV);
  65         } else {
  66             die "$0: Unknown option: $arg\n";
  67         }
  68     } else {
  69         $input = $arg;
  70     }
  71 }
  72
  73 #
  74 # Document formatting parameters
  75 #
  76 $paraskip = 6;                  # Space between paragraphs
  77 $chapstart = 30;                # Space before a chapter heading
  78 $chapskip = 24;                 # Space after a chapter heading
  79 $tocskip = 6;                   # Space between TOC entries
  80
  81 # Configure post-paragraph skips for each kind of paragraph
  82 %skiparray = ('chap' => $chapskip, 'appn' => $chapstart,
  83               'head' => $paraskip, 'subh' => $paraskip,
  84               'norm' => $paraskip, 'bull' => $paraskip,
  85               'code' => $paraskip, 'toc0' => $tocskip,
  86               'toc1' => $tocskip,  'toc2' => $tocskip);
  87
  88 #
  89 # First, format the stuff coming from the front end into
  90 # a cleaner representation
  91 #
  92 if ( defined($input) ) {
  93     sysopen(PARAS, $input, O_RDONLY) or
  94         die "$0: cannot open $input: $!\n";
  95 } else {
  96     open(PARAS, "<&STDIN") or die "$0: $!\n";
  97 }
  98 while ( defined($line = <PARAS>) ) {
  99     chomp $line;
 100     $data = <PARAS>;
 101     chomp $data;
 102     if ( $line =~ /^meta :/ ) {
 103         $metakey = $';
 104         $metadata{$metakey} = $data;
 105     } elsif ( $line =~ /^indx :/ ) {
 106         $ixentry = $';
 107         push(@ixentries, $ixentry);
 108         $ixterms{$ixentry} = [split(/\037/, $data)];
 109         # Look for commas.  This is easier done on the string
 110         # representation, so do it now.
 111         if ( $line =~ /^(.*\,)\037sp\037/ ) {
 112             $ixprefix = $1;
 113             $ixhasprefix{$ixentry} = $ixprefix;
 114             if ( !$ixprefixes{$ixprefix} ) {
 115                 $ixcommafirst{$ixentry}++;
 116             }
 117             $ixprefixes{$ixprefix}++;
 118         }
 119     } else {
 120         push(@ptypes, $line);
 121         push(@paras, [split(/\037/, $data)]);
 122     }
 123 }
 124 close(PARAS);
 125
 126 #
 127 # Convert an integer to a chosen base
 128 #
 129 sub int2base($$) {
 130     my($i,$b) = @_;
 131     my($s) = '';
 132     my($n) = '';
 133     my($z) = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
 134     return '0' if ($i == 0);
 135     if ( $i < 0 ) { $n = '-'; $i = -$i; }
 136     while ( $i ) {
 137         $s = substr($z,$i%$b,1) . $s;
 138         $i = int($i/$b);
 139     }
 140     return $n.$s;
 141 }
 142
 143 #
 144 # Take a crossreference name and generate the PostScript name for it.
 145 #
 146 # This hack produces a somewhat smaller PDF...
 147 #%ps_xref_list = ();
 148 #$ps_xref_next = 0;
 149 #sub ps_xref($) {
 150 #    my($s) = @_;
 151 #    my $q = $ps_xref_list{$s};
 152 #    return $q if ( defined($ps_xref_list{$s}) );
 153 #    $q = 'X'.int2base($ps_xref_next++, 52);
 154 #    $ps_xref_list{$s} = $q;
 155 #    return $q;
 156 #}
 157
 158 # Somewhat bigger PDF, but one which obeys # URLs
 159 sub ps_xref($) {
 160     return @_[0];
 161 }
 162
 163 #
 164 # Flow lines according to a particular font set and width
 165 #
 166 # A "font set" is represented as an array containing
 167 # arrays of pairs: [<size>, <metricref>]
 168 #
 169 # Each line is represented as:
 170 # [ [type,first|last,aux,fontset,page,ypos,optional col],
 171 #   [rendering array] ]
 172 #
 173 # A space character may be "squeezed" by up to this much
 174 # (as a fraction of the normal width of a space.)
 175 #
 176 $ps_space_squeeze = 0.00;       # Min space width 100%
 177 sub ps_flow_lines($$$@) {
 178     my($wid, $fontset, $type, @data) = @_;
 179     my($fonts) = $$fontset{fonts};
 180     my($e);
 181     my($w)  = 0;                # Width of current line
 182     my($sw) = 0;                # Width of current line due to spaces
 183     my(@l)  = ();               # Current line
 184     my(@ls) = ();               # Accumulated output lines
 185     my(@xd) = ();               # Metadata that goes with subsequent text
 186     my $hasmarker = 0;          # Line has -6 marker
 187     my $pastmarker = 0;         # -6 marker found
 188
 189     # If there is a -6 marker anywhere in the paragraph,
 190     # *each line* output needs to have a -6 marker
 191     foreach $e ( @data ) {
 192         $hasmarker = 1 if ( $$e[0] == -6 );
 193     }
 194
 195     $w = 0;
 196     foreach $e ( @data ) {
 197         if ( $$e[0] < 0 ) {
 198             # Type is metadata.  Zero width.
 199             if ( $$e[0] == -6 ) {
 200                 $pastmarker = 1;
 201             }
 202             if ( $$e[0] == -1 || $$e[0] == -6 ) {
 203                 # -1 (end anchor) or -6 (marker) goes with the preceeding
 204                 # text, otherwise with the subsequent text
 205                 push(@l, $e);
 206             } else {
 207                 push(@xd, $e);
 208             }
 209         } else {
 210             my $ew = ps_width($$e[1], $fontset->{fonts}->[$$e[0]][1]) *
 211                 ($fontset->{fonts}->[$$e[0]][0]/1000);
 212             my $sp = $$e[1];
 213             $sp =~ tr/[^ ]//d;  # Delete nonspaces
 214             my $esw = ps_width($sp, $fontset->{fonts}->[$$e[0]][1]) *
 215                 ($fontset->{fonts}->[$$e[0]][0]/1000);
 216
 217             if ( ($w+$ew) - $ps_space_squeeze*($sw+$esw) > $wid ) {
 218                 # Begin new line
 219                 # Search backwards for previous space chunk
 220                 my $lx = scalar(@l)-1;
 221                 my @rm = ();
 222                 while ( $lx >= 0 ) {
 223                     while ( $lx >= 0 && $l[$lx]->[0] < 0 ) {
 224                         # Skip metadata
 225                         $pastmarker = 0 if ( $l[$lx]->[0] == -6 );
 226                         $lx--;
 227                     };
 228                     if ( $lx >= 0 ) {
 229                         if ( $l[$lx]->[1] eq ' ' ) {
 230                             splice(@l, $lx, 1);
 231                             @rm = splice(@l, $lx);
 232                             last; # Found place to break
 233                         } else {
 234                             $lx--;
 235                         }
 236                     }
 237                 }
 238
 239                 # Now @l contains the stuff to remain on the old line
 240                 # If we broke the line inside a link, then split the link
 241                 # into two.
 242                 my $lkref = undef;
 243                 foreach my $lc ( @l ) {
 244                     if ( $$lc[0] == -2 || $$lc[0] == -3 || $lc[0] == -7 ) {
 245                         $lkref = $lc;
 246                     } elsif ( $$lc[0] == -1 ) {
 247                         undef $lkref;
 248                     }
 249                 }
 250
 251                 if ( defined($lkref) ) {
 252                     push(@l, [-1,undef]); # Terminate old reference
 253                     unshift(@rm, $lkref); # Duplicate reference on new line
 254                 }
 255
 256                 if ( $hasmarker ) {
 257                     if ( $pastmarker ) {
 258                         unshift(@rm,[-6,undef]); # New line starts with marker
 259                     } else {
 260                         push(@l,[-6,undef]); # Old line ends with marker
 261                     }
 262                 }
 263
 264                 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]);
 265                 @l = @rm;
 266
 267                 $w = $sw = 0;
 268                 # Compute the width of the remainder array
 269                 for my $le ( @l ) {
 270                     if ( $$le[0] >= 0 ) {
 271                         my $xew = ps_width($$le[1], $fontset->{fonts}->[$$le[0]][1]) *
 272                             ($fontset->{fonts}->[$$le[0]][0]/1000);
 273                         my $xsp = $$le[1];
 274                         $xsp =~ tr/[^ ]//d;     # Delete nonspaces
 275                         my $xsw = ps_width($xsp, $fontset->{fonts}->[$$le[0]][1]) *
 276                             ($fontset->{fonts}->[$$le[0]][0]/1000);
 277                         $w += $xew;  $sw += $xsw;
 278                     }
 279                 }
 280             }
 281             push(@l, @xd);      # Accumulated metadata
 282             @xd = ();
 283             if ( $$e[1] ne '' ) {
 284                 push(@l, $e);
 285                 $w += $ew; $sw += $esw;
 286             }
 287         }
 288     }
 289     push(@l,@wd);
 290     if ( scalar(@l) ) {
 291         push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]); # Final line
 292     }
 293
 294     # Mark the first line as first and the last line as last
 295     if ( scalar(@ls) ) {
 296         $ls[0]->[0]->[1] |= 1;     # First in para
 297         $ls[-1]->[0]->[1] |= 2;    # Last in para
 298     }
 299     return @ls;
 300 }
 301
 302 #
 303 # Once we have broken things into lines, having multiple chunks
 304 # with the same font index is no longer meaningful.  Merge
 305 # adjacent chunks to keep down the size of the whole file.
 306 #
 307 sub ps_merge_chunks(@) {
 308     my(@ci) = @_;
 309     my($c, $lc);
 310     my(@co, $eco);
 311
 312     undef $lc;
 313     @co = ();
 314     $eco = -1;                  # Index of the last entry in @co
 315     foreach $c ( @ci ) {
 316         if ( defined($lc) && $$c[0] == $lc && $$c[0] >= 0 ) {
 317             $co[$eco]->[1] .= $$c[1];
 318         } else {
 319             push(@co, $c);  $eco++;
 320             $lc = $$c[0];
 321         }
 322     }
 323     return @co;
 324 }
 325
 326 #
 327 # Convert paragraphs to rendering arrays.  Each
 328 # element in the array contains (font, string),
 329 # where font can be one of:
 330 # -1 end link
 331 # -2 begin crossref
 332 # -3 begin weblink
 333 # -4 index item anchor
 334 # -5 crossref anchor
 335 # -6 left/right marker (used in the index)
 336 # -7 page link (used in the index)
 337 #  0 normal
 338 #  1 empatic (italic)
 339 #  2 code (fixed spacing)
 340 #
 341
 342 sub mkparaarray($@) {
 343     my($ptype, @chunks) = @_;
 344
 345     my @para = ();
 346     my $in_e = 0;
 347     my $chunk;
 348
 349     if ( $ptype =~ /^code/ ) {
 350         foreach $chunk ( @chunks ) {
 351             push(@para, [2, $chunk]);
 352         }
 353     } else {
 354         foreach $chunk ( @chunks ) {
 355             my $type = substr($chunk,0,2);
 356             my $text = substr($chunk,2);
 357
 358             if ( $type eq 'sp' ) {
 359                 push(@para, [$in_e?1:0, ' ']);
 360             } elsif ( $type eq 'da' ) {
 361                 # \261 is en dash in Adobe StandardEncoding
 362                 push(@para, [$in_e?1:0, "\261"]);
 363             } elsif ( $type eq 'n ' ) {
 364                 push(@para, [0, $text]);
 365                 $in_e = 0;
 366             } elsif ( $type =~ '^e' ) {
 367                 push(@para, [1, $text]);
 368                 $in_e = ($type eq 'es' || $type eq 'e ');
 369             } elsif ( $type eq 'c ' ) {
 370                 push(@para, [2, $text]);
 371                 $in_e = 0;
 372             } elsif ( $type eq 'x ' ) {
 373                 push(@para, [-2, ps_xref($text)]);
 374             } elsif ( $type eq 'xe' ) {
 375                 push(@para, [-1, undef]);
 376             } elsif ( $type eq 'wc' || $type eq 'w ' ) {
 377                 $text =~ /\<(.*)\>(.*)$/;
 378                 my $link = $1; $text = $2;
 379                 push(@para, [-3, $link]);
 380                 push(@para, [($type eq 'wc') ? 2:0, $text]);
 381                 push(@para, [-1, undef]);
 382                 $in_e = 0;
 383             } elsif ( $type eq 'i ' ) {
 384                 push(@para, [-4, $text]);
 385             } else {
 386                 die "Unexpected paragraph chunk: $chunk";
 387             }
 388         }
 389     }
 390     return @para;
 391 }
 392
 393 $npara = scalar(@paras);
 394 for ( $i = 0 ; $i < $npara ; $i++ ) {
 395     $paras[$i] = [mkparaarray($ptypes[$i], @{$paras[$i]})];
 396 }
 397
 398 #
 399 # This converts a rendering array to a simple string
 400 #
 401 sub ps_arraytostr(@) {
 402     my $s = '';
 403     my $c;
 404     foreach $c ( @_ ) {
 405         $s .= $$c[1] if ( $$c[0] >= 0 );
 406     }
 407     return $s;
 408 }
 409
 410 #
 411 # This generates a duplicate of a paragraph
 412 #
 413 sub ps_dup_para(@) {
 414     my(@i) = @_;
 415     my(@o) = ();
 416     my($c);
 417
 418     foreach $c ( @i ) {
 419         my @cc = @{$c};
 420         push(@o, [@cc]);
 421     }
 422     return @o;
 423 }
 424
 425 #
 426 # Scan for header paragraphs and fix up their contents;
 427 # also generate table of contents and PDF bookmarks.
 428 #
 429 @tocparas = ([[-5, 'contents'], [0,'Contents']]);
 430 @tocptypes = ('chap');
 431 @bookmarks = (['title', 0, 'Title Page'], ['contents', 0, 'Contents']);
 432 %bookref = ();
 433 for ( $i = 0 ; $i < $npara ; $i++ ) {
 434     my $xtype = $ptypes[$i];
 435     my $ptype = substr($xtype,0,4);
 436     my $str;
 437     my $book;
 438
 439     if ( $ptype eq 'chap' || $ptype eq 'appn' ) {
 440         unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
 441             die "Bad para";
 442         }
 443         my $secn = $1;
 444         my $sech = $2;
 445         my $xref = ps_xref($sech);
 446         my $chap = ($ptype eq 'chap')?'Chapter':'Appendix';
 447
 448         $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
 449         push(@bookmarks, $book);
 450         $bookref{$secn} = $book;
 451
 452         push(@tocparas, [ps_dup_para(@{$paras[$i]})]);
 453         push(@tocptypes, 'toc0'.' :'.$sech.':'.$chap.' '.$secn.':');
 454
 455         unshift(@{$paras[$i]},
 456                 [-5, $xref], [0,$chap.' '.$secn.':'], [0, ' ']);
 457     } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
 458         unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
 459             die "Bad para";
 460         }
 461         my $secn = $1;
 462         my $sech = $2;
 463         my $xref = ps_xref($sech);
 464         my $pref;
 465         $pref = $secn; $pref =~ s/\.[^\.]+$//; # Find parent node
 466
 467         $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
 468         push(@bookmarks, $book);
 469         $bookref{$secn} = $book;
 470         $bookref{$pref}->[1]--; # Adjust count for parent node
 471
 472         push(@tocparas, [ps_dup_para(@{$paras[$i]})]);
 473         push(@tocptypes,
 474              (($ptype eq 'subh') ? 'toc2':'toc1').' :'.$sech.':'.$secn);
 475
 476         unshift(@{$paras[$i]}, [-5, $xref]);
 477     }
 478 }
 479
 480 #
 481 # Add TOC to beginning of paragraph list
 482 #
 483 unshift(@paras,  @tocparas);  undef @tocparas;
 484 unshift(@ptypes, @tocptypes); undef @tocptypes;
 485 $npara = scalar(@paras);
 486
 487 #
 488 # No lines generated, yet.
 489 #
 490 @pslines    = ();
 491
 492 #
 493 # Line Auxilliary Information Types
 494 #
 495 $AuxStr     = 1;                # String
 496 $AuxPage    = 2;                # Page number (from xref)
 497 $AuxPageStr = 3;                # Page number as a PostScript string
 498 $AuxXRef    = 4;                # Cross reference as a name
 499 $AuxNum     = 5;                # Number
 500
 501 #
 502 # Break or convert paragraphs into lines, and push them
 503 # onto the @pslines array.
 504 #
 505 sub ps_break_lines($$) {
 506     my ($paras,$ptypes) = @_;
 507
 508     my $linewidth  = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
 509     my $bullwidth  = $linewidth-$psconf{bulladj};
 510     my $indxwidth  = ($linewidth-$psconf{idxgutter})/$psconf{idxcolumns}
 511                      -$psconf{idxspace};
 512
 513     my $npara = scalar(@{$paras});
 514     my $i;
 515
 516     for ( $i = 0 ; $i < $npara ; $i++ ) {
 517         my $xtype = $ptypes->[$i];
 518         my $ptype = substr($xtype,0,4);
 519         my @data = @{$paras->[$i]};
 520         my @ls = ();
 521         if ( $ptype eq 'code' ) {
 522             my $p;
 523             # Code paragraph; each chunk is a line
 524             foreach $p ( @data ) {
 525                 push(@ls, [[$ptype,0,undef,\%TextFont,0,0],[$p]]);
 526             }
 527             $ls[0]->[0]->[1] |= 1;           # First in para
 528             $ls[-1]->[0]->[1] |= 2;      # Last in para
 529         } elsif ( $ptype eq 'chap' || $ptype eq 'appn' ) {
 530             # Chapters are flowed normally, but in an unusual font
 531             @ls = ps_flow_lines($linewidth, \%ChapFont, $ptype, @data);
 532         } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
 533             unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
 534                 die "Bad para";
 535             }
 536             my $secn = $1;
 537             my $sech = $2;
 538             my $font = ($ptype eq 'head') ? \%HeadFont : \%SubhFont;
 539             @ls = ps_flow_lines($linewidth, $font, $ptype, @data);
 540             # We need the heading number as auxillary data
 541             $ls[0]->[0]->[2] = [[$AuxStr,$secn]];
 542         } elsif ( $ptype eq 'norm' ) {
 543             @ls = ps_flow_lines($linewidth, \%TextFont, $ptype, @data);
 544         } elsif ( $ptype eq 'bull' ) {
 545             @ls = ps_flow_lines($bullwidth, \%TextFont, $ptype, @data);
 546         } elsif ( $ptype =~ /^toc/ ) {
 547             unless ( $xtype =~/^\S+ :([^:]*):(.*)$/ ) {
 548                 die "Bad para";
 549             }
 550             my $xref = $1;
 551             my $refname = $2.' ';
 552             my $ntoc = substr($ptype,3,1)+0;
 553             my $refwidth = ps_width($refname, $TextFont{fonts}->[0][1]) *
 554                 ($TextFont{fonts}->[0][0]/1000);
 555
 556             @ls = ps_flow_lines($linewidth-$ntoc*$psconf{tocind}-
 557                                 $psconf{tocpnz}-$refwidth,
 558                                 \%TextFont, $ptype, @data);
 559
 560             # Auxilliary data: for the first line, the cross reference symbol
 561             # and the reference name; for all lines but the first, the
 562             # reference width; and for the last line, the page number
 563             # as a string.
 564             my $nl = scalar(@ls);
 565             $ls[0]->[0]->[2] = [[$AuxStr,$refname], [$AuxXRef,$xref]];
 566             for ( $j = 1 ; $j < $nl ; $j++ ) {
 567                 $ls[$j]->[0]->[2] = [[$AuxNum,$refwidth]];
 568             }
 569             push(@{$ls[$nl-1]->[0]->[2]}, [$AuxPageStr,$xref]);
 570         } elsif ( $ptype =~ /^idx/ ) {
 571             my $lvl = substr($ptype,3,1)+0;
 572
 573             @ls = ps_flow_lines($indxwidth-$lvl*$psconf{idxindent},
 574                                 \%TextFont, $ptype, @data);
 575         } else {
 576             die "Unknown para type: $ptype";
 577         }
 578         # Merge adjacent identical chunks
 579         foreach $l ( @ls ) {
 580             @{$$l[1]} = ps_merge_chunks(@{$$l[1]});
 581         }
 582         push(@pslines,@ls);
 583     }
 584 }
 585
 586 # Break the main body text into lines.
 587 ps_break_lines(\@paras, \@ptypes);
 588
 589 #
 590 # Break lines in to pages
 591 #
 592
 593 $curpage = 3;                   # First text page is page 3
 594 $curypos = 0;                   # Space used on this page
 595 undef $columnstart;             # Not outputting columnar text
 596 undef $curcolumn;               # Current column
 597 $nlines = scalar(@pslines);
 598
 599 #
 600 # This formats lines inside the global @pslines array into pages,
 601 # updating the page and y-coordinate entries.  Start at the
 602 # $startline position in @pslines and go to but not including
 603 # $endline.  The global variables $curpage, $curypos, $columnstart
 604 # and $curcolumn are updated appropriately.
 605 #
 606 sub ps_break_pages($$) {
 607     my($startline, $endline) = @_;
 608
 609     # Paragraph types which should never be broken
 610     my $nobreakregexp = "^(chap|appn|head|subh|toc.|idx.)\$";
 611     # Paragraph types which are heading (meaning they should not be broken
 612     # immediately after)
 613     my $headingregexp = "^(chap|appn|head|subh)\$";
 614     # Paragraph types which are set in columnar format
 615     my $columnregexp = "^idx.\$";
 616
 617     my $upageheight = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg};
 618
 619     my $i;
 620
 621     for ( $i = $startline ; $i < $endline ; $i++ ) {
 622         my $linfo = $pslines[$i]->[0];
 623         if ( ($$linfo[0] eq 'chap' || $$linfo[0] eq 'appn' )
 624              && ($$linfo[1] & 1) ) {
 625             # First line of a new chapter heading.  Start a new page.
 626             undef $columnstart;
 627             $curpage++ if ( $curypos > 0 || defined($columnstart) );
 628             $curypos = $chapstart;
 629         } elsif ( defined($columnstart) && $$linfo[0] !~ /$columnregexp/o ) {
 630             undef $columnstart;
 631             $curpage++;
 632             $curypos = 0;
 633         }
 634
 635         if ( $$linfo[0] =~ /$columnregexp/o && !defined($columnstart) ) {
 636             $columnstart = $curypos;
 637             $curcolumn = 0;
 638         }
 639
 640         # Adjust position by the appropriate leading
 641         $curypos += $$linfo[3]->{leading};
 642
 643         # Record the page and y-position
 644         $$linfo[4] = $curpage;
 645         $$linfo[5] = $curypos;
 646         $$linfo[6] = $curcolumn if ( defined($columnstart) );
 647
 648         if ( $curypos > $upageheight ) {
 649             # We need to break the page before this line.
 650             my $broken = 0;             # No place found yet
 651             while ( !$broken && $pslines[$i]->[0]->[4] == $curpage ) {
 652                 my $linfo = $pslines[$i]->[0];
 653                 my $pinfo = $pslines[$i-1]->[0];
 654
 655                 if ( $$linfo[1] == 2 ) {
 656                     # This would be an orphan, don't break.
 657                 } elsif ( $$linfo[1] & 1 ) {
 658                     # Sole line or start of paragraph.  Break unless
 659                     # the previous line was part of a heading.
 660                     $broken = 1 if ( $$pinfo[0] !~ /$headingregexp/o );
 661                 } else {
 662                     # Middle of paragraph.  Break unless we're in a
 663                     # no-break paragraph, or the previous line would
 664                     # end up being a widow.
 665                     $broken = 1 if ( $$linfo[0] !~ /$nobreakregexp/o &&
 666                                      $$pinfo[1] != 1 );
 667                 }
 668                 $i--;
 669             }
 670             die "Nowhere to break page $curpage\n" if ( !$broken );
 671             # Now $i should point to line immediately before the break, i.e.
 672             # the next paragraph should be the first on the new page
 673             if ( defined($columnstart) &&
 674                  ++$curcolumn < $psconf{idxcolumns} ) {
 675                 # We're actually breaking text into columns, not pages
 676                 $curypos = $columnstart;
 677             } else {
 678                 undef $columnstart;
 679                 $curpage++;
 680                 $curypos = 0;
 681             }
 682             next;
 683         }
 684
 685         # Add end of paragraph skip
 686         if ( $$linfo[1] & 2 ) {
 687             $curypos += $skiparray{$$linfo[0]};
 688         }
 689     }
 690 }
 691
 692 ps_break_pages(0,$nlines);      # Break the main text body into pages
 693
 694 #
 695 # Find the page number of all the indices
 696 #
 697 %ps_xref_page   = ();           # Crossref anchor pages
 698 %ps_index_pages = ();           # Index item pages
 699 $nlines = scalar(@pslines);
 700 for ( $i = 0 ; $i < $nlines ; $i++ ) {
 701     my $linfo = $pslines[$i]->[0];
 702     foreach my $c ( @{$pslines[$i]->[1]} ) {
 703         if ( $$c[0] == -4 ) {
 704             if ( !defined($ps_index_pages{$$c[1]}) ) {
 705                 $ps_index_pages{$$c[1]} = [];
 706             } elsif ( $ps_index_pages{$$c[1]}->[-1] eq $$linfo[4] ) {
 707                 # Pages are emitted in order; if this is a duplicated
 708                 # entry it will be the last one
 709                 next;           # Duplicate
 710             }
 711             push(@{$ps_index_pages{$$c[1]}}, $$linfo[4]);
 712         } elsif ( $$c[0] == -5 ) {
 713             $ps_xref_page{$$c[1]} = $$linfo[4];
 714         }
 715     }
 716 }
 717
 718 #
 719 # Emit index paragraphs
 720 #
 721 $startofindex = scalar(@pslines);
 722 @ixparas = ([[-5,'index'],[0,'Index']]);
 723 @ixptypes = ('chap');
 724
 725 foreach $k ( @ixentries ) {
 726     my $n,$i;
 727     my $ixptype = 'idx0';
 728     my @ixpara = mkparaarray('idx0',@{$ixterms{$k}});
 729
 730     push(@ixpara, [-6,undef]);  # Left/right marker
 731     $i = 1;  $n = scalar(@{$ps_index_pages{$k}});
 732     foreach $p ( @{$ps_index_pages{$k}} ) {
 733         if ( $i++ == $n ) {
 734             push(@ixpara,[-7,$p],[0,"$p"],[-1,undef]);
 735         } else {
 736             push(@ixpara,[-7,$p],[0,"$p,"],[-1,undef],[0,' ']);
 737         }
 738     }
 739
 740     push(@ixparas, [@ixpara]);
 741     push(@ixptypes, $ixptype);
 742 }
 743
 744 #
 745 # Flow index paragraphs into lines
 746 #
 747 ps_break_lines(\@ixparas, \@ixptypes);
 748
 749 #
 750 # Format index into pages
 751 #
 752 $nlines = scalar(@pslines);
 753 ps_break_pages($startofindex, $nlines);
 754
 755 #
 756 # Push index onto bookmark list
 757 #
 758 push(@bookmarks, ['index', 0, 'Index']);
 759
 760 # Get the list of fonts used
 761 %ps_all_fonts = ();
 762 foreach $fset ( @AllFonts ) {
 763     foreach $font ( @{$fset->{fonts}} ) {
 764         $ps_all_fonts{$font->[1]->{name}}++;
 765     }
 766 }
 767
 768 # Emit the PostScript DSC header
 769 print "%!PS-Adobe-3.0\n";
 770 print "%%Pages: $curpage\n";
 771 print "%%BoundingBox: 0 0 ", $psconf{pagewidth}, ' ', $psconf{pageheight}, "\n";
 772 print "%%Creator: NASM psflow.pl\n";
 773 print "%%DocumentData: Clean7Bit\n";
 774 print "%%DocumentFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
 775 print "%%DocumentNeededFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
 776 print "%%Orientation: Portrait\n";
 777 print "%%PageOrder: Ascend\n";
 778 print "%%EndComments\n";
 779 print "%%BeginProlog\n";
 780
 781 # Emit the configurables as PostScript tokens
 782 foreach $c ( keys(%psconf) ) {
 783     print "/$c ", $psconf{$c}, " def\n";
 784 }
 785 foreach $c ( keys(%psbool) ) {
 786     print "/$c ", ($psbool{$c}?'true':'false'), " def\n";
 787 }
 788
 789 # Emit fontset definitions
 790 foreach $fset ( @AllFonts ) {
 791     my $i = 0;
 792     my @zfonts = ();
 793     foreach $font ( @{$fset->{fonts}} ) {
 794         print '/', $fset->{name}, $i, ' ',
 795         '/', $font->[1]->{name}, ' findfont ',
 796         $font->[0], " scalefont def\n";
 797         push(@zfonts, $fset->{name}.$i);
 798         $i++;
 799     }
 800     print '/', $fset->{name}, ' [', join(' ',@zfonts), "] def\n";
 801 }
 802
 803 # Emit the result as PostScript.  This is *NOT* correct code yet!
 804 open(PSHEAD, "< head.ps");
 805 while ( defined($line = <PSHEAD>) ) {
 806     print $line;
 807 }
 808 close(PSHEAD);
 809 print "%%EndProlog\n";
 810
 811 # Generate a PostScript string
 812 sub ps_string($) {
 813     my ($s) = @_;
 814     my ($i,$c);
 815     my ($o) = '(';
 816     my ($l) = length($s);
 817     for ( $i = 0 ; $i < $l ; $i++ ) {
 818         $c = substr($s,$i,1);
 819         if ( ord($c) < 32 || ord($c) > 126 ) {
 820             $o .= sprintf("\\%03o", ord($c));
 821         } elsif ( $c eq '(' || $c eq ')' || $c eq "\\" ) {
 822             $o .= "\\".$c;
 823         } else {
 824             $o .= $c;
 825         }
 826     }
 827     return $o.')';
 828 }
 829
 830 # Generate PDF bookmarks
 831 print "%%BeginSetup\n";
 832 foreach $b ( @bookmarks ) {
 833     print '[/Title ', ps_string($b->[2]), "\n";
 834     print '/Count ', $b->[1], ' ' if ( $b->[1] );
 835     print '/Dest /',$b->[0]," /OUT pdfmark\n";
 836 }
 837
 838 # Ask the PostScript interpreter for the proper size media
 839 print "setpagesize\n";
 840 print "%%EndSetup\n";
 841
 842 # Start a PostScript page
 843 sub ps_start_page() {
 844     $ps_page++;
 845     print "%%Page: $ps_page $ps_page\n";
 846     print "%%BeginPageSetup\n";
 847     print "save\n";
 848     print "%%EndPageSetup\n";
 849     print '/', $ps_page, " pa\n";
 850 }
 851
 852 # End a PostScript page
 853 sub ps_end_page($) {
 854     my($pn) = @_;
 855     if ( $pn ) {
 856         print "($ps_page)", (($ps_page & 1) ? 'pageodd' : 'pageeven'), "\n";
 857     }
 858     print "restore showpage\n";
 859 }
 860
 861 $ps_page = 0;
 862
 863 # Title page and inner cover
 864 ps_start_page();
 865 # FIX THIS: This shouldn't be hard-coded like this
 866 $title = $metadata{'title'};
 867 $title =~ s/ \- / \320 /;       # \320 = em dash
 868 $pstitle = ps_string($title);
 869 print <<EOF;
 870 lmarg pageheight 2 mul 3 div moveto
 871 /Helvetica-Bold findfont 20 scalefont setfont
 872 /title linkdest ${pstitle} show
 873 lmarg pageheight 2 mul 3 div 10 sub moveto
 874 0 setlinecap 3 setlinewidth
 875 pagewidth lmarg sub rmarg sub 0 rlineto stroke
 876 /nasmlogo {
 877 gsave 1 dict begin
 878 /sz exch def
 879 /Courier-Bold findfont sz scalefont setfont
 880 moveto
 881 0.85 1.22 scale
 882 [(-~~..~:\#;L       .-:\#;L,.-   .~:\#:;.T  -~~.~:;. .~:;. )
 883 ( E8+U    *T     +U\'   *T\#  .97     *L   E8+\'  *;T\'  *;, )
 884 ( D97     \`*L  .97     \'*L   \"T;E+:,     D9     *L    *L )
 885 ( H7       I\#  T7       I\#        \"*:.   H7     I\#    I\# )
 886 ( U:       :8  *\#+    , :8  T,      79   U:     :8    :8 )
 887 (,\#B.     .IE,  \"T;E*  .IE, J *+;\#:T*\"  ,\#B.   .IE,  .IE,)] {
 888 currentpoint 3 -1 roll
 889 sz -0.10 mul 0 3 -1 roll ashow
 890 sz 0.72 mul sub moveto
 891 } forall
 892 end grestore
 893 } def
 894 0.6 setgray
 895 pagewidth 2 div 143 sub
 896 pageheight 2 div 33 add
 897 12 nasmlogo
 898 EOF
 899 ps_end_page(0);
 900 ps_start_page();
 901 print "% Inner cover goes here\n";
 902 ps_end_page(0);
 903
 904 $curpage = 3;
 905 ps_start_page();
 906 foreach $line ( @pslines ) {
 907     my $linfo = $line->[0];
 908
 909     if ( $$linfo[4] != $curpage ) {
 910         ps_end_page(1);
 911         ps_start_page();
 912         $curpage = $$linfo[4];
 913     }
 914
 915     print '[';
 916     my $curfont = 0;
 917     foreach my $c ( @{$line->[1]} ) {
 918         if ( $$c[0] >= 0 ) {
 919             if ( $curfont != $$c[0] ) {
 920                 print ($curfont = $$c[0]);
 921             }
 922             print ps_string($$c[1]);
 923         } elsif ( $$c[0] == -1 ) {
 924             print '{el}';       # End link
 925         } elsif ( $$c[0] == -2 ) {
 926             print '{/',$$c[1],' xl}'; # xref link
 927         } elsif ( $$c[0] == -3 ) {
 928             print '{',ps_string($$c[1]),'wl}'; # web link
 929         } elsif ( $$c[0] == -4 ) {
 930             # Index anchor -- ignore
 931         } elsif ( $$c[0] == -5 ) {
 932             print '{/',$$c[1],' xa}'; #xref anchor
 933         } elsif ( $$c[0] == -6 ) {
 934             print '][';         # Start a new array
 935             $curfont = 0;
 936         } elsif ( $$c[0] == -7 ) {
 937             print '{/',$$c[1],' pl}'; # page link
 938         } else {
 939             die "Unknown annotation";
 940         }
 941     }
 942     print ']';
 943     if ( defined($$linfo[2]) ) {
 944         foreach my $x ( @{$$linfo[2]} ) {
 945             if ( $$x[0] == $AuxStr ) {
 946                 print ps_string($$x[1]);
 947             } elsif ( $$x[0] == $AuxPage ) {
 948                 print $ps_xref_page{$$x[1]},' ';
 949             } elsif ( $$x[0] == $AuxPageStr ) {
 950                 print ps_string($ps_xref_page{$$x[1]});
 951             } elsif ( $$x[0] == $AuxXRef ) {
 952                 print '/',ps_xref($$x[1]),' ';
 953             } elsif ( $$x[0] == $AuxNum ) {
 954                 print $$x[1],' ';
 955             } else {
 956                 die "Unknown auxilliary data type";
 957             }
 958         }
 959     }
 960     print ($psconf{pageheight}-$psconf{topmarg}-$$linfo[5]);
 961     print ' ', $$linfo[6] if ( defined($$linfo[6]) );
 962     print ' ', $$linfo[0].$$linfo[1], "\n";
 963 }
 964
 965 ps_end_page(1);
 966 print "%%EOF\n";