doc/genps.pl

   1 #!/usr/bin/perl
   2 #
   3 # Format the documentation as PostScript
   4 #
   5
   6 require 'psfonts.ph';           # The fonts we want to use
   7 require 'pswidth.ph';           # PostScript string width
   8
   9 use Fcntl;
  10
  11 #
  12 # PostScript configurables; these values are also available to the
  13 # PostScript code itself
  14 #
  15 %psconf = (
  16            pagewidth => 595,    # Page width in PostScript points
  17            pageheight => 792,   # Page height in PostScript points
  18            lmarg => 100,        # Left margin in PostScript points
  19            rmarg => 50,         # Right margin in PostScript points
  20            topmarg => 100,      # Top margin in PostScript points
  21            botmarg => 100,      # Bottom margin in PostScript points
  22            plmarg => 50,        # Page number position relative to left margin
  23            prmarg => 0,         # Page number position relative to right margin
  24            pymarg => 50,        # Page number position relative to bot margin
  25            startcopyright => 100, # How much above the bottom margin is the
  26                                   # copyright notice stuff
  27            bulladj => 12,       # How much to indent a bullet paragraph
  28            tocind => 12,        # TOC indentation per level
  29            tocpnz => 24,        # Width of TOC page number only zone
  30            tocdots => 8,        # Spacing between TOC dots
  31            idxspace => 24,      # Minimum space between index title and pg#
  32            idxindent => 32,     # How much to indent a subindex entry
  33            idxgutter => 24,     # Space between index columns
  34            idxcolumns => 2,     # Number of index columns
  35            );
  36
  37 %psbool = (
  38            colorlinks => 0,     # Set links in blue rather than black
  39            );
  40
  41 # Known paper sizes
  42 %papersizes = (
  43                'a4'     => [595, 842], # ISO standard paper size
  44                'letter' => [612, 792], # US common paper size
  45                'pa4'    => [595, 792], # Compromise ("portable a4")
  46                'b4'     => [709,1002], # ISO intermediate paper size
  47                'legal'  => [612,1008], # US intermediate paper size
  48                'a3'     => [842,1190], # ISO double paper size
  49                '11x17'  => [792,1224], # US double paper size
  50                );
  51
  52 #
  53 # Parse the command line
  54 #
  55 undef $input;
  56 while ( $arg = shift(@ARGV) ) {
  57     if ( $arg =~ /^\-(|no\-)(.*)$/ ) {
  58         $parm = $2;
  59         $true = ($1 eq '') ? 1 : 0;
  60         if ( $true && defined($papersizes{$parm}) ) {
  61             $psconf{pagewidth}  = $papersizes{$parm}->[0];
  62             $psconf{pageheight} = $papersizes{$parm}->[1];
  63         } elsif ( defined($psbool{$parm}) ) {
  64             $psbool{$parm} = $true;
  65         } elsif ( $true && defined($psconf{$parm}) ) {
  66             $psconf{$parm} = shift(@ARGV);
  67         } else {
  68             die "$0: Unknown option: $arg\n";
  69         }
  70     } else {
  71         $input = $arg;
  72     }
  73 }
  74
  75 #
  76 # Document formatting parameters
  77 #
  78 $paraskip = 6;                  # Space between paragraphs
  79 $chapstart = 30;                # Space before a chapter heading
  80 $chapskip = 24;                 # Space after a chapter heading
  81 $tocskip = 6;                   # Space between TOC entries
  82
  83 # Configure post-paragraph skips for each kind of paragraph
  84 %skiparray = ('chap' => $chapskip, 'appn' => $chapstart,
  85               'head' => $paraskip, 'subh' => $paraskip,
  86               'norm' => $paraskip, 'bull' => $paraskip,
  87               'code' => $paraskip, 'toc0' => $tocskip,
  88               'toc1' => $tocskip,  'toc2' => $tocskip);
  89
  90 #
  91 # Custom encoding vector.  This is basically the same as
  92 # ISOLatin1Encoding (a level 2 feature, so we don't want to use it),
  93 # but with a few extra characters thrown in.  It is basically a
  94 # modified Windows 1252 codepage, minus, for now, the euro sign (\200
  95 # is reserved for euro.)
  96 #
  97 @NASMEncoding =
  98 (
  99  (undef)x32,
 100  'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
 101  'ampersand', 'quoteright', 'parenleft',
 102  'parenright', 'asterisk', 'plus', 'comma', 'minus',
 103  'period', 'slash', 'zero', 'one', 'two', 'three',
 104  'four', 'five', 'six', 'seven', 'eight', 'nine',
 105  'colon', 'semicolon', 'less', 'equal', 'greater',
 106  'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
 107  'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q',
 108  'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
 109  'bracketleft', 'backslash', 'bracketright',
 110  'asciicircum', 'underscore', 'quoteleft', 'a', 'b',
 111  'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
 112  'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 113  'w', 'x', 'y', 'z', 'braceleft', 'bar',
 114  'braceright', 'asciitilde', undef,
 115  undef, 'macron', 'quotesinglbase', 'florin',
 116  'quotedblbase', 'ellipsis', 'dagger', 'dbldagger',
 117  'circumflex', 'perthousand', 'Scaron', 'guilsinglleft',
 118  'OE', 'hungarumlaut', 'Zcaron', 'caron',
 119  'ogonek', 'grave', 'quotesingle', 'quotedblleft',
 120  'quotedblright', 'bullet', 'endash', 'emdash',
 121  'tilde', 'trademark', 'scaron', 'guilsignlright',
 122  'oe', 'ring', 'zcaron', 'Ydieresis',
 123  'space', 'exclamdown', 'cent', 'sterling',
 124  'currency', 'yen', 'brokenbar', 'section',
 125  'dieresis', 'copyright', 'ordfeminine',
 126  'guillemotleft', 'logicalnot', 'hyphen',
 127  'registered', 'macron', 'degree', 'plusminus',
 128  'twosuperior', 'threesuperior', 'acute', 'mu',
 129  'paragraph', 'periodcentered', 'cedilla',
 130  'onesuperior', 'ordmasculine', 'guillemotright',
 131  'onequarter', 'onehalf', 'threequarters',
 132  'questiondown', 'Agrave', 'Aacute', 'Acircumflex',
 133  'Atilde', 'Adieresis', 'Aring', 'AE', 'Ccedilla',
 134  'Egrave', 'Eacute', 'Ecircumflex', 'Edieresis',
 135  'Igrave', 'Iacute', 'Icircumflex', 'Idieresis',
 136  'Eth', 'Ntilde', 'Ograve', 'Oacute', 'Ocircumflex',
 137  'Otilde', 'Odieresis', 'multiply', 'Oslash',
 138  'Ugrave', 'Uacute', 'Ucircumflex', 'Udieresis',
 139  'Yacute', 'Thorn', 'germandbls', 'agrave', 'aacute',
 140  'acircumflex', 'atilde', 'adieresis', 'aring', 'ae',
 141  'ccedilla', 'egrave', 'eacute', 'ecircumflex',
 142  'edieresis', 'igrave', 'iacute', 'icircumflex',
 143  'idieresis', 'eth', 'ntilde', 'ograve', 'oacute',
 144  'ocircumflex', 'otilde', 'odieresis', 'divide',
 145  'oslash', 'ugrave', 'uacute', 'ucircumflex',
 146  'udieresis', 'yacute', 'thorn', 'ydieresis'
 147 );
 148
 149 $emdash = "\227";
 150 $endash = "\226";
 151 $bullet = "\225";
 152
 153 #
 154 # First, format the stuff coming from the front end into
 155 # a cleaner representation
 156 #
 157 if ( defined($input) ) {
 158     sysopen(PARAS, $input, O_RDONLY) or
 159         die "$0: cannot open $input: $!\n";
 160 } else {
 161     open(PARAS, "<&STDIN") or die "$0: $!\n";
 162 }
 163 while ( defined($line = <PARAS>) ) {
 164     chomp $line;
 165     $data = <PARAS>;
 166     chomp $data;
 167     if ( $line =~ /^meta :(.*)$/ ) {
 168         $metakey = $1;
 169         $metadata{$metakey} = $data;
 170     } elsif ( $line =~ /^indx :(.*)$/ ) {
 171         $ixentry = $1;
 172         push(@ixentries, $ixentry);
 173         $ixterms{$ixentry} = [split(/\037/, $data)];
 174         # Look for commas.  This is easier done on the string
 175         # representation, so do it now.
 176         if ( $line =~ /^(.*\,)\037sp\037/ ) {
 177             $ixprefix = $1;
 178             $ixhasprefix{$ixentry} = $ixprefix;
 179             if ( !$ixprefixes{$ixprefix} ) {
 180                 $ixcommafirst{$ixentry}++;
 181             }
 182             $ixprefixes{$ixprefix}++;
 183         }
 184     } else {
 185         push(@ptypes, $line);
 186         push(@paras, [split(/\037/, $data)]);
 187     }
 188 }
 189 close(PARAS);
 190
 191 #
 192 # Convert an integer to a chosen base
 193 #
 194 sub int2base($$) {
 195     my($i,$b) = @_;
 196     my($s) = '';
 197     my($n) = '';
 198     my($z) = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
 199     return '0' if ($i == 0);
 200     if ( $i < 0 ) { $n = '-'; $i = -$i; }
 201     while ( $i ) {
 202         $s = substr($z,$i%$b,1) . $s;
 203         $i = int($i/$b);
 204     }
 205     return $n.$s;
 206 }
 207
 208 #
 209 # Convert a string to a rendering array
 210 #
 211 sub string2array($)
 212 {
 213     my($s) = @_;
 214     my(@a) = ();
 215
 216     while ( $s =~ /^(\s+|\S+)(.*)$/ ) {
 217         push(@a, [0,$1]);
 218         $s = $2;
 219     }
 220
 221     return @a;
 222 }
 223
 224 #
 225 # Take a crossreference name and generate the PostScript name for it.
 226 #
 227 # This hack produces a somewhat smaller PDF...
 228 #%ps_xref_list = ();
 229 #$ps_xref_next = 0;
 230 #sub ps_xref($) {
 231 #    my($s) = @_;
 232 #    my $q = $ps_xref_list{$s};
 233 #    return $q if ( defined($ps_xref_list{$s}) );
 234 #    $q = 'X'.int2base($ps_xref_next++, 52);
 235 #    $ps_xref_list{$s} = $q;
 236 #    return $q;
 237 #}
 238
 239 # Somewhat bigger PDF, but one which obeys # URLs
 240 sub ps_xref($) {
 241     return @_[0];
 242 }
 243
 244 #
 245 # Flow lines according to a particular font set and width
 246 #
 247 # A "font set" is represented as an array containing
 248 # arrays of pairs: [<size>, <metricref>]
 249 #
 250 # Each line is represented as:
 251 # [ [type,first|last,aux,fontset,page,ypos,optional col],
 252 #   [rendering array] ]
 253 #
 254 # A space character may be "squeezed" by up to this much
 255 # (as a fraction of the normal width of a space.)
 256 #
 257 $ps_space_squeeze = 0.00;       # Min space width 100%
 258 sub ps_flow_lines($$$@) {
 259     my($wid, $fontset, $type, @data) = @_;
 260     my($fonts) = $$fontset{fonts};
 261     my($e);
 262     my($w)  = 0;                # Width of current line
 263     my($sw) = 0;                # Width of current line due to spaces
 264     my(@l)  = ();               # Current line
 265     my(@ls) = ();               # Accumulated output lines
 266     my(@xd) = ();               # Metadata that goes with subsequent text
 267     my $hasmarker = 0;          # Line has -6 marker
 268     my $pastmarker = 0;         # -6 marker found
 269
 270     # If there is a -6 marker anywhere in the paragraph,
 271     # *each line* output needs to have a -6 marker
 272     foreach $e ( @data ) {
 273         $hasmarker = 1 if ( $$e[0] == -6 );
 274     }
 275
 276     $w = 0;
 277     foreach $e ( @data ) {
 278         if ( $$e[0] < 0 ) {
 279             # Type is metadata.  Zero width.
 280             if ( $$e[0] == -6 ) {
 281                 $pastmarker = 1;
 282             }
 283             if ( $$e[0] == -1 || $$e[0] == -6 ) {
 284                 # -1 (end anchor) or -6 (marker) goes with the preceeding
 285                 # text, otherwise with the subsequent text
 286                 push(@l, $e);
 287             } else {
 288                 push(@xd, $e);
 289             }
 290         } else {
 291             my $ew = ps_width($$e[1], $fontset->{fonts}->[$$e[0]][1],
 292                               \@NASMEncoding) *
 293                 ($fontset->{fonts}->[$$e[0]][0]/1000);
 294             my $sp = $$e[1];
 295             $sp =~ tr/[^ ]//d;  # Delete nonspaces
 296             my $esw = ps_width($sp, $fontset->{fonts}->[$$e[0]][1],
 297                                \@NASMEncoding) *
 298                 ($fontset->{fonts}->[$$e[0]][0]/1000);
 299
 300             if ( ($w+$ew) - $ps_space_squeeze*($sw+$esw) > $wid ) {
 301                 # Begin new line
 302                 # Search backwards for previous space chunk
 303                 my $lx = scalar(@l)-1;
 304                 my @rm = ();
 305                 while ( $lx >= 0 ) {
 306                     while ( $lx >= 0 && $l[$lx]->[0] < 0 ) {
 307                         # Skip metadata
 308                         $pastmarker = 0 if ( $l[$lx]->[0] == -6 );
 309                         $lx--;
 310                     };
 311                     if ( $lx >= 0 ) {
 312                         if ( $l[$lx]->[1] eq ' ' ) {
 313                             splice(@l, $lx, 1);
 314                             @rm = splice(@l, $lx);
 315                             last; # Found place to break
 316                         } else {
 317                             $lx--;
 318                         }
 319                     }
 320                 }
 321
 322                 # Now @l contains the stuff to remain on the old line
 323                 # If we broke the line inside a link, then split the link
 324                 # into two.
 325                 my $lkref = undef;
 326                 foreach my $lc ( @l ) {
 327                     if ( $$lc[0] == -2 || $$lc[0] == -3 || $lc[0] == -7 ) {
 328                         $lkref = $lc;
 329                     } elsif ( $$lc[0] == -1 ) {
 330                         undef $lkref;
 331                     }
 332                 }
 333
 334                 if ( defined($lkref) ) {
 335                     push(@l, [-1,undef]); # Terminate old reference
 336                     unshift(@rm, $lkref); # Duplicate reference on new line
 337                 }
 338
 339                 if ( $hasmarker ) {
 340                     if ( $pastmarker ) {
 341                         unshift(@rm,[-6,undef]); # New line starts with marker
 342                     } else {
 343                         push(@l,[-6,undef]); # Old line ends with marker
 344                     }
 345                 }
 346
 347                 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]);
 348                 @l = @rm;
 349
 350                 $w = $sw = 0;
 351                 # Compute the width of the remainder array
 352                 for my $le ( @l ) {
 353                     if ( $$le[0] >= 0 ) {
 354                         my $xew = ps_width($$le[1],
 355                                            $fontset->{fonts}->[$$le[0]][1],
 356                                            \@NASMEncoding) *
 357                             ($fontset->{fonts}->[$$le[0]][0]/1000);
 358                         my $xsp = $$le[1];
 359                         $xsp =~ tr/[^ ]//d;     # Delete nonspaces
 360                         my $xsw = ps_width($xsp,
 361                                            $fontset->{fonts}->[$$le[0]][1],
 362                                            \@NASMEncoding) *
 363                             ($fontset->{fonts}->[$$le[0]][0]/1000);
 364                         $w += $xew;  $sw += $xsw;
 365                     }
 366                 }
 367             }
 368             push(@l, @xd);      # Accumulated metadata
 369             @xd = ();
 370             if ( $$e[1] ne '' ) {
 371                 push(@l, $e);
 372                 $w += $ew; $sw += $esw;
 373             }
 374         }
 375     }
 376     push(@l,@wd);
 377     if ( scalar(@l) ) {
 378         push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]); # Final line
 379     }
 380
 381     # Mark the first line as first and the last line as last
 382     if ( scalar(@ls) ) {
 383         $ls[0]->[0]->[1] |= 1;     # First in para
 384         $ls[-1]->[0]->[1] |= 2;    # Last in para
 385     }
 386     return @ls;
 387 }
 388
 389 #
 390 # Once we have broken things into lines, having multiple chunks
 391 # with the same font index is no longer meaningful.  Merge
 392 # adjacent chunks to keep down the size of the whole file.
 393 #
 394 sub ps_merge_chunks(@) {
 395     my(@ci) = @_;
 396     my($c, $lc);
 397     my(@co, $eco);
 398
 399     undef $lc;
 400     @co = ();
 401     $eco = -1;                  # Index of the last entry in @co
 402     foreach $c ( @ci ) {
 403         if ( defined($lc) && $$c[0] == $lc && $$c[0] >= 0 ) {
 404             $co[$eco]->[1] .= $$c[1];
 405         } else {
 406             push(@co, $c);  $eco++;
 407             $lc = $$c[0];
 408         }
 409     }
 410     return @co;
 411 }
 412
 413 #
 414 # Convert paragraphs to rendering arrays.  Each
 415 # element in the array contains (font, string),
 416 # where font can be one of:
 417 # -1 end link
 418 # -2 begin crossref
 419 # -3 begin weblink
 420 # -4 index item anchor
 421 # -5 crossref anchor
 422 # -6 left/right marker (used in the index)
 423 # -7 page link (used in the index)
 424 #  0 normal
 425 #  1 emphatic (italic)
 426 #  2 code (fixed spacing)
 427 #
 428
 429 sub mkparaarray($@) {
 430     my($ptype, @chunks) = @_;
 431
 432     my @para = ();
 433     my $in_e = 0;
 434     my $chunk;
 435
 436     if ( $ptype =~ /^code/ ) {
 437         foreach $chunk ( @chunks ) {
 438             push(@para, [2, $chunk]);
 439         }
 440     } else {
 441         foreach $chunk ( @chunks ) {
 442             my $type = substr($chunk,0,2);
 443             my $text = substr($chunk,2);
 444
 445             if ( $type eq 'sp' ) {
 446                 push(@para, [$in_e?1:0, ' ']);
 447             } elsif ( $type eq 'da' ) {
 448                 push(@para, [$in_e?1:0, $endash]);
 449             } elsif ( $type eq 'n ' ) {
 450                 push(@para, [0, $text]);
 451                 $in_e = 0;
 452             } elsif ( $type =~ '^e' ) {
 453                 push(@para, [1, $text]);
 454                 $in_e = ($type eq 'es' || $type eq 'e ');
 455             } elsif ( $type eq 'c ' ) {
 456                 push(@para, [2, $text]);
 457                 $in_e = 0;
 458             } elsif ( $type eq 'x ' ) {
 459                 push(@para, [-2, ps_xref($text)]);
 460             } elsif ( $type eq 'xe' ) {
 461                 push(@para, [-1, undef]);
 462             } elsif ( $type eq 'wc' || $type eq 'w ' ) {
 463                 $text =~ /\<(.*)\>(.*)$/;
 464                 my $link = $1; $text = $2;
 465                 push(@para, [-3, $link]);
 466                 push(@para, [($type eq 'wc') ? 2:0, $text]);
 467                 push(@para, [-1, undef]);
 468                 $in_e = 0;
 469             } elsif ( $type eq 'i ' ) {
 470                 push(@para, [-4, $text]);
 471             } else {
 472                 die "Unexpected paragraph chunk: $chunk";
 473             }
 474         }
 475     }
 476     return @para;
 477 }
 478
 479 $npara = scalar(@paras);
 480 for ( $i = 0 ; $i < $npara ; $i++ ) {
 481     $paras[$i] = [mkparaarray($ptypes[$i], @{$paras[$i]})];
 482 }
 483
 484 #
 485 # This converts a rendering array to a simple string
 486 #
 487 sub ps_arraytostr(@) {
 488     my $s = '';
 489     my $c;
 490     foreach $c ( @_ ) {
 491         $s .= $$c[1] if ( $$c[0] >= 0 );
 492     }
 493     return $s;
 494 }
 495
 496 #
 497 # This generates a duplicate of a paragraph
 498 #
 499 sub ps_dup_para(@) {
 500     my(@i) = @_;
 501     my(@o) = ();
 502     my($c);
 503
 504     foreach $c ( @i ) {
 505         my @cc = @{$c};
 506         push(@o, [@cc]);
 507     }
 508     return @o;
 509 }
 510
 511 #
 512 # Scan for header paragraphs and fix up their contents;
 513 # also generate table of contents and PDF bookmarks.
 514 #
 515 @tocparas = ([[-5, 'contents'], [0,'Contents']]);
 516 @tocptypes = ('chap');
 517 @bookmarks = (['title', 0, 'Title Page'], ['contents', 0, 'Contents']);
 518 %bookref = ();
 519 for ( $i = 0 ; $i < $npara ; $i++ ) {
 520     my $xtype = $ptypes[$i];
 521     my $ptype = substr($xtype,0,4);
 522     my $str;
 523     my $book;
 524
 525     if ( $ptype eq 'chap' || $ptype eq 'appn' ) {
 526         unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
 527             die "Bad para";
 528         }
 529         my $secn = $1;
 530         my $sech = $2;
 531         my $xref = ps_xref($sech);
 532         my $chap = ($ptype eq 'chap')?'Chapter':'Appendix';
 533
 534         $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
 535         push(@bookmarks, $book);
 536         $bookref{$secn} = $book;
 537
 538         push(@tocparas, [ps_dup_para(@{$paras[$i]})]);
 539         push(@tocptypes, 'toc0'.' :'.$sech.':'.$chap.' '.$secn.':');
 540
 541         unshift(@{$paras[$i]},
 542                 [-5, $xref], [0,$chap.' '.$secn.':'], [0, ' ']);
 543     } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
 544         unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
 545             die "Bad para";
 546         }
 547         my $secn = $1;
 548         my $sech = $2;
 549         my $xref = ps_xref($sech);
 550         my $pref;
 551         $pref = $secn; $pref =~ s/\.[^\.]+$//; # Find parent node
 552
 553         $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
 554         push(@bookmarks, $book);
 555         $bookref{$secn} = $book;
 556         $bookref{$pref}->[1]--; # Adjust count for parent node
 557
 558         push(@tocparas, [ps_dup_para(@{$paras[$i]})]);
 559         push(@tocptypes,
 560              (($ptype eq 'subh') ? 'toc2':'toc1').' :'.$sech.':'.$secn);
 561
 562         unshift(@{$paras[$i]}, [-5, $xref]);
 563     }
 564 }
 565
 566 #
 567 # Add TOC to beginning of paragraph list
 568 #
 569 unshift(@paras,  @tocparas);  undef @tocparas;
 570 unshift(@ptypes, @tocptypes); undef @tocptypes;
 571
 572 #
 573 # Add copyright notice to the beginning
 574 #
 575 unshift(@paras, [[0, "\251"], [0, ' '], [0,$metadata{'year'}],
 576                  [0, ' '], string2array($metadata{'author'})],
 577         [[0, ' ']], [string2array($metadata{'license'})]);
 578 unshift(@ptypes, 'norm', 'norm', 'norm');
 579
 580 $npara = scalar(@paras);
 581
 582 #
 583 # No lines generated, yet.
 584 #
 585 @pslines    = ();
 586
 587 #
 588 # Line Auxiliary Information Types
 589 #
 590 $AuxStr     = 1;                # String
 591 $AuxPage    = 2;                # Page number (from xref)
 592 $AuxPageStr = 3;                # Page number as a PostScript string
 593 $AuxXRef    = 4;                # Cross reference as a name
 594 $AuxNum     = 5;                # Number
 595
 596 #
 597 # Break or convert paragraphs into lines, and push them
 598 # onto the @pslines array.
 599 #
 600 sub ps_break_lines($$) {
 601     my ($paras,$ptypes) = @_;
 602
 603     my $linewidth  = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
 604     my $bullwidth  = $linewidth-$psconf{bulladj};
 605     my $indxwidth  = ($linewidth-$psconf{idxgutter})/$psconf{idxcolumns}
 606                      -$psconf{idxspace};
 607
 608     my $npara = scalar(@{$paras});
 609     my $i;
 610
 611     for ( $i = 0 ; $i < $npara ; $i++ ) {
 612         my $xtype = $ptypes->[$i];
 613         my $ptype = substr($xtype,0,4);
 614         my @data = @{$paras->[$i]};
 615         my @ls = ();
 616         if ( $ptype eq 'code' ) {
 617             my $p;
 618             # Code paragraph; each chunk is a line
 619             foreach $p ( @data ) {
 620                 push(@ls, [[$ptype,0,undef,\%TextFont,0,0],[$p]]);
 621             }
 622             $ls[0]->[0]->[1] |= 1;           # First in para
 623             $ls[-1]->[0]->[1] |= 2;      # Last in para
 624         } elsif ( $ptype eq 'chap' || $ptype eq 'appn' ) {
 625             # Chapters are flowed normally, but in an unusual font
 626             @ls = ps_flow_lines($linewidth, \%ChapFont, $ptype, @data);
 627         } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
 628             unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
 629                 die "Bad para";
 630             }
 631             my $secn = $1;
 632             my $sech = $2;
 633             my $font = ($ptype eq 'head') ? \%HeadFont : \%SubhFont;
 634             @ls = ps_flow_lines($linewidth, $font, $ptype, @data);
 635             # We need the heading number as auxillary data
 636             $ls[0]->[0]->[2] = [[$AuxStr,$secn]];
 637         } elsif ( $ptype eq 'norm' ) {
 638             @ls = ps_flow_lines($linewidth, \%TextFont, $ptype, @data);
 639         } elsif ( $ptype eq 'bull' ) {
 640             @ls = ps_flow_lines($bullwidth, \%TextFont, $ptype, @data);
 641         } elsif ( $ptype =~ /^toc/ ) {
 642             unless ( $xtype =~/^\S+ :([^:]*):(.*)$/ ) {
 643                 die "Bad para";
 644             }
 645             my $xref = $1;
 646             my $refname = $2.' ';
 647             my $ntoc = substr($ptype,3,1)+0;
 648             my $refwidth = ps_width($refname, $TextFont{fonts}->[0][1],
 649                                     \@NASMEncoding) *
 650                 ($TextFont{fonts}->[0][0]/1000);
 651
 652             @ls = ps_flow_lines($linewidth-$ntoc*$psconf{tocind}-
 653                                 $psconf{tocpnz}-$refwidth,
 654                                 \%TextFont, $ptype, @data);
 655
 656             # Auxilliary data: for the first line, the cross reference symbol
 657             # and the reference name; for all lines but the first, the
 658             # reference width; and for the last line, the page number
 659             # as a string.
 660             my $nl = scalar(@ls);
 661             $ls[0]->[0]->[2] = [[$AuxStr,$refname], [$AuxXRef,$xref]];
 662             for ( $j = 1 ; $j < $nl ; $j++ ) {
 663                 $ls[$j]->[0]->[2] = [[$AuxNum,$refwidth]];
 664             }
 665             push(@{$ls[$nl-1]->[0]->[2]}, [$AuxPageStr,$xref]);
 666         } elsif ( $ptype =~ /^idx/ ) {
 667             my $lvl = substr($ptype,3,1)+0;
 668
 669             @ls = ps_flow_lines($indxwidth-$lvl*$psconf{idxindent},
 670                                 \%TextFont, $ptype, @data);
 671         } else {
 672             die "Unknown para type: $ptype";
 673         }
 674         # Merge adjacent identical chunks
 675         foreach $l ( @ls ) {
 676             @{$$l[1]} = ps_merge_chunks(@{$$l[1]});
 677         }
 678         push(@pslines,@ls);
 679     }
 680 }
 681
 682 # Break the main body text into lines.
 683 ps_break_lines(\@paras, \@ptypes);
 684
 685 #
 686 # Break lines in to pages
 687 #
 688
 689 # Where to start on page 2, the copyright page
 690 $curpage = 2;                   # Start on page 2
 691 $curypos = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg}-
 692     $psconf{startcopyright};
 693 undef $columnstart;             # Not outputting columnar text
 694 undef $curcolumn;               # Current column
 695 $nlines = scalar(@pslines);
 696
 697 #
 698 # This formats lines inside the global @pslines array into pages,
 699 # updating the page and y-coordinate entries.  Start at the
 700 # $startline position in @pslines and go to but not including
 701 # $endline.  The global variables $curpage, $curypos, $columnstart
 702 # and $curcolumn are updated appropriately.
 703 #
 704 sub ps_break_pages($$) {
 705     my($startline, $endline) = @_;
 706
 707     # Paragraph types which should never be broken
 708     my $nobreakregexp = "^(chap|appn|head|subh|toc.|idx.)\$";
 709     # Paragraph types which are heading (meaning they should not be broken
 710     # immediately after)
 711     my $headingregexp = "^(chap|appn|head|subh)\$";
 712     # Paragraph types which are set in columnar format
 713     my $columnregexp = "^idx.\$";
 714
 715     my $upageheight = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg};
 716
 717     my $i;
 718
 719     for ( $i = $startline ; $i < $endline ; $i++ ) {
 720         my $linfo = $pslines[$i]->[0];
 721         if ( ($$linfo[0] eq 'chap' || $$linfo[0] eq 'appn' )
 722              && ($$linfo[1] & 1) ) {
 723             # First line of a new chapter heading.  Start a new page.
 724             undef $columnstart;
 725             $curpage++ if ( $curypos > 0 || defined($columnstart) );
 726             $curypos = $chapstart;
 727         } elsif ( defined($columnstart) && $$linfo[0] !~ /$columnregexp/o ) {
 728             undef $columnstart;
 729             $curpage++;
 730             $curypos = 0;
 731         }
 732
 733         if ( $$linfo[0] =~ /$columnregexp/o && !defined($columnstart) ) {
 734             $columnstart = $curypos;
 735             $curcolumn = 0;
 736         }
 737
 738         # Adjust position by the appropriate leading
 739         $curypos += $$linfo[3]->{leading};
 740
 741         # Record the page and y-position
 742         $$linfo[4] = $curpage;
 743         $$linfo[5] = $curypos;
 744         $$linfo[6] = $curcolumn if ( defined($columnstart) );
 745
 746         if ( $curypos > $upageheight ) {
 747             # We need to break the page before this line.
 748             my $broken = 0;             # No place found yet
 749             while ( !$broken && $pslines[$i]->[0]->[4] == $curpage ) {
 750                 my $linfo = $pslines[$i]->[0];
 751                 my $pinfo = $pslines[$i-1]->[0];
 752
 753                 if ( $$linfo[1] == 2 ) {
 754                     # This would be an orphan, don't break.
 755                 } elsif ( $$linfo[1] & 1 ) {
 756                     # Sole line or start of paragraph.  Break unless
 757                     # the previous line was part of a heading.
 758                     $broken = 1 if ( $$pinfo[0] !~ /$headingregexp/o );
 759                 } else {
 760                     # Middle of paragraph.  Break unless we're in a
 761                     # no-break paragraph, or the previous line would
 762                     # end up being a widow.
 763                     $broken = 1 if ( $$linfo[0] !~ /$nobreakregexp/o &&
 764                                      $$pinfo[1] != 1 );
 765                 }
 766                 $i--;
 767             }
 768             die "Nowhere to break page $curpage\n" if ( !$broken );
 769             # Now $i should point to line immediately before the break, i.e.
 770             # the next paragraph should be the first on the new page
 771             if ( defined($columnstart) &&
 772                  ++$curcolumn < $psconf{idxcolumns} ) {
 773                 # We're actually breaking text into columns, not pages
 774                 $curypos = $columnstart;
 775             } else {
 776                 undef $columnstart;
 777                 $curpage++;
 778                 $curypos = 0;
 779             }
 780             next;
 781         }
 782
 783         # Add end of paragraph skip
 784         if ( $$linfo[1] & 2 ) {
 785             $curypos += $skiparray{$$linfo[0]};
 786         }
 787     }
 788 }
 789
 790 ps_break_pages(0,$nlines);      # Break the main text body into pages
 791
 792 #
 793 # Find the page number of all the indices
 794 #
 795 %ps_xref_page   = ();           # Crossref anchor pages
 796 %ps_index_pages = ();           # Index item pages
 797 $nlines = scalar(@pslines);
 798 for ( $i = 0 ; $i < $nlines ; $i++ ) {
 799     my $linfo = $pslines[$i]->[0];
 800     foreach my $c ( @{$pslines[$i]->[1]} ) {
 801         if ( $$c[0] == -4 ) {
 802             if ( !defined($ps_index_pages{$$c[1]}) ) {
 803                 $ps_index_pages{$$c[1]} = [];
 804             } elsif ( $ps_index_pages{$$c[1]}->[-1] eq $$linfo[4] ) {
 805                 # Pages are emitted in order; if this is a duplicated
 806                 # entry it will be the last one
 807                 next;           # Duplicate
 808             }
 809             push(@{$ps_index_pages{$$c[1]}}, $$linfo[4]);
 810         } elsif ( $$c[0] == -5 ) {
 811             $ps_xref_page{$$c[1]} = $$linfo[4];
 812         }
 813     }
 814 }
 815
 816 #
 817 # Emit index paragraphs
 818 #
 819 $startofindex = scalar(@pslines);
 820 @ixparas = ([[-5,'index'],[0,'Index']]);
 821 @ixptypes = ('chap');
 822
 823 foreach $k ( @ixentries ) {
 824     my $n,$i;
 825     my $ixptype = 'idx0';
 826     my @ixpara = mkparaarray('idx0',@{$ixterms{$k}});
 827
 828     push(@ixpara, [-6,undef]);  # Left/right marker
 829     $i = 1;  $n = scalar(@{$ps_index_pages{$k}});
 830     foreach $p ( @{$ps_index_pages{$k}} ) {
 831         if ( $i++ == $n ) {
 832             push(@ixpara,[-7,$p],[0,"$p"],[-1,undef]);
 833         } else {
 834             push(@ixpara,[-7,$p],[0,"$p,"],[-1,undef],[0,' ']);
 835         }
 836     }
 837
 838     push(@ixparas, [@ixpara]);
 839     push(@ixptypes, $ixptype);
 840 }
 841
 842 #
 843 # Flow index paragraphs into lines
 844 #
 845 ps_break_lines(\@ixparas, \@ixptypes);
 846
 847 #
 848 # Format index into pages
 849 #
 850 $nlines = scalar(@pslines);
 851 ps_break_pages($startofindex, $nlines);
 852
 853 #
 854 # Push index onto bookmark list
 855 #
 856 push(@bookmarks, ['index', 0, 'Index']);
 857
 858 # Get the list of fonts used
 859 %ps_all_fonts = ();
 860 foreach $fset ( @AllFonts ) {
 861     foreach $font ( @{$fset->{fonts}} ) {
 862         $ps_all_fonts{$font->[1]->{name}}++;
 863     }
 864 }
 865
 866 # Emit the PostScript DSC header
 867 print "%!PS-Adobe-3.0\n";
 868 print "%%Pages: $curpage\n";
 869 print "%%BoundingBox: 0 0 ", $psconf{pagewidth}, ' ', $psconf{pageheight}, "\n";
 870 print "%%Creator: NASM psflow.pl\n";
 871 print "%%DocumentData: Clean7Bit\n";
 872 print "%%DocumentFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
 873 print "%%DocumentNeededFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
 874 print "%%Orientation: Portrait\n";
 875 print "%%PageOrder: Ascend\n";
 876 print "%%EndComments\n";
 877 print "%%BeginProlog\n";
 878
 879 # Emit the configurables as PostScript tokens
 880 foreach $c ( keys(%psconf) ) {
 881     print "/$c ", $psconf{$c}, " def\n";
 882 }
 883 foreach $c ( keys(%psbool) ) {
 884     print "/$c ", ($psbool{$c}?'true':'false'), " def\n";
 885 }
 886
 887 # Emit custom encoding vector
 888 $zstr = '/NASMEncoding [ ';
 889 foreach $c ( @NASMEncoding ) {
 890     my $z = '/'.(defined($c)?$c:'.notdef ').' ';
 891     if ( length($zstr)+length($z) > 72 ) {
 892         print $zstr,"\n";
 893         $zstr = ' ';
 894     }
 895     $zstr .= $z;
 896 }
 897 print $zstr, "] def\n";
 898
 899 # Font recoding routine
 900 # newname fontname --
 901 print "/nasmenc {\n";
 902 print "  findfont dup length dict begin\n";
 903 print "    { 1 index /FID ne {def}{pop pop} ifelse } forall\n";
 904 print "    /Encoding NASMEncoding def\n";
 905 print "    currentdict\n";
 906 print "  end\n";
 907 print "  definefont pop\n";
 908 print "} def\n";
 909
 910 # Emit fontset definitions
 911 foreach $fset ( @AllFonts ) {
 912     my $i = 0;
 913     my @zfonts = ();
 914     my %allfonts = ();
 915     foreach $font ( @{$fset->{fonts}} ) {
 916         $allfonts{$font->[1]->{name}}++;
 917     }
 918     foreach $font ( keys(%allfonts) ) {
 919         print '/',$font,'-NASM /',$font," nasmenc\n";
 920     }
 921     foreach $font ( @{$fset->{fonts}} ) {
 922         print '/', $fset->{name}, $i, ' ',
 923         '/', $font->[1]->{name}, '-NASM findfont ',
 924         $font->[0], " scalefont def\n";
 925         push(@zfonts, $fset->{name}.$i);
 926         $i++;
 927     }
 928     print '/', $fset->{name}, ' [', join(' ',@zfonts), "] def\n";
 929 }
 930
 931 # Emit the result as PostScript.  This is *NOT* correct code yet!
 932 open(PSHEAD, "< head.ps");
 933 while ( defined($line = <PSHEAD>) ) {
 934     print $line;
 935 }
 936 close(PSHEAD);
 937 print "%%EndProlog\n";
 938
 939 # Generate a PostScript string
 940 sub ps_string($) {
 941     my ($s) = @_;
 942     my ($i,$c);
 943     my ($o) = '(';
 944     my ($l) = length($s);
 945     for ( $i = 0 ; $i < $l ; $i++ ) {
 946         $c = substr($s,$i,1);
 947         if ( ord($c) < 32 || ord($c) > 126 ) {
 948             $o .= sprintf("\\%03o", ord($c));
 949         } elsif ( $c eq '(' || $c eq ')' || $c eq "\\" ) {
 950             $o .= "\\".$c;
 951         } else {
 952             $o .= $c;
 953         }
 954     }
 955     return $o.')';
 956 }
 957
 958 # Generate PDF bookmarks
 959 print "%%BeginSetup\n";
 960 foreach $b ( @bookmarks ) {
 961     print '[/Title ', ps_string($b->[2]), "\n";
 962     print '/Count ', $b->[1], ' ' if ( $b->[1] );
 963     print '/Dest /',$b->[0]," /OUT pdfmark\n";
 964 }
 965
 966 # Ask the PostScript interpreter for the proper size media
 967 print "setpagesize\n";
 968 print "%%EndSetup\n";
 969
 970 # Start a PostScript page
 971 sub ps_start_page() {
 972     $ps_page++;
 973     print "%%Page: $ps_page $ps_page\n";
 974     print "%%BeginPageSetup\n";
 975     print "save\n";
 976     print "%%EndPageSetup\n";
 977     print '/', $ps_page, " pa\n";
 978 }
 979
 980 # End a PostScript page
 981 sub ps_end_page($) {
 982     my($pn) = @_;
 983     if ( $pn ) {
 984         print "($ps_page)", (($ps_page & 1) ? 'pageodd' : 'pageeven'), "\n";
 985     }
 986     print "restore showpage\n";
 987 }
 988
 989 $ps_page = 0;
 990
 991 # Title page and inner cover
 992 ps_start_page();
 993 $title = $metadata{'title'};
 994 $title =~ s/ \- / $emdash /;
 995 $pstitle = ps_string($title);
 996
 997 # FIX THIS: This shouldn't be hard-coded like this
 998 print <<EOF;
 999 lmarg pageheight 2 mul 3 div moveto
1000 /Helvetica-Bold-NASM findfont 20 scalefont setfont
1001 /title linkdest ${pstitle} show
1002 lmarg pageheight 2 mul 3 div 10 sub moveto
1003 0 setlinecap 3 setlinewidth
1004 pagewidth lmarg sub rmarg sub 0 rlineto stroke
1005 /nasmlogo {
1006 gsave 1 dict begin
1007 /sz exch def
1008 /Courier-Bold findfont sz scalefont setfont
1009 moveto
1010 0.85 1.22 scale
1011 [(-~~..~:\#;L       .-:\#;L,.-   .~:\#:;.T  -~~.~:;. .~:;. )
1012 ( E8+U    *T     +U\'   *T\#  .97     *L   E8+\'  *;T\'  *;, )
1013 ( D97     \`*L  .97     \'*L   \"T;E+:,     D9     *L    *L )
1014 ( H7       I\#  T7       I\#        \"*:.   H7     I\#    I\# )
1015 ( U:       :8  *\#+    , :8  T,      79   U:     :8    :8 )
1016 (,\#B.     .IE,  \"T;E*  .IE, J *+;\#:T*\"  ,\#B.   .IE,  .IE,)] {
1017 currentpoint 3 -1 roll
1018 sz -0.10 mul 0 3 -1 roll ashow
1019 sz 0.72 mul sub moveto
1020 } forall
1021 end grestore
1022 } def
1023 0.6 setgray
1024 pagewidth 2 div 143 sub
1025 pageheight 2 div 33 add
1026 12 nasmlogo
1027 EOF
1028 ps_end_page(0);
1029
#
# Emit the document body, one PostScript line per element of @pslines:
#
#   [text and annotations] aux-data  number  [$$linfo[6]]  name
#
# where "number" is pageheight - topmarg - $$linfo[5] and "name" is
# $$linfo[0] immediately followed by $$linfo[1].  A new page is started
# whenever the page recorded in $$linfo[4] differs from the current one.
# NOTE(review): $$linfo[5] looks like the line's vertical offset below
# the top margin, and $$linfo[0]/$$linfo[1] like paragraph type and
# flags -- confirm against the code that builds @pslines.
#
$curpage = 2;
ps_start_page();
foreach $line ( @pslines ) {
    my $linfo = $line->[0];
    my $page  = $$linfo[4];

    if ( $page != $curpage ) {
        # Pages up to and including page 2 are closed without a number
        ps_end_page($curpage > 2);
        ps_start_page();
        $curpage = $page;
    }

    print '[';
    my $curfont = 0;
    foreach my $chunk ( @{$line->[1]} ) {
        my ($kind, $arg) = @$chunk;
        if ( $kind >= 0 ) {
            # Text chunk: $kind is the font number, printed only when it
            # differs from the one currently in effect
            if ( $kind != $curfont ) {
                $curfont = $kind;
                print $curfont;
            }
            print ps_string($arg);
        } elsif ( $kind == -1 ) {
            print '{el}';               # End link
        } elsif ( $kind == -2 ) {
            print "{/$arg xl}";         # xref link
        } elsif ( $kind == -3 ) {
            print '{' . ps_string($arg) . 'wl}'; # web link
        } elsif ( $kind == -4 ) {
            # Index anchor -- produces no output
        } elsif ( $kind == -5 ) {
            print "{/$arg xa}";         # xref anchor
        } elsif ( $kind == -6 ) {
            print '][';                 # Start a new array
            $curfont = 0;
        } elsif ( $kind == -7 ) {
            print "{/$arg pl}";         # page link
        } else {
            die "Unknown annotation";
        }
    }
    print ']';

    # Optional auxiliary data attached to the line
    if ( defined($$linfo[2]) ) {
        foreach my $aux ( @{$$linfo[2]} ) {
            my ($atype, $aval) = @$aux;
            if ( $atype == $AuxStr ) {
                print ps_string($aval);
            } elsif ( $atype == $AuxPage ) {
                print $ps_xref_page{$aval}, ' ';
            } elsif ( $atype == $AuxPageStr ) {
                print ps_string($ps_xref_page{$aval});
            } elsif ( $atype == $AuxXRef ) {
                print '/', ps_xref($aval), ' ';
            } elsif ( $atype == $AuxNum ) {
                print $aval, ' ';
            } else {
                die "Unknown auxilliary data type";
            }
        }
    }

    my $ypos = $psconf{pageheight} - $psconf{topmarg} - $$linfo[5];
    print $ypos;
    if ( defined($$linfo[6]) ) {
        print ' ', $$linfo[6];
    }
    print ' ', $$linfo[0], $$linfo[1], "\n";
}

ps_end_page(1);                 # Close the final page, with its number
print "%%EOF\n";