doc/genps.pl

   1 #!/usr/bin/perl
   2 #
   3 # Format the documentation as PostScript
   4 #
   5
   6 require 'psfonts.ph';           # The fonts we want to use
   7 require 'pswidth.ph';           # PostScript string width
   8
   9 use Fcntl;
  10
  11 #
  12 # PostScript configurables; these values are also available to the
  13 # PostScript code itself
  14 #
  15 %psconf = (
  16            pagewidth => 595,    # Page width in PostScript points
  17            pageheight => 792,   # Page height in PostScript points
  18            lmarg => 100,        # Left margin in PostScript points
  19            rmarg => 50,         # Right margin in PostScript points
  20            topmarg => 100,      # Top margin in PostScript points
  21            botmarg => 100,      # Bottom margin in PostScript points
  22            plmarg => 50,        # Page number position relative to left margin
  23            prmarg => 0,         # Page number position relative to right margin
  24            pymarg => 50,        # Page number position relative to bot margin
  25            startcopyright => 100, # How much above the bottom margin is the
  26                                   # copyright notice stuff
  27            bulladj => 12,       # How much to indent a bullet paragraph
  28            tocind => 12,        # TOC indentation per level
  29            tocpnz => 24,        # Width of TOC page number only zone
  30            tocdots => 8,        # Spacing between TOC dots
  31            idxspace => 24,      # Minimum space between index title and pg#
  32            idxindent => 32,     # How much to indent a subindex entry
  33            idxgutter => 24,     # Space between index columns
  34            idxcolumns => 2,     # Number of index columns
  35            );
  36
  37 %psbool = (
  38            colorlinks => 0,     # Set links in blue rather than black
  39            );
  40
  41 # Known paper sizes
  42 %papersizes = (
  43                'a4'     => [595, 842], # ISO standard paper size
  44                'letter' => [612, 792], # US common paper size
  45                'pa4'    => [595, 792], # Compromise ("portable a4")
  46                'b4'     => [709,1002], # ISO intermediate paper size
  47                'legal'  => [612,1008], # US intermediate paper size
  48                'a3'     => [842,1190], # ISO double paper size
  49                '11x17'  => [792,1224], # US double paper size
  50                );
  51
  52 #
  53 # Parse the command line
  54 #
  55 undef $input;
  56 while ( $arg = shift(@ARGV) ) {
  57     if ( $arg =~ /^\-(|no\-)(.*)$/ ) {
  58         $parm = $2;
  59         $true = ($1 eq '') ? 1 : 0;
  60         if ( $true && defined($papersizes{$parm}) ) {
  61             $psconf{pagewidth}  = $papersizes{$parm}->[0];
  62             $psconf{pageheight} = $papersizes{$parm}->[1];
  63         } elsif ( defined($psbool{$parm}) ) {
  64             $psbool{$parm} = $true;
  65         } elsif ( $true && defined($psconf{$parm}) ) {
  66             $psconf{$parm} = shift(@ARGV);
  67         } else {
  68             die "$0: Unknown option: $arg\n";
  69         }
  70     } else {
  71         $input = $arg;
  72     }
  73 }
  74
  75 #
  76 # Document formatting parameters
  77 #
  78 $paraskip = 6;                  # Space between paragraphs
  79 $chapstart = 30;                # Space before a chapter heading
  80 $chapskip = 24;                 # Space after a chapter heading
  81 $tocskip = 6;                   # Space between TOC entries
  82
  83 # Configure post-paragraph skips for each kind of paragraph
  84 %skiparray = ('chap' => $chapskip, 'appn' => $chapstart,
  85               'head' => $paraskip, 'subh' => $paraskip,
  86               'norm' => $paraskip, 'bull' => $paraskip,
  87               'code' => $paraskip, 'toc0' => $tocskip,
  88               'toc1' => $tocskip,  'toc2' => $tocskip);
  89
  90 #
  91 # Custom encoding vector.  This is basically the same as
  92 # ISOLatin1Encoding (a level 2 feature, so we dont want to use it),
  93 # but with a few extra characters thrown in.  It is basically a
  94 # modified Windows 1252 codepage, minus, for now, the euro sign (\200
  95 # is reserved for euro.)
  96 #
  97 @NASMEncoding =
  98 (
  99  (undef)x32,
 100  'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
 101  'ampersand', 'quoteright', 'parenleft',
 102  'parenright', 'asterisk', 'plus', 'comma', 'minus',
 103  'period', 'slash', 'zero', 'one', 'two', 'three',
 104  'four', 'five', 'six', 'seven', 'eight', 'nine',
 105  'colon', 'semicolon', 'less', 'equal', 'greater',
 106  'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
 107  'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q',
 108  'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
 109  'bracketleft', 'backslash', 'bracketright',
 110  'asciicircum', 'underscore', 'quoteleft', 'a', 'b',
 111  'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
 112  'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 113  'w', 'x', 'y', 'z', 'braceleft', 'bar',
 114  'braceright', 'asciitilde', undef,
 115  undef, 'macron', 'quotesinglbase', 'florin',
 116  'quotedblbase', 'ellipsis', 'dagger', 'dbldagger',
 117  'circumflex', 'perthousand', 'Scaron', 'guilsinglleft',
 118  'OE', 'hungarumlaut', 'Zcaron', 'caron',
 119  'ogonek', 'grave', 'quotesingle', 'quotedblleft',
 120  'quotedblright', 'bullet', 'endash', 'emdash',
 121  'tilde', 'trademark', 'scaron', 'guilsignlright',
 122  'oe', 'ring', 'zcaron', 'Ydieresis',
 123  'space', 'exclamdown', 'cent', 'sterling',
 124  'currency', 'yen', 'brokenbar', 'section',
 125  'dieresis', 'copyright', 'ordfeminine',
 126  'guillemotleft', 'logicalnot', 'hyphen',
 127  'registered', 'macron', 'degree', 'plusminus',
 128  'twosuperior', 'threesuperior', 'acute', 'mu',
 129  'paragraph', 'periodcentered', 'cedilla',
 130  'onesuperior', 'ordmasculine', 'guillemotright',
 131  'onequarter', 'onehalf', 'threequarters',
 132  'questiondown', 'Agrave', 'Aacute', 'Acircumflex',
 133  'Atilde', 'Adieresis', 'Aring', 'AE', 'Ccedilla',
 134  'Egrave', 'Eacute', 'Ecircumflex', 'Edieresis',
 135  'Igrave', 'Iacute', 'Icircumflex', 'Idieresis',
 136  'Eth', 'Ntilde', 'Ograve', 'Oacute', 'Ocircumflex',
 137  'Otilde', 'Odieresis', 'multiply', 'Oslash',
 138  'Ugrave', 'Uacute', 'Ucircumflex', 'Udieresis',
 139  'Yacute', 'Thorn', 'germandbls', 'agrave', 'aacute',
 140  'acircumflex', 'atilde', 'adieresis', 'aring', 'ae',
 141  'ccedilla', 'egrave', 'eacute', 'ecircumflex',
 142  'edieresis', 'igrave', 'iacute', 'icircumflex',
 143  'idieresis', 'eth', 'ntilde', 'ograve', 'oacute',
 144  'ocircumflex', 'otilde', 'odieresis', 'divide',
 145  'oslash', 'ugrave', 'uacute', 'ucircumflex',
 146  'udieresis', 'yacute', 'thorn', 'ydieresis'
 147 );
 148
 149 $emdash = "\227";
 150 $endash = "\226";
 151 $bullet = "\225";
 152
 153 #
 154 # First, format the stuff coming from the front end into
 155 # a cleaner representation
 156 #
 157 if ( defined($input) ) {
 158     sysopen(PARAS, $input, O_RDONLY) or
 159         die "$0: cannot open $input: $!\n";
 160 } else {
 161     open(PARAS, "<&STDIN") or die "$0: $!\n";
 162 }
 163 while ( defined($line = <PARAS>) ) {
 164     chomp $line;
 165     $data = <PARAS>;
 166     chomp $data;
 167     if ( $line =~ /^meta :(.*)$/ ) {
 168         $metakey = $1;
 169         $metadata{$metakey} = $data;
 170     } elsif ( $line =~ /^indx :(.*)$/ ) {
 171         $ixentry = $1;
 172         push(@ixentries, $ixentry);
 173         $ixterms{$ixentry} = [split(/\037/, $data)];
 174         # Look for commas.  This is easier done on the string
 175         # representation, so do it now.
 176         if ( $line =~ /^(.*\,)\037sp\037/ ) {
 177             $ixprefix = $1;
 178             $ixhasprefix{$ixentry} = $ixprefix;
 179             if ( !$ixprefixes{$ixprefix} ) {
 180                 $ixcommafirst{$ixentry}++;
 181             }
 182             $ixprefixes{$ixprefix}++;
 183         }
 184     } else {
 185         push(@ptypes, $line);
 186         push(@paras, [split(/\037/, $data)]);
 187     }
 188 }
 189 close(PARAS);
 190
 191 #
 192 # Convert an integer to a chosen base
 193 #
 194 sub int2base($$) {
 195     my($i,$b) = @_;
 196     my($s) = '';
 197     my($n) = '';
 198     my($z) = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
 199     return '0' if ($i == 0);
 200     if ( $i < 0 ) { $n = '-'; $i = -$i; }
 201     while ( $i ) {
 202         $s = substr($z,$i%$b,1) . $s;
 203         $i = int($i/$b);
 204     }
 205     return $n.$s;
 206 }
 207
 208 #
 209 # Convert a string to a rendering array
 210 #
 211 sub string2array($)
 212 {
 213     my($s) = @_;
 214     my(@a) = ();
 215
 216     while ( $s =~ /^(\s+|\S+)(.*)$/ ) {
 217         push(@a, [0,$1]);
 218         $s = $2;
 219     }
 220
 221     return @a;
 222 }
 223
 224 #
 225 # Take a crossreference name and generate the PostScript name for it.
 226 #
 227 # This hack produces a somewhat smaller PDF...
 228 #%ps_xref_list = ();
 229 #$ps_xref_next = 0;
 230 #sub ps_xref($) {
 231 #    my($s) = @_;
 232 #    my $q = $ps_xref_list{$s};
 233 #    return $q if ( defined($ps_xref_list{$s}) );
 234 #    $q = 'X'.int2base($ps_xref_next++, 52);
 235 #    $ps_xref_list{$s} = $q;
 236 #    return $q;
 237 #}
 238
 239 # Somewhat bigger PDF, but one which obeys # URLs
 240 sub ps_xref($) {
 241     return @_[0];
 242 }
 243
 244 #
 245 # Flow lines according to a particular font set and width
 246 #
 247 # A "font set" is represented as an array containing
 248 # arrays of pairs: [<size>, <metricref>]
 249 #
 250 # Each line is represented as:
 251 # [ [type,first|last,aux,fontset,page,ypos,optional col],
 252 #   [rendering array] ]
 253 #
 254 # A space character may be "squeezed" by up to this much
 255 # (as a fraction of the normal width of a space.)
 256 #
 257 $ps_space_squeeze = 0.00;       # Min space width 100%
 258 sub ps_flow_lines($$$@) {
 259     my($wid, $fontset, $type, @data) = @_;
 260     my($fonts) = $$fontset{fonts};
 261     my($e);
 262     my($w)  = 0;                # Width of current line
 263     my($sw) = 0;                # Width of current line due to spaces
 264     my(@l)  = ();               # Current line
 265     my(@ls) = ();               # Accumulated output lines
 266     my(@xd) = ();               # Metadata that goes with subsequent text
 267     my $hasmarker = 0;          # Line has -6 marker
 268     my $pastmarker = 0;         # -6 marker found
 269
 270     # If there is a -6 marker anywhere in the paragraph,
 271     # *each line* output needs to have a -6 marker
 272     foreach $e ( @data ) {
 273         $hasmarker = 1 if ( $$e[0] == -6 );
 274     }
 275
 276     $w = 0;
 277     foreach $e ( @data ) {
 278         if ( $$e[0] < 0 ) {
 279             # Type is metadata.  Zero width.
 280             if ( $$e[0] == -6 ) {
 281                 $pastmarker = 1;
 282             }
 283             if ( $$e[0] == -1 || $$e[0] == -6 ) {
 284                 # -1 (end anchor) or -6 (marker) goes with the preceeding
 285                 # text, otherwise with the subsequent text
 286                 push(@l, $e);
 287             } else {
 288                 push(@xd, $e);
 289             }
 290         } else {
 291             my $ew = ps_width($$e[1], $fontset->{fonts}->[$$e[0]][1],
 292                               \@NASMEncoding) *
 293                 ($fontset->{fonts}->[$$e[0]][0]/1000);
 294             my $sp = $$e[1];
 295             $sp =~ tr/[^ ]//d;  # Delete nonspaces
 296             my $esw = ps_width($sp, $fontset->{fonts}->[$$e[0]][1],
 297                                \@NASMEncoding) *
 298                 ($fontset->{fonts}->[$$e[0]][0]/1000);
 299
 300             if ( ($w+$ew) - $ps_space_squeeze*($sw+$esw) > $wid ) {
 301                 # Begin new line
 302                 # Search backwards for previous space chunk
 303                 my $lx = scalar(@l)-1;
 304                 my @rm = ();
 305                 while ( $lx >= 0 ) {
 306                     while ( $lx >= 0 && $l[$lx]->[0] < 0 ) {
 307                         # Skip metadata
 308                         $pastmarker = 0 if ( $l[$lx]->[0] == -6 );
 309                         $lx--;
 310                     };
 311                     if ( $lx >= 0 ) {
 312                         if ( $l[$lx]->[1] eq ' ' ) {
 313                             splice(@l, $lx, 1);
 314                             @rm = splice(@l, $lx);
 315                             last; # Found place to break
 316                         } else {
 317                             $lx--;
 318                         }
 319                     }
 320                 }
 321
 322                 # Now @l contains the stuff to remain on the old line
 323                 # If we broke the line inside a link, then split the link
 324                 # into two.
 325                 my $lkref = undef;
 326                 foreach my $lc ( @l ) {
 327                     if ( $$lc[0] == -2 || $$lc[0] == -3 || $lc[0] == -7 ) {
 328                         $lkref = $lc;
 329                     } elsif ( $$lc[0] == -1 ) {
 330                         undef $lkref;
 331                     }
 332                 }
 333
 334                 if ( defined($lkref) ) {
 335                     push(@l, [-1,undef]); # Terminate old reference
 336                     unshift(@rm, $lkref); # Duplicate reference on new line
 337                 }
 338
 339                 if ( $hasmarker ) {
 340                     if ( $pastmarker ) {
 341                         unshift(@rm,[-6,undef]); # New line starts with marker
 342                     } else {
 343                         push(@l,[-6,undef]); # Old line ends with marker
 344                     }
 345                 }
 346
 347                 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]);
 348                 @l = @rm;
 349
 350                 $w = $sw = 0;
 351                 # Compute the width of the remainder array
 352                 for my $le ( @l ) {
 353                     if ( $$le[0] >= 0 ) {
 354                         my $xew = ps_width($$le[1],
 355                                            $fontset->{fonts}->[$$le[0]][1],
 356                                            \@NASMEncoding) *
 357                             ($fontset->{fonts}->[$$le[0]][0]/1000);
 358                         my $xsp = $$le[1];
 359                         $xsp =~ tr/[^ ]//d;     # Delete nonspaces
 360                         my $xsw = ps_width($xsp,
 361                                            $fontset->{fonts}->[$$le[0]][1],
 362                                            \@NASMEncoding) *
 363                             ($fontset->{fonts}->[$$le[0]][0]/1000);
 364                         $w += $xew;  $sw += $xsw;
 365                     }
 366                 }
 367             }
 368             push(@l, @xd);      # Accumulated metadata
 369             @xd = ();
 370             if ( $$e[1] ne '' ) {
 371                 push(@l, $e);
 372                 $w += $ew; $sw += $esw;
 373             }
 374         }
 375     }
 376     push(@l,@xd);
 377     if ( scalar(@l) ) {
 378         push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]); # Final line
 379     }
 380
 381     # Mark the first line as first and the last line as last
 382     if ( scalar(@ls) ) {
 383         $ls[0]->[0]->[1] |= 1;     # First in para
 384         $ls[-1]->[0]->[1] |= 2;    # Last in para
 385     }
 386     return @ls;
 387 }
 388
 389 #
 390 # Once we have broken things into lines, having multiple chunks
 391 # with the same font index is no longer meaningful.  Merge
 392 # adjacent chunks to keep down the size of the whole file.
 393 #
 394 sub ps_merge_chunks(@) {
 395     my(@ci) = @_;
 396     my($c, $lc);
 397     my(@co, $eco);
 398
 399     undef $lc;
 400     @co = ();
 401     $eco = -1;                  # Index of the last entry in @co
 402     foreach $c ( @ci ) {
 403         if ( defined($lc) && $$c[0] == $lc && $$c[0] >= 0 ) {
 404             $co[$eco]->[1] .= $$c[1];
 405         } else {
 406             push(@co, $c);  $eco++;
 407             $lc = $$c[0];
 408         }
 409     }
 410     return @co;
 411 }
 412
 413 #
 414 # Convert paragraphs to rendering arrays.  Each
 415 # element in the array contains (font, string),
 416 # where font can be one of:
 417 # -1 end link
 418 # -2 begin crossref
 419 # -3 begin weblink
 420 # -4 index item anchor
 421 # -5 crossref anchor
 422 # -6 left/right marker (used in the index)
 423 # -7 page link (used in the index)
 424 #  0 normal
 425 #  1 empatic (italic)
 426 #  2 code (fixed spacing)
 427 #
 428
 429 sub mkparaarray($@) {
 430     my($ptype, @chunks) = @_;
 431
 432     my @para = ();
 433     my $in_e = 0;
 434     my $chunk;
 435
 436     if ( $ptype =~ /^code/ ) {
 437         foreach $chunk ( @chunks ) {
 438             push(@para, [2, $chunk]);
 439         }
 440     } else {
 441         foreach $chunk ( @chunks ) {
 442             my $type = substr($chunk,0,2);
 443             my $text = substr($chunk,2);
 444
 445             if ( $type eq 'sp' ) {
 446                 push(@para, [$in_e?1:0, ' ']);
 447             } elsif ( $type eq 'da' ) {
 448                 push(@para, [$in_e?1:0, $endash]);
 449             } elsif ( $type eq 'n ' ) {
 450                 push(@para, [0, $text]);
 451                 $in_e = 0;
 452             } elsif ( $type =~ '^e' ) {
 453                 push(@para, [1, $text]);
 454                 $in_e = ($type eq 'es' || $type eq 'e ');
 455             } elsif ( $type eq 'c ' ) {
 456                 push(@para, [2, $text]);
 457                 $in_e = 0;
 458             } elsif ( $type eq 'x ' ) {
 459                 push(@para, [-2, ps_xref($text)]);
 460             } elsif ( $type eq 'xe' ) {
 461                 push(@para, [-1, undef]);
 462             } elsif ( $type eq 'wc' || $type eq 'w ' ) {
 463                 $text =~ /\<(.*)\>(.*)$/;
 464                 my $link = $1; $text = $2;
 465                 push(@para, [-3, $link]);
 466                 push(@para, [($type eq 'wc') ? 2:0, $text]);
 467                 push(@para, [-1, undef]);
 468                 $in_e = 0;
 469             } elsif ( $type eq 'i ' ) {
 470                 push(@para, [-4, $text]);
 471             } else {
 472                 die "Unexpected paragraph chunk: $chunk";
 473             }
 474         }
 475     }
 476     return @para;
 477 }
 478
 479 $npara = scalar(@paras);
 480 for ( $i = 0 ; $i < $npara ; $i++ ) {
 481     $paras[$i] = [mkparaarray($ptypes[$i], @{$paras[$i]})];
 482 }
 483
 484 #
 485 # This converts a rendering array to a simple string
 486 #
 487 sub ps_arraytostr(@) {
 488     my $s = '';
 489     my $c;
 490     foreach $c ( @_ ) {
 491         $s .= $$c[1] if ( $$c[0] >= 0 );
 492     }
 493     return $s;
 494 }
 495
 496 #
 497 # This generates a duplicate of a paragraph
 498 #
 499 sub ps_dup_para(@) {
 500     my(@i) = @_;
 501     my(@o) = ();
 502     my($c);
 503
 504     foreach $c ( @i ) {
 505         my @cc = @{$c};
 506         push(@o, [@cc]);
 507     }
 508     return @o;
 509 }
 510
 511 #
 512 # This generates a duplicate of a paragraph, stripping anchor
 513 # tags (-4 and -5)
 514 #
 515 sub ps_dup_para_noanchor(@) {
 516     my(@i) = @_;
 517     my(@o) = ();
 518     my($c);
 519
 520     foreach $c ( @i ) {
 521         my @cc = @{$c};
 522         push(@o, [@cc]) unless ( $cc[0] == -4 || $cc[0] == -5 );
 523     }
 524     return @o;
 525 }
 526
 527 #
 528 # Scan for header paragraphs and fix up their contents;
 529 # also generate table of contents and PDF bookmarks.
 530 #
 531 @tocparas = ([[-5, 'contents'], [0,'Contents']]);
 532 @tocptypes = ('chap');
 533 @bookmarks = (['title', 0, 'Title Page'], ['contents', 0, 'Contents']);
 534 %bookref = ();
 535 for ( $i = 0 ; $i < $npara ; $i++ ) {
 536     my $xtype = $ptypes[$i];
 537     my $ptype = substr($xtype,0,4);
 538     my $str;
 539     my $book;
 540
 541     if ( $ptype eq 'chap' || $ptype eq 'appn' ) {
 542         unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
 543             die "Bad para";
 544         }
 545         my $secn = $1;
 546         my $sech = $2;
 547         my $xref = ps_xref($sech);
 548         my $chap = ($ptype eq 'chap')?'Chapter':'Appendix';
 549
 550         $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
 551         push(@bookmarks, $book);
 552         $bookref{$secn} = $book;
 553
 554         push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
 555         push(@tocptypes, 'toc0'.' :'.$sech.':'.$chap.' '.$secn.':');
 556
 557         unshift(@{$paras[$i]},
 558                 [-5, $xref], [0,$chap.' '.$secn.':'], [0, ' ']);
 559     } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
 560         unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
 561             die "Bad para";
 562         }
 563         my $secn = $1;
 564         my $sech = $2;
 565         my $xref = ps_xref($sech);
 566         my $pref;
 567         $pref = $secn; $pref =~ s/\.[^\.]+$//; # Find parent node
 568
 569         $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
 570         push(@bookmarks, $book);
 571         $bookref{$secn} = $book;
 572         $bookref{$pref}->[1]--; # Adjust count for parent node
 573
 574         push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
 575         push(@tocptypes,
 576              (($ptype eq 'subh') ? 'toc2':'toc1').' :'.$sech.':'.$secn);
 577
 578         unshift(@{$paras[$i]}, [-5, $xref]);
 579     }
 580 }
 581
 582 #
 583 # Add TOC to beginning of paragraph list
 584 #
 585 unshift(@paras,  @tocparas);  undef @tocparas;
 586 unshift(@ptypes, @tocptypes); undef @tocptypes;
 587
 588 #
 589 # Add copyright notice to the beginning
 590 #
 591 unshift(@paras, [[0, "\251"], [0, ' '], [0,$metadata{'year'}],
 592                  [0, ' '], string2array($metadata{'author'})],
 593         [[0, ' ']], [string2array($metadata{'license'})]);
 594 unshift(@ptypes, 'norm', 'norm', 'norm');
 595
 596 $npara = scalar(@paras);
 597
 598 #
 599 # No lines generated, yet.
 600 #
 601 @pslines    = ();
 602
 603 #
 604 # Line Auxilliary Information Types
 605 #
 606 $AuxStr     = 1;                # String
 607 $AuxPage    = 2;                # Page number (from xref)
 608 $AuxPageStr = 3;                # Page number as a PostScript string
 609 $AuxXRef    = 4;                # Cross reference as a name
 610 $AuxNum     = 5;                # Number
 611
 612 #
 613 # Break or convert paragraphs into lines, and push them
 614 # onto the @pslines array.
 615 #
 616 sub ps_break_lines($$) {
 617     my ($paras,$ptypes) = @_;
 618
 619     my $linewidth  = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
 620     my $bullwidth  = $linewidth-$psconf{bulladj};
 621     my $indxwidth  = ($linewidth-$psconf{idxgutter})/$psconf{idxcolumns}
 622                      -$psconf{idxspace};
 623
 624     my $npara = scalar(@{$paras});
 625     my $i;
 626
 627     for ( $i = 0 ; $i < $npara ; $i++ ) {
 628         my $xtype = $ptypes->[$i];
 629         my $ptype = substr($xtype,0,4);
 630         my @data = @{$paras->[$i]};
 631         my @ls = ();
 632         if ( $ptype eq 'code' ) {
 633             my $p;
 634             # Code paragraph; each chunk is a line
 635             foreach $p ( @data ) {
 636                 push(@ls, [[$ptype,0,undef,\%BodyFont,0,0],[$p]]);
 637             }
 638             $ls[0]->[0]->[1] |= 1;           # First in para
 639             $ls[-1]->[0]->[1] |= 2;      # Last in para
 640         } elsif ( $ptype eq 'chap' || $ptype eq 'appn' ) {
 641             # Chapters are flowed normally, but in an unusual font
 642             @ls = ps_flow_lines($linewidth, \%ChapFont, $ptype, @data);
 643         } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
 644             unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
 645                 die "Bad para";
 646             }
 647             my $secn = $1;
 648             my $sech = $2;
 649             my $font = ($ptype eq 'head') ? \%HeadFont : \%SubhFont;
 650             @ls = ps_flow_lines($linewidth, $font, $ptype, @data);
 651             # We need the heading number as auxillary data
 652             $ls[0]->[0]->[2] = [[$AuxStr,$secn]];
 653         } elsif ( $ptype eq 'norm' ) {
 654             @ls = ps_flow_lines($linewidth, \%BodyFont, $ptype, @data);
 655         } elsif ( $ptype eq 'bull' ) {
 656             @ls = ps_flow_lines($bullwidth, \%BodyFont, $ptype, @data);
 657         } elsif ( $ptype =~ /^toc/ ) {
 658             unless ( $xtype =~/^\S+ :([^:]*):(.*)$/ ) {
 659                 die "Bad para";
 660             }
 661             my $xref = $1;
 662             my $refname = $2.' ';
 663             my $ntoc = substr($ptype,3,1)+0;
 664             my $refwidth = ps_width($refname, $BodyFont{fonts}->[0][1],
 665                                     \@NASMEncoding) *
 666                 ($BodyFont{fonts}->[0][0]/1000);
 667
 668             @ls = ps_flow_lines($linewidth-$ntoc*$psconf{tocind}-
 669                                 $psconf{tocpnz}-$refwidth,
 670                                 \%BodyFont, $ptype, @data);
 671
 672             # Auxilliary data: for the first line, the cross reference symbol
 673             # and the reference name; for all lines but the first, the
 674             # reference width; and for the last line, the page number
 675             # as a string.
 676             my $nl = scalar(@ls);
 677             $ls[0]->[0]->[2] = [[$AuxStr,$refname], [$AuxXRef,$xref]];
 678             for ( $j = 1 ; $j < $nl ; $j++ ) {
 679                 $ls[$j]->[0]->[2] = [[$AuxNum,$refwidth]];
 680             }
 681             push(@{$ls[$nl-1]->[0]->[2]}, [$AuxPageStr,$xref]);
 682         } elsif ( $ptype =~ /^idx/ ) {
 683             my $lvl = substr($ptype,3,1)+0;
 684
 685             @ls = ps_flow_lines($indxwidth-$lvl*$psconf{idxindent},
 686                                 \%BodyFont, $ptype, @data);
 687         } else {
 688             die "Unknown para type: $ptype";
 689         }
 690         # Merge adjacent identical chunks
 691         foreach $l ( @ls ) {
 692             @{$$l[1]} = ps_merge_chunks(@{$$l[1]});
 693         }
 694         push(@pslines,@ls);
 695     }
 696 }
 697
 698 # Break the main body text into lines.
 699 ps_break_lines(\@paras, \@ptypes);
 700
 701 #
 702 # Break lines in to pages
 703 #
 704
 705 # Where to start on page 2, the copyright page
 706 $curpage = 2;                   # Start on page 2
 707 $curypos = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg}-
 708     $psconf{startcopyright};
 709 undef $columnstart;             # Not outputting columnar text
 710 undef $curcolumn;               # Current column
 711 $nlines = scalar(@pslines);
 712
 713 #
 714 # This formats lines inside the global @pslines array into pages,
 715 # updating the page and y-coordinate entries.  Start at the
 716 # $startline position in @pslines and go to but not including
 717 # $endline.  The global variables $curpage, $curypos, $columnstart
 718 # and $curcolumn are updated appropriately.
 719 #
 720 sub ps_break_pages($$) {
 721     my($startline, $endline) = @_;
 722
 723     # Paragraph types which should never be broken
 724     my $nobreakregexp = "^(chap|appn|head|subh|toc.|idx.)\$";
 725     # Paragraph types which are heading (meaning they should not be broken
 726     # immediately after)
 727     my $headingregexp = "^(chap|appn|head|subh)\$";
 728     # Paragraph types which are set in columnar format
 729     my $columnregexp = "^idx.\$";
 730
 731     my $upageheight = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg};
 732
 733     my $i;
 734
 735     for ( $i = $startline ; $i < $endline ; $i++ ) {
 736         my $linfo = $pslines[$i]->[0];
 737         if ( ($$linfo[0] eq 'chap' || $$linfo[0] eq 'appn' )
 738              && ($$linfo[1] & 1) ) {
 739             # First line of a new chapter heading.  Start a new page.
 740             undef $columnstart;
 741             $curpage++ if ( $curypos > 0 || defined($columnstart) );
 742             $curypos = $chapstart;
 743         } elsif ( defined($columnstart) && $$linfo[0] !~ /$columnregexp/o ) {
 744             undef $columnstart;
 745             $curpage++;
 746             $curypos = 0;
 747         }
 748
 749         if ( $$linfo[0] =~ /$columnregexp/o && !defined($columnstart) ) {
 750             $columnstart = $curypos;
 751             $curcolumn = 0;
 752         }
 753
 754         # Adjust position by the appropriate leading
 755         $curypos += $$linfo[3]->{leading};
 756
 757         # Record the page and y-position
 758         $$linfo[4] = $curpage;
 759         $$linfo[5] = $curypos;
 760         $$linfo[6] = $curcolumn if ( defined($columnstart) );
 761
 762         if ( $curypos > $upageheight ) {
 763             # We need to break the page before this line.
 764             my $broken = 0;             # No place found yet
 765             while ( !$broken && $pslines[$i]->[0]->[4] == $curpage ) {
 766                 my $linfo = $pslines[$i]->[0];
 767                 my $pinfo = $pslines[$i-1]->[0];
 768
 769                 if ( $$linfo[1] == 2 ) {
 770                     # This would be an orphan, don't break.
 771                 } elsif ( $$linfo[1] & 1 ) {
 772                     # Sole line or start of paragraph.  Break unless
 773                     # the previous line was part of a heading.
 774                     $broken = 1 if ( $$pinfo[0] !~ /$headingregexp/o );
 775                 } else {
 776                     # Middle of paragraph.  Break unless we're in a
 777                     # no-break paragraph, or the previous line would
 778                     # end up being a widow.
 779                     $broken = 1 if ( $$linfo[0] !~ /$nobreakregexp/o &&
 780                                      $$pinfo[1] != 1 );
 781                 }
 782                 $i--;
 783             }
 784             die "Nowhere to break page $curpage\n" if ( !$broken );
 785             # Now $i should point to line immediately before the break, i.e.
 786             # the next paragraph should be the first on the new page
 787             if ( defined($columnstart) &&
 788                  ++$curcolumn < $psconf{idxcolumns} ) {
 789                 # We're actually breaking text into columns, not pages
 790                 $curypos = $columnstart;
 791             } else {
 792                 undef $columnstart;
 793                 $curpage++;
 794                 $curypos = 0;
 795             }
 796             next;
 797         }
 798
 799         # Add end of paragraph skip
 800         if ( $$linfo[1] & 2 ) {
 801             $curypos += $skiparray{$$linfo[0]};
 802         }
 803     }
 804 }
 805
 806 ps_break_pages(0,$nlines);      # Break the main text body into pages
 807
 808 #
 809 # Find the page number of all the indices
 810 #
 811 %ps_xref_page   = ();           # Crossref anchor pages
 812 %ps_index_pages = ();           # Index item pages
 813 $nlines = scalar(@pslines);
 814 for ( $i = 0 ; $i < $nlines ; $i++ ) {
 815     my $linfo = $pslines[$i]->[0];
 816     foreach my $c ( @{$pslines[$i]->[1]} ) {
 817         if ( $$c[0] == -4 ) {
 818             if ( !defined($ps_index_pages{$$c[1]}) ) {
 819                 $ps_index_pages{$$c[1]} = [];
 820             } elsif ( $ps_index_pages{$$c[1]}->[-1] eq $$linfo[4] ) {
 821                 # Pages are emitted in order; if this is a duplicated
 822                 # entry it will be the last one
 823                 next;           # Duplicate
 824             }
 825             push(@{$ps_index_pages{$$c[1]}}, $$linfo[4]);
 826         } elsif ( $$c[0] == -5 ) {
 827             $ps_xref_page{$$c[1]} = $$linfo[4];
 828         }
 829     }
 830 }
 831
 832 #
 833 # Emit index paragraphs
 834 #
 835 $startofindex = scalar(@pslines);
 836 @ixparas = ([[-5,'index'],[0,'Index']]);
 837 @ixptypes = ('chap');
 838
 839 foreach $k ( @ixentries ) {
 840     my $n,$i;
 841     my $ixptype = 'idx0';
 842     my @ixpara = mkparaarray('idx0',@{$ixterms{$k}});
 843
 844     push(@ixpara, [-6,undef]);  # Left/right marker
 845     $i = 1;  $n = scalar(@{$ps_index_pages{$k}});
 846     foreach $p ( @{$ps_index_pages{$k}} ) {
 847         if ( $i++ == $n ) {
 848             push(@ixpara,[-7,$p],[0,"$p"],[-1,undef]);
 849         } else {
 850             push(@ixpara,[-7,$p],[0,"$p,"],[-1,undef],[0,' ']);
 851         }
 852     }
 853
 854     push(@ixparas, [@ixpara]);
 855     push(@ixptypes, $ixptype);
 856 }
 857
 858 #
 859 # Flow index paragraphs into lines
 860 #
 861 ps_break_lines(\@ixparas, \@ixptypes);
 862
 863 #
 864 # Format index into pages
 865 #
 866 $nlines = scalar(@pslines);
 867 ps_break_pages($startofindex, $nlines);
 868
 869 #
 870 # Push index onto bookmark list
 871 #
 872 push(@bookmarks, ['index', 0, 'Index']);
 873
 874 # Get the list of fonts used
 875 %ps_all_fonts = ();
 876 foreach $fset ( @AllFonts ) {
 877     foreach $font ( @{$fset->{fonts}} ) {
 878         $ps_all_fonts{$font->[1]->{name}}++;
 879     }
 880 }
 881
 882 # Emit the PostScript DSC header
 883 print "%!PS-Adobe-3.0\n";
 884 print "%%Pages: $curpage\n";
 885 print "%%BoundingBox: 0 0 ", $psconf{pagewidth}, ' ', $psconf{pageheight}, "\n";
 886 print "%%Creator: NASM psflow.pl\n";
 887 print "%%DocumentData: Clean7Bit\n";
 888 print "%%DocumentFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
 889 print "%%DocumentNeededFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
 890 print "%%Orientation: Portrait\n";
 891 print "%%PageOrder: Ascend\n";
 892 print "%%EndComments\n";
 893 print "%%BeginProlog\n";
 894
 895 # Emit the configurables as PostScript tokens
 896 foreach $c ( keys(%psconf) ) {
 897     print "/$c ", $psconf{$c}, " def\n";
 898 }
 899 foreach $c ( keys(%psbool) ) {
 900     print "/$c ", ($psbool{$c}?'true':'false'), " def\n";
 901 }
 902
 903 # Emit custom encoding vector
 904 $zstr = '/NASMEncoding [ ';
 905 foreach $c ( @NASMEncoding ) {
 906     my $z = '/'.(defined($c)?$c:'.notdef ').' ';
 907     if ( length($zstr)+length($z) > 72 ) {
 908         print $zstr,"\n";
 909         $zstr = ' ';
 910     }
 911     $zstr .= $z;
 912 }
 913 print $zstr, "] def\n";
 914
 915 # Font recoding routine
 916 # newname fontname --
 917 print "/nasmenc {\n";
 918 print "  findfont dup length dict begin\n";
 919 print "    { 1 index /FID ne {def}{pop pop} ifelse } forall\n";
 920 print "    /Encoding NASMEncoding def\n";
 921 print "    currentdict\n";
 922 print "  end\n";
 923 print "  definefont pop\n";
 924 print "} def\n";
 925
 926 # Emit fontset definitions
 927 foreach $fset ( @AllFonts ) {
 928     my $i = 0;
 929     my @zfonts = ();
 930     my %allfonts = ();
 931     foreach $font ( @{$fset->{fonts}} ) {
 932         $allfonts{$font->[1]->{name}}++;
 933     }
 934     foreach $font ( keys(%allfonts) ) {
 935         print '/',$font,'-NASM /',$font," nasmenc\n";
 936     }
 937     foreach $font ( @{$fset->{fonts}} ) {
 938         print '/', $fset->{name}, $i, ' ',
 939         '/', $font->[1]->{name}, '-NASM findfont ',
 940         $font->[0], " scalefont def\n";
 941         push(@zfonts, $fset->{name}.$i);
 942         $i++;
 943     }
 944     print '/', $fset->{name}, ' [', join(' ',@zfonts), "] def\n";
 945 }
 946
 947 # Emit the result as PostScript.  This is *NOT* correct code yet!
 948 open(PSHEAD, "< head.ps");
 949 while ( defined($line = <PSHEAD>) ) {
 950     print $line;
 951 }
 952 close(PSHEAD);
 953 print "%%EndProlog\n";
 954
 955 # Generate a PostScript string
 956 sub ps_string($) {
 957     my ($s) = @_;
 958     my ($i,$c);
 959     my ($o) = '(';
 960     my ($l) = length($s);
 961     for ( $i = 0 ; $i < $l ; $i++ ) {
 962         $c = substr($s,$i,1);
 963         if ( ord($c) < 32 || ord($c) > 126 ) {
 964             $o .= sprintf("\\%03o", ord($c));
 965         } elsif ( $c eq '(' || $c eq ')' || $c eq "\\" ) {
 966             $o .= "\\".$c;
 967         } else {
 968             $o .= $c;
 969         }
 970     }
 971     return $o.')';
 972 }
 973
 974 # Generate PDF bookmarks
 975 print "%%BeginSetup\n";
 976 foreach $b ( @bookmarks ) {
 977     print '[/Title ', ps_string($b->[2]), "\n";
 978     print '/Count ', $b->[1], ' ' if ( $b->[1] );
 979     print '/Dest /',$b->[0]," /OUT pdfmark\n";
 980 }
 981
 982 # Ask the PostScript interpreter for the proper size media
 983 print "setpagesize\n";
 984 print "%%EndSetup\n";
 985
 986 # Start a PostScript page
 987 sub ps_start_page() {
 988     $ps_page++;
 989     print "%%Page: $ps_page $ps_page\n";
 990     print "%%BeginPageSetup\n";
 991     print "save\n";
 992     print "%%EndPageSetup\n";
 993     print '/', $ps_page, " pa\n";
 994 }
 995
 996 # End a PostScript page
 997 sub ps_end_page($) {
 998     my($pn) = @_;
 999     if ( $pn ) {
1000         print "($ps_page)", (($ps_page & 1) ? 'pageodd' : 'pageeven'), "\n";
1001     }
1002     print "restore showpage\n";
1003 }
1004
1005 $ps_page = 0;
1006
1007 # Title page and inner cover
1008 ps_start_page();
1009 $title = $metadata{'title'};
1010 $title =~ s/ \- / $emdash /;
1011 $pstitle = ps_string($title);
1012
1013 # FIX THIS: This shouldn't be hard-coded like this
1014 print <<EOF;
1015 lmarg pageheight 2 mul 3 div moveto
1016 tfont0 setfont
1017 /title linkdest ${pstitle} show
1018 lmarg pageheight 2 mul 3 div 10 sub moveto
1019 0 setlinecap 3 setlinewidth
1020 pagewidth lmarg sub rmarg sub 0 rlineto stroke
1021 /nasmlogo {
1022 gsave 1 dict begin
1023 /sz exch def
1024 /Courier-Bold findfont sz scalefont setfont
1025 moveto
1026 0.85 1.22 scale
1027 [(-~~..~:\#;L       .-:\#;L,.-   .~:\#:;.T  -~~.~:;. .~:;. )
1028 ( E8+U    *T     +U\'   *T\#  .97     *L   E8+\'  *;T\'  *;, )
1029 ( D97     \`*L  .97     \'*L   \"T;E+:,     D9     *L    *L )
1030 ( H7       I\#  T7       I\#        \"*:.   H7     I\#    I\# )
1031 ( U:       :8  *\#+    , :8  T,      79   U:     :8    :8 )
1032 (,\#B.     .IE,  \"T;E*  .IE, J *+;\#:T*\"  ,\#B.   .IE,  .IE,)] {
1033 currentpoint 3 -1 roll
1034 sz -0.10 mul 0 3 -1 roll ashow
1035 sz 0.72 mul sub moveto
1036 } forall
1037 end grestore
1038 } def
1039 0.6 setgray
1040 pagewidth 2 div 143 sub
1041 pageheight 2 div 33 add
1042 12 nasmlogo
1043 EOF
1044 ps_end_page(0);
1045
1046 $curpage = 2;
1047 ps_start_page();
1048 foreach $line ( @pslines ) {
1049     my $linfo = $line->[0];
1050
1051     if ( $$linfo[4] != $curpage ) {
1052         ps_end_page($curpage > 2);
1053         ps_start_page();
1054         $curpage = $$linfo[4];
1055     }
1056
1057     print '[';
1058     my $curfont = 0;
1059     foreach my $c ( @{$line->[1]} ) {
1060         if ( $$c[0] >= 0 ) {
1061             if ( $curfont != $$c[0] ) {
1062                 print ($curfont = $$c[0]);
1063             }
1064             print ps_string($$c[1]);
1065         } elsif ( $$c[0] == -1 ) {
1066             print '{el}';       # End link
1067         } elsif ( $$c[0] == -2 ) {
1068             print '{/',$$c[1],' xl}'; # xref link
1069         } elsif ( $$c[0] == -3 ) {
1070             print '{',ps_string($$c[1]),'wl}'; # web link
1071         } elsif ( $$c[0] == -4 ) {
1072             # Index anchor -- ignore
1073         } elsif ( $$c[0] == -5 ) {
1074             print '{/',$$c[1],' xa}'; #xref anchor
1075         } elsif ( $$c[0] == -6 ) {
1076             print '][';         # Start a new array
1077             $curfont = 0;
1078         } elsif ( $$c[0] == -7 ) {
1079             print '{/',$$c[1],' pl}'; # page link
1080         } else {
1081             die "Unknown annotation";
1082         }
1083     }
1084     print ']';
1085     if ( defined($$linfo[2]) ) {
1086         foreach my $x ( @{$$linfo[2]} ) {
1087             if ( $$x[0] == $AuxStr ) {
1088                 print ps_string($$x[1]);
1089             } elsif ( $$x[0] == $AuxPage ) {
1090                 print $ps_xref_page{$$x[1]},' ';
1091             } elsif ( $$x[0] == $AuxPageStr ) {
1092                 print ps_string($ps_xref_page{$$x[1]});
1093             } elsif ( $$x[0] == $AuxXRef ) {
1094                 print '/',ps_xref($$x[1]),' ';
1095             } elsif ( $$x[0] == $AuxNum ) {
1096                 print $$x[1],' ';
1097             } else {
1098                 die "Unknown auxilliary data type";
1099             }
1100         }
1101     }
1102     print ($psconf{pageheight}-$psconf{topmarg}-$$linfo[5]);
1103     print ' ', $$linfo[6] if ( defined($$linfo[6]) );
1104     print ' ', $$linfo[0].$$linfo[1], "\n";
1105 }
1106
1107 ps_end_page(1);
1108 print "%%EOF\n";