doc/genps.pl

   1 #!/usr/bin/perl
   2 #
   3 # Format the documentation as PostScript
   4 #
   5
   6 require 'psfonts.ph';           # The fonts we want to use
   7 require 'pswidth.ph';           # PostScript string width
   8
   9 use Fcntl;
  10
  11 #
  12 # PostScript configurables; these values are also available to the
  13 # PostScript code itself
  14 #
  15 %psconf = (
  16            pagewidth => 595,    # Page width in PostScript points
  17            pageheight => 792,   # Page height in PostScript points
  18            lmarg => 100,        # Left margin in PostScript points
  19            rmarg => 50,         # Right margin in PostScript points
  20            topmarg => 100,      # Top margin in PostScript points
  21            botmarg => 100,      # Bottom margin in PostScript points
  22            plmarg => 50,        # Page number position relative to left margin
  23            prmarg => 0,         # Page number position relative to right margin
  24            pymarg => 50,        # Page number position relative to bot margin
  25            startcopyright => 100, # How much above the bottom margin is the
  26                                   # copyright notice stuff
  27            bulladj => 12,       # How much to indent a bullet paragraph
  28            tocind => 12,        # TOC indentation per level
  29            tocpnz => 24,        # Width of TOC page number only zone
  30            tocdots => 8,        # Spacing between TOC dots
  31            idxspace => 24,      # Minimum space between index title and pg#
  32            idxindent => 24,     # How much to indent a subindex entry
  33            idxgutter => 24,     # Space between index columns
  34            idxcolumns => 2,     # Number of index columns
  35            );
  36
  37 %psbool = (
  38            colorlinks => 0,     # Set links in blue rather than black
  39            );
  40
  41 # Known paper sizes
  42 %papersizes = (
  43                'a4'     => [595, 842], # ISO standard paper size
  44                'letter' => [612, 792], # US common paper size
  45                'pa4'    => [595, 792], # Compromise ("portable a4")
  46                'b4'     => [709,1002], # ISO intermediate paper size
  47                'legal'  => [612,1008], # US intermediate paper size
  48                'a3'     => [842,1190], # ISO double paper size
  49                '11x17'  => [792,1224], # US double paper size
  50                );
  51
  52 #
  53 # Parse the command line
  54 #
  55 undef $input;
  56 while ( $arg = shift(@ARGV) ) {
  57     if ( $arg =~ /^\-(|no\-)(.*)$/ ) {
  58         $parm = $2;
  59         $true = ($1 eq '') ? 1 : 0;
  60         if ( $true && defined($papersizes{$parm}) ) {
  61             $psconf{pagewidth}  = $papersizes{$parm}->[0];
  62             $psconf{pageheight} = $papersizes{$parm}->[1];
  63         } elsif ( defined($psbool{$parm}) ) {
  64             $psbool{$parm} = $true;
  65         } elsif ( $true && defined($psconf{$parm}) ) {
  66             $psconf{$parm} = shift(@ARGV);
  67         } else {
  68             die "$0: Unknown option: $arg\n";
  69         }
  70     } else {
  71         $input = $arg;
  72     }
  73 }
  74
  75 #
  76 # Document formatting parameters
  77 #
  78 $paraskip = 6;                  # Space between paragraphs
  79 $chapstart = 30;                # Space before a chapter heading
  80 $chapskip = 24;                 # Space after a chapter heading
  81 $tocskip = 6;                   # Space between TOC entries
  82
  83 # Configure post-paragraph skips for each kind of paragraph
  84 %skiparray = ('chap' => $chapskip, 'appn' => $chapstart,
  85               'head' => $paraskip, 'subh' => $paraskip,
  86               'norm' => $paraskip, 'bull' => $paraskip,
  87               'code' => $paraskip, 'toc0' => $tocskip,
  88               'toc1' => $tocskip,  'toc2' => $tocskip);
  89
  90 #
  91 # Custom encoding vector.  This is basically the same as
  92 # ISOLatin1Encoding (a level 2 feature, so we dont want to use it),
  93 # but with a few extra characters thrown in.  It is basically a
  94 # modified Windows 1252 codepage, minus, for now, the euro sign (\200
  95 # is reserved for euro.)
  96 #
  97 @NASMEncoding =
  98 (
  99  (undef)x32,
 100  'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
 101  'ampersand', 'quoteright', 'parenleft',
 102  'parenright', 'asterisk', 'plus', 'comma', 'minus',
 103  'period', 'slash', 'zero', 'one', 'two', 'three',
 104  'four', 'five', 'six', 'seven', 'eight', 'nine',
 105  'colon', 'semicolon', 'less', 'equal', 'greater',
 106  'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
 107  'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q',
 108  'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
 109  'bracketleft', 'backslash', 'bracketright',
 110  'asciicircum', 'underscore', 'quoteleft', 'a', 'b',
 111  'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
 112  'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
 113  'w', 'x', 'y', 'z', 'braceleft', 'bar',
 114  'braceright', 'asciitilde', undef,
 115  undef, 'macron', 'quotesinglbase', 'florin',
 116  'quotedblbase', 'ellipsis', 'dagger', 'dbldagger',
 117  'circumflex', 'perthousand', 'Scaron', 'guilsinglleft',
 118  'OE', 'hungarumlaut', 'Zcaron', 'caron',
 119  'ogonek', 'grave', 'quotesingle', 'quotedblleft',
 120  'quotedblright', 'bullet', 'endash', 'emdash',
 121  'tilde', 'trademark', 'scaron', 'guilsignlright',
 122  'oe', 'ring', 'zcaron', 'Ydieresis',
 123  'space', 'exclamdown', 'cent', 'sterling',
 124  'currency', 'yen', 'brokenbar', 'section',
 125  'dieresis', 'copyright', 'ordfeminine',
 126  'guillemotleft', 'logicalnot', 'hyphen',
 127  'registered', 'macron', 'degree', 'plusminus',
 128  'twosuperior', 'threesuperior', 'acute', 'mu',
 129  'paragraph', 'periodcentered', 'cedilla',
 130  'onesuperior', 'ordmasculine', 'guillemotright',
 131  'onequarter', 'onehalf', 'threequarters',
 132  'questiondown', 'Agrave', 'Aacute', 'Acircumflex',
 133  'Atilde', 'Adieresis', 'Aring', 'AE', 'Ccedilla',
 134  'Egrave', 'Eacute', 'Ecircumflex', 'Edieresis',
 135  'Igrave', 'Iacute', 'Icircumflex', 'Idieresis',
 136  'Eth', 'Ntilde', 'Ograve', 'Oacute', 'Ocircumflex',
 137  'Otilde', 'Odieresis', 'multiply', 'Oslash',
 138  'Ugrave', 'Uacute', 'Ucircumflex', 'Udieresis',
 139  'Yacute', 'Thorn', 'germandbls', 'agrave', 'aacute',
 140  'acircumflex', 'atilde', 'adieresis', 'aring', 'ae',
 141  'ccedilla', 'egrave', 'eacute', 'ecircumflex',
 142  'edieresis', 'igrave', 'iacute', 'icircumflex',
 143  'idieresis', 'eth', 'ntilde', 'ograve', 'oacute',
 144  'ocircumflex', 'otilde', 'odieresis', 'divide',
 145  'oslash', 'ugrave', 'uacute', 'ucircumflex',
 146  'udieresis', 'yacute', 'thorn', 'ydieresis'
 147 );
 148
 149 $emdash = "\227";
 150 $endash = "\226";
 151 $bullet = "\225";
 152
 153 #
 154 # First, format the stuff coming from the front end into
 155 # a cleaner representation
 156 #
 157 if ( defined($input) ) {
 158     sysopen(PARAS, $input, O_RDONLY) or
 159         die "$0: cannot open $input: $!\n";
 160 } else {
 161     open(PARAS, "<&STDIN") or die "$0: $!\n";
 162 }
 163 while ( defined($line = <PARAS>) ) {
 164     chomp $line;
 165     $data = <PARAS>;
 166     chomp $data;
 167     if ( $line =~ /^meta :(.*)$/ ) {
 168         $metakey = $1;
 169         $metadata{$metakey} = $data;
 170     } elsif ( $line =~ /^indx :(.*)$/ ) {
 171         $ixentry = $1;
 172         push(@ixentries, $ixentry);
 173         $ixterms{$ixentry} = [split(/\037/, $data)];
 174         # Look for commas.  This is easier done on the string
 175         # representation, so do it now.
 176         if ( $data =~ /^(.*)\,\037sp\037/ ) {
 177             $ixprefix = $1;
 178             $ixprefix =~ s/\037n $//; # Discard possible font change at end
 179             $ixhasprefix{$ixentry} = $ixprefix;
 180             if ( !$ixprefixes{$ixprefix} ) {
 181                 $ixcommafirst{$ixentry}++;
 182             }
 183             $ixprefixes{$ixprefix}++;
 184         } else {
 185             # A complete term can also be used as a prefix
 186             $ixprefixes{$data}++;
 187         }
 188     } else {
 189         push(@ptypes, $line);
 190         push(@paras, [split(/\037/, $data)]);
 191     }
 192 }
 193 close(PARAS);
 194
 195 #
 196 # Convert an integer to a chosen base
 197 #
 198 sub int2base($$) {
 199     my($i,$b) = @_;
 200     my($s) = '';
 201     my($n) = '';
 202     my($z) = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
 203     return '0' if ($i == 0);
 204     if ( $i < 0 ) { $n = '-'; $i = -$i; }
 205     while ( $i ) {
 206         $s = substr($z,$i%$b,1) . $s;
 207         $i = int($i/$b);
 208     }
 209     return $n.$s;
 210 }
 211
 212 #
 213 # Convert a string to a rendering array
 214 #
 215 sub string2array($)
 216 {
 217     my($s) = @_;
 218     my(@a) = ();
 219
 220     while ( $s =~ /^(\s+|\S+)(.*)$/ ) {
 221         push(@a, [0,$1]);
 222         $s = $2;
 223     }
 224
 225     return @a;
 226 }
 227
 228 #
 229 # Take a crossreference name and generate the PostScript name for it.
 230 #
 231 # This hack produces a somewhat smaller PDF...
 232 #%ps_xref_list = ();
 233 #$ps_xref_next = 0;
 234 #sub ps_xref($) {
 235 #    my($s) = @_;
 236 #    my $q = $ps_xref_list{$s};
 237 #    return $q if ( defined($ps_xref_list{$s}) );
 238 #    $q = 'X'.int2base($ps_xref_next++, 52);
 239 #    $ps_xref_list{$s} = $q;
 240 #    return $q;
 241 #}
 242
 243 # Somewhat bigger PDF, but one which obeys # URLs
 244 sub ps_xref($) {
 245     return @_[0];
 246 }
 247
 248 #
 249 # Flow lines according to a particular font set and width
 250 #
 251 # A "font set" is represented as an array containing
 252 # arrays of pairs: [<size>, <metricref>]
 253 #
 254 # Each line is represented as:
 255 # [ [type,first|last,aux,fontset,page,ypos,optional col],
 256 #   [rendering array] ]
 257 #
 258 # A space character may be "squeezed" by up to this much
 259 # (as a fraction of the normal width of a space.)
 260 #
 261 $ps_space_squeeze = 0.00;       # Min space width 100%
 262 sub ps_flow_lines($$$@) {
 263     my($wid, $fontset, $type, @data) = @_;
 264     my($fonts) = $$fontset{fonts};
 265     my($e);
 266     my($w)  = 0;                # Width of current line
 267     my($sw) = 0;                # Width of current line due to spaces
 268     my(@l)  = ();               # Current line
 269     my(@ls) = ();               # Accumulated output lines
 270     my(@xd) = ();               # Metadata that goes with subsequent text
 271     my $hasmarker = 0;          # Line has -6 marker
 272     my $pastmarker = 0;         # -6 marker found
 273
 274     # If there is a -6 marker anywhere in the paragraph,
 275     # *each line* output needs to have a -6 marker
 276     foreach $e ( @data ) {
 277         $hasmarker = 1 if ( $$e[0] == -6 );
 278     }
 279
 280     $w = 0;
 281     foreach $e ( @data ) {
 282         if ( $$e[0] < 0 ) {
 283             # Type is metadata.  Zero width.
 284             if ( $$e[0] == -6 ) {
 285                 $pastmarker = 1;
 286             }
 287             if ( $$e[0] == -1 || $$e[0] == -6 ) {
 288                 # -1 (end anchor) or -6 (marker) goes with the preceeding
 289                 # text, otherwise with the subsequent text
 290                 push(@l, $e);
 291             } else {
 292                 push(@xd, $e);
 293             }
 294         } else {
 295             my $ew = ps_width($$e[1], $fontset->{fonts}->[$$e[0]][1],
 296                               \@NASMEncoding) *
 297                 ($fontset->{fonts}->[$$e[0]][0]/1000);
 298             my $sp = $$e[1];
 299             $sp =~ tr/[^ ]//d;  # Delete nonspaces
 300             my $esw = ps_width($sp, $fontset->{fonts}->[$$e[0]][1],
 301                                \@NASMEncoding) *
 302                 ($fontset->{fonts}->[$$e[0]][0]/1000);
 303
 304             if ( ($w+$ew) - $ps_space_squeeze*($sw+$esw) > $wid ) {
 305                 # Begin new line
 306                 # Search backwards for previous space chunk
 307                 my $lx = scalar(@l)-1;
 308                 my @rm = ();
 309                 while ( $lx >= 0 ) {
 310                     while ( $lx >= 0 && $l[$lx]->[0] < 0 ) {
 311                         # Skip metadata
 312                         $pastmarker = 0 if ( $l[$lx]->[0] == -6 );
 313                         $lx--;
 314                     };
 315                     if ( $lx >= 0 ) {
 316                         if ( $l[$lx]->[1] eq ' ' ) {
 317                             splice(@l, $lx, 1);
 318                             @rm = splice(@l, $lx);
 319                             last; # Found place to break
 320                         } else {
 321                             $lx--;
 322                         }
 323                     }
 324                 }
 325
 326                 # Now @l contains the stuff to remain on the old line
 327                 # If we broke the line inside a link, then split the link
 328                 # into two.
 329                 my $lkref = undef;
 330                 foreach my $lc ( @l ) {
 331                     if ( $$lc[0] == -2 || $$lc[0] == -3 || $lc[0] == -7 ) {
 332                         $lkref = $lc;
 333                     } elsif ( $$lc[0] == -1 ) {
 334                         undef $lkref;
 335                     }
 336                 }
 337
 338                 if ( defined($lkref) ) {
 339                     push(@l, [-1,undef]); # Terminate old reference
 340                     unshift(@rm, $lkref); # Duplicate reference on new line
 341                 }
 342
 343                 if ( $hasmarker ) {
 344                     if ( $pastmarker ) {
 345                         unshift(@rm,[-6,undef]); # New line starts with marker
 346                     } else {
 347                         push(@l,[-6,undef]); # Old line ends with marker
 348                     }
 349                 }
 350
 351                 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]);
 352                 @l = @rm;
 353
 354                 $w = $sw = 0;
 355                 # Compute the width of the remainder array
 356                 for my $le ( @l ) {
 357                     if ( $$le[0] >= 0 ) {
 358                         my $xew = ps_width($$le[1],
 359                                            $fontset->{fonts}->[$$le[0]][1],
 360                                            \@NASMEncoding) *
 361                             ($fontset->{fonts}->[$$le[0]][0]/1000);
 362                         my $xsp = $$le[1];
 363                         $xsp =~ tr/[^ ]//d;     # Delete nonspaces
 364                         my $xsw = ps_width($xsp,
 365                                            $fontset->{fonts}->[$$le[0]][1],
 366                                            \@NASMEncoding) *
 367                             ($fontset->{fonts}->[$$le[0]][0]/1000);
 368                         $w += $xew;  $sw += $xsw;
 369                     }
 370                 }
 371             }
 372             push(@l, @xd);      # Accumulated metadata
 373             @xd = ();
 374             if ( $$e[1] ne '' ) {
 375                 push(@l, $e);
 376                 $w += $ew; $sw += $esw;
 377             }
 378         }
 379     }
 380     push(@l,@xd);
 381     if ( scalar(@l) ) {
 382         push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]); # Final line
 383     }
 384
 385     # Mark the first line as first and the last line as last
 386     if ( scalar(@ls) ) {
 387         $ls[0]->[0]->[1] |= 1;     # First in para
 388         $ls[-1]->[0]->[1] |= 2;    # Last in para
 389     }
 390     return @ls;
 391 }
 392
 393 #
 394 # Once we have broken things into lines, having multiple chunks
 395 # with the same font index is no longer meaningful.  Merge
 396 # adjacent chunks to keep down the size of the whole file.
 397 #
 398 sub ps_merge_chunks(@) {
 399     my(@ci) = @_;
 400     my($c, $lc);
 401     my(@co, $eco);
 402
 403     undef $lc;
 404     @co = ();
 405     $eco = -1;                  # Index of the last entry in @co
 406     foreach $c ( @ci ) {
 407         if ( defined($lc) && $$c[0] == $lc && $$c[0] >= 0 ) {
 408             $co[$eco]->[1] .= $$c[1];
 409         } else {
 410             push(@co, $c);  $eco++;
 411             $lc = $$c[0];
 412         }
 413     }
 414     return @co;
 415 }
 416
 417 #
 418 # Convert paragraphs to rendering arrays.  Each
 419 # element in the array contains (font, string),
 420 # where font can be one of:
 421 # -1 end link
 422 # -2 begin crossref
 423 # -3 begin weblink
 424 # -4 index item anchor
 425 # -5 crossref anchor
 426 # -6 left/right marker (used in the index)
 427 # -7 page link (used in the index)
 428 #  0 normal
 429 #  1 empatic (italic)
 430 #  2 code (fixed spacing)
 431 #
 432
 433 sub mkparaarray($@) {
 434     my($ptype, @chunks) = @_;
 435
 436     my @para = ();
 437     my $in_e = 0;
 438     my $chunk;
 439
 440     if ( $ptype =~ /^code/ ) {
 441         foreach $chunk ( @chunks ) {
 442             push(@para, [2, $chunk]);
 443         }
 444     } else {
 445         foreach $chunk ( @chunks ) {
 446             my $type = substr($chunk,0,2);
 447             my $text = substr($chunk,2);
 448
 449             if ( $type eq 'sp' ) {
 450                 push(@para, [$in_e?1:0, ' ']);
 451             } elsif ( $type eq 'da' ) {
 452                 push(@para, [$in_e?1:0, $endash]);
 453             } elsif ( $type eq 'n ' ) {
 454                 push(@para, [0, $text]);
 455                 $in_e = 0;
 456             } elsif ( $type =~ '^e' ) {
 457                 push(@para, [1, $text]);
 458                 $in_e = ($type eq 'es' || $type eq 'e ');
 459             } elsif ( $type eq 'c ' ) {
 460                 push(@para, [2, $text]);
 461                 $in_e = 0;
 462             } elsif ( $type eq 'x ' ) {
 463                 push(@para, [-2, ps_xref($text)]);
 464             } elsif ( $type eq 'xe' ) {
 465                 push(@para, [-1, undef]);
 466             } elsif ( $type eq 'wc' || $type eq 'w ' ) {
 467                 $text =~ /\<(.*)\>(.*)$/;
 468                 my $link = $1; $text = $2;
 469                 push(@para, [-3, $link]);
 470                 push(@para, [($type eq 'wc') ? 2:0, $text]);
 471                 push(@para, [-1, undef]);
 472                 $in_e = 0;
 473             } elsif ( $type eq 'i ' ) {
 474                 push(@para, [-4, $text]);
 475             } else {
 476                 die "Unexpected paragraph chunk: $chunk";
 477             }
 478         }
 479     }
 480     return @para;
 481 }
 482
 483 $npara = scalar(@paras);
 484 for ( $i = 0 ; $i < $npara ; $i++ ) {
 485     $paras[$i] = [mkparaarray($ptypes[$i], @{$paras[$i]})];
 486 }
 487
 488 #
 489 # This converts a rendering array to a simple string
 490 #
 491 sub ps_arraytostr(@) {
 492     my $s = '';
 493     my $c;
 494     foreach $c ( @_ ) {
 495         $s .= $$c[1] if ( $$c[0] >= 0 );
 496     }
 497     return $s;
 498 }
 499
 500 #
 501 # This generates a duplicate of a paragraph
 502 #
 503 sub ps_dup_para(@) {
 504     my(@i) = @_;
 505     my(@o) = ();
 506     my($c);
 507
 508     foreach $c ( @i ) {
 509         my @cc = @{$c};
 510         push(@o, [@cc]);
 511     }
 512     return @o;
 513 }
 514
 515 #
 516 # This generates a duplicate of a paragraph, stripping anchor
 517 # tags (-4 and -5)
 518 #
 519 sub ps_dup_para_noanchor(@) {
 520     my(@i) = @_;
 521     my(@o) = ();
 522     my($c);
 523
 524     foreach $c ( @i ) {
 525         my @cc = @{$c};
 526         push(@o, [@cc]) unless ( $cc[0] == -4 || $cc[0] == -5 );
 527     }
 528     return @o;
 529 }
 530
 531 #
 532 # Scan for header paragraphs and fix up their contents;
 533 # also generate table of contents and PDF bookmarks.
 534 #
 535 @tocparas = ([[-5, 'contents'], [0,'Contents']]);
 536 @tocptypes = ('chap');
 537 @bookmarks = (['title', 0, 'Title Page'], ['contents', 0, 'Contents']);
 538 %bookref = ();
 539 for ( $i = 0 ; $i < $npara ; $i++ ) {
 540     my $xtype = $ptypes[$i];
 541     my $ptype = substr($xtype,0,4);
 542     my $str;
 543     my $book;
 544
 545     if ( $ptype eq 'chap' || $ptype eq 'appn' ) {
 546         unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
 547             die "Bad para";
 548         }
 549         my $secn = $1;
 550         my $sech = $2;
 551         my $xref = ps_xref($sech);
 552         my $chap = ($ptype eq 'chap')?'Chapter':'Appendix';
 553
 554         $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
 555         push(@bookmarks, $book);
 556         $bookref{$secn} = $book;
 557
 558         push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
 559         push(@tocptypes, 'toc0'.' :'.$sech.':'.$chap.' '.$secn.':');
 560
 561         unshift(@{$paras[$i]},
 562                 [-5, $xref], [0,$chap.' '.$secn.':'], [0, ' ']);
 563     } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
 564         unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
 565             die "Bad para";
 566         }
 567         my $secn = $1;
 568         my $sech = $2;
 569         my $xref = ps_xref($sech);
 570         my $pref;
 571         $pref = $secn; $pref =~ s/\.[^\.]+$//; # Find parent node
 572
 573         $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
 574         push(@bookmarks, $book);
 575         $bookref{$secn} = $book;
 576         $bookref{$pref}->[1]--; # Adjust count for parent node
 577
 578         push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
 579         push(@tocptypes,
 580              (($ptype eq 'subh') ? 'toc2':'toc1').' :'.$sech.':'.$secn);
 581
 582         unshift(@{$paras[$i]}, [-5, $xref]);
 583     }
 584 }
 585
 586 #
 587 # Add TOC to beginning of paragraph list
 588 #
 589 unshift(@paras,  @tocparas);  undef @tocparas;
 590 unshift(@ptypes, @tocptypes); undef @tocptypes;
 591
 592 #
 593 # Add copyright notice to the beginning
 594 #
 595 unshift(@paras, [[0, "\251"], [0, ' '], [0,$metadata{'year'}],
 596                  [0, ' '], string2array($metadata{'author'})],
 597         [[0, ' ']], [string2array($metadata{'license'})]);
 598 unshift(@ptypes, 'norm', 'norm', 'norm');
 599
 600 $npara = scalar(@paras);
 601
 602 #
 603 # No lines generated, yet.
 604 #
 605 @pslines    = ();
 606
 607 #
 608 # Line Auxilliary Information Types
 609 #
 610 $AuxStr     = 1;                # String
 611 $AuxPage    = 2;                # Page number (from xref)
 612 $AuxPageStr = 3;                # Page number as a PostScript string
 613 $AuxXRef    = 4;                # Cross reference as a name
 614 $AuxNum     = 5;                # Number
 615
 616 #
 617 # Break or convert paragraphs into lines, and push them
 618 # onto the @pslines array.
 619 #
 620 sub ps_break_lines($$) {
 621     my ($paras,$ptypes) = @_;
 622
 623     my $linewidth  = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
 624     my $bullwidth  = $linewidth-$psconf{bulladj};
 625     my $indxwidth  = ($linewidth-$psconf{idxgutter})/$psconf{idxcolumns}
 626                      -$psconf{idxspace};
 627
 628     my $npara = scalar(@{$paras});
 629     my $i;
 630
 631     for ( $i = 0 ; $i < $npara ; $i++ ) {
 632         my $xtype = $ptypes->[$i];
 633         my $ptype = substr($xtype,0,4);
 634         my @data = @{$paras->[$i]};
 635         my @ls = ();
 636         if ( $ptype eq 'code' ) {
 637             my $p;
 638             # Code paragraph; each chunk is a line
 639             foreach $p ( @data ) {
 640                 push(@ls, [[$ptype,0,undef,\%BodyFont,0,0],[$p]]);
 641             }
 642             $ls[0]->[0]->[1] |= 1;           # First in para
 643             $ls[-1]->[0]->[1] |= 2;      # Last in para
 644         } elsif ( $ptype eq 'chap' || $ptype eq 'appn' ) {
 645             # Chapters are flowed normally, but in an unusual font
 646             @ls = ps_flow_lines($linewidth, \%ChapFont, $ptype, @data);
 647         } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
 648             unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
 649                 die "Bad para";
 650             }
 651             my $secn = $1;
 652             my $sech = $2;
 653             my $font = ($ptype eq 'head') ? \%HeadFont : \%SubhFont;
 654             @ls = ps_flow_lines($linewidth, $font, $ptype, @data);
 655             # We need the heading number as auxillary data
 656             $ls[0]->[0]->[2] = [[$AuxStr,$secn]];
 657         } elsif ( $ptype eq 'norm' ) {
 658             @ls = ps_flow_lines($linewidth, \%BodyFont, $ptype, @data);
 659         } elsif ( $ptype eq 'bull' ) {
 660             @ls = ps_flow_lines($bullwidth, \%BodyFont, $ptype, @data);
 661         } elsif ( $ptype =~ /^toc/ ) {
 662             unless ( $xtype =~/^\S+ :([^:]*):(.*)$/ ) {
 663                 die "Bad para";
 664             }
 665             my $xref = $1;
 666             my $refname = $2.' ';
 667             my $ntoc = substr($ptype,3,1)+0;
 668             my $refwidth = ps_width($refname, $BodyFont{fonts}->[0][1],
 669                                     \@NASMEncoding) *
 670                 ($BodyFont{fonts}->[0][0]/1000);
 671
 672             @ls = ps_flow_lines($linewidth-$ntoc*$psconf{tocind}-
 673                                 $psconf{tocpnz}-$refwidth,
 674                                 \%BodyFont, $ptype, @data);
 675
 676             # Auxilliary data: for the first line, the cross reference symbol
 677             # and the reference name; for all lines but the first, the
 678             # reference width; and for the last line, the page number
 679             # as a string.
 680             my $nl = scalar(@ls);
 681             $ls[0]->[0]->[2] = [[$AuxStr,$refname], [$AuxXRef,$xref]];
 682             for ( $j = 1 ; $j < $nl ; $j++ ) {
 683                 $ls[$j]->[0]->[2] = [[$AuxNum,$refwidth]];
 684             }
 685             push(@{$ls[$nl-1]->[0]->[2]}, [$AuxPageStr,$xref]);
 686         } elsif ( $ptype =~ /^idx/ ) {
 687             my $lvl = substr($ptype,3,1)+0;
 688
 689             @ls = ps_flow_lines($indxwidth-$lvl*$psconf{idxindent},
 690                                 \%BodyFont, $ptype, @data);
 691         } else {
 692             die "Unknown para type: $ptype";
 693         }
 694         # Merge adjacent identical chunks
 695         foreach $l ( @ls ) {
 696             @{$$l[1]} = ps_merge_chunks(@{$$l[1]});
 697         }
 698         push(@pslines,@ls);
 699     }
 700 }
 701
 702 # Break the main body text into lines.
 703 ps_break_lines(\@paras, \@ptypes);
 704
 705 #
 706 # Break lines in to pages
 707 #
 708
 709 # Where to start on page 2, the copyright page
 710 $curpage = 2;                   # Start on page 2
 711 $curypos = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg}-
 712     $psconf{startcopyright};
 713 undef $columnstart;             # Not outputting columnar text
 714 undef $curcolumn;               # Current column
 715 $nlines = scalar(@pslines);
 716
 717 #
 718 # This formats lines inside the global @pslines array into pages,
 719 # updating the page and y-coordinate entries.  Start at the
 720 # $startline position in @pslines and go to but not including
 721 # $endline.  The global variables $curpage, $curypos, $columnstart
 722 # and $curcolumn are updated appropriately.
 723 #
 724 sub ps_break_pages($$) {
 725     my($startline, $endline) = @_;
 726
 727     # Paragraph types which should never be broken
 728     my $nobreakregexp = "^(chap|appn|head|subh|toc.|idx.)\$";
 729     # Paragraph types which are heading (meaning they should not be broken
 730     # immediately after)
 731     my $nobreakafter = "^(chap|appn|head|subh)\$";
 732     # Paragraph types which should never be broken *before*
 733     my $nobreakbefore = "^idx[1-9]\$";
 734     # Paragraph types which are set in columnar format
 735     my $columnregexp = "^idx.\$";
 736
 737     my $upageheight = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg};
 738
 739     my $i;
 740
 741     for ( $i = $startline ; $i < $endline ; $i++ ) {
 742         my $linfo = $pslines[$i]->[0];
 743         if ( ($$linfo[0] eq 'chap' || $$linfo[0] eq 'appn' )
 744              && ($$linfo[1] & 1) ) {
 745             # First line of a new chapter heading.  Start a new page.
 746             undef $columnstart;
 747             $curpage++ if ( $curypos > 0 || defined($columnstart) );
 748             $curypos = $chapstart;
 749         } elsif ( defined($columnstart) && $$linfo[0] !~ /$columnregexp/o ) {
 750             undef $columnstart;
 751             $curpage++;
 752             $curypos = 0;
 753         }
 754
 755         if ( $$linfo[0] =~ /$columnregexp/o && !defined($columnstart) ) {
 756             $columnstart = $curypos;
 757             $curcolumn = 0;
 758         }
 759
 760         # Adjust position by the appropriate leading
 761         $curypos += $$linfo[3]->{leading};
 762
 763         # Record the page and y-position
 764         $$linfo[4] = $curpage;
 765         $$linfo[5] = $curypos;
 766         $$linfo[6] = $curcolumn if ( defined($columnstart) );
 767
 768         if ( $curypos > $upageheight ) {
 769             # We need to break the page before this line.
 770             my $broken = 0;             # No place found yet
 771             while ( !$broken && $pslines[$i]->[0]->[4] == $curpage ) {
 772                 my $linfo = $pslines[$i]->[0];
 773                 my $pinfo = $pslines[$i-1]->[0];
 774
 775                 if ( $$linfo[1] == 2 ) {
 776                     # This would be an orphan, don't break.
 777                 } elsif ( $$linfo[1] & 1 ) {
 778                     # Sole line or start of paragraph.  Break unless
 779                     # the previous line was part of a heading.
 780                     $broken = 1 if ( $$pinfo[0] !~ /$nobreakafter/o &&
 781                                      $$linfo[0] !~ /$nobreakbefore/o );
 782                 } else {
 783                     # Middle of paragraph.  Break unless we're in a
 784                     # no-break paragraph, or the previous line would
 785                     # end up being a widow.
 786                     $broken = 1 if ( $$linfo[0] !~ /$nobreakregexp/o &&
 787                                      $$pinfo[1] != 1 );
 788                 }
 789                 $i--;
 790             }
 791             die "Nowhere to break page $curpage\n" if ( !$broken );
 792             # Now $i should point to line immediately before the break, i.e.
 793             # the next paragraph should be the first on the new page
 794             if ( defined($columnstart) &&
 795                  ++$curcolumn < $psconf{idxcolumns} ) {
 796                 # We're actually breaking text into columns, not pages
 797                 $curypos = $columnstart;
 798             } else {
 799                 undef $columnstart;
 800                 $curpage++;
 801                 $curypos = 0;
 802             }
 803             next;
 804         }
 805
 806         # Add end of paragraph skip
 807         if ( $$linfo[1] & 2 ) {
 808             $curypos += $skiparray{$$linfo[0]};
 809         }
 810     }
 811 }
 812
 813 ps_break_pages(0,$nlines);      # Break the main text body into pages
 814
 815 #
 816 # Find the page number of all the indices
 817 #
 818 %ps_xref_page   = ();           # Crossref anchor pages
 819 %ps_index_pages = ();           # Index item pages
 820 $nlines = scalar(@pslines);
 821 for ( $i = 0 ; $i < $nlines ; $i++ ) {
 822     my $linfo = $pslines[$i]->[0];
 823     foreach my $c ( @{$pslines[$i]->[1]} ) {
 824         if ( $$c[0] == -4 ) {
 825             if ( !defined($ps_index_pages{$$c[1]}) ) {
 826                 $ps_index_pages{$$c[1]} = [];
 827             } elsif ( $ps_index_pages{$$c[1]}->[-1] eq $$linfo[4] ) {
 828                 # Pages are emitted in order; if this is a duplicated
 829                 # entry it will be the last one
 830                 next;           # Duplicate
 831             }
 832             push(@{$ps_index_pages{$$c[1]}}, $$linfo[4]);
 833         } elsif ( $$c[0] == -5 ) {
 834             $ps_xref_page{$$c[1]} = $$linfo[4];
 835         }
 836     }
 837 }
 838
 839 #
 840 # Emit index paragraphs
 841 #
 842 $startofindex = scalar(@pslines);
 843 @ixparas = ([[-5,'index'],[0,'Index']]);
 844 @ixptypes = ('chap');
 845
 846 foreach $k ( @ixentries ) {
 847     my $n,$i;
 848     my $ixptype = 'idx0';
 849     my $prefix = $ixhasprefix{$k};
 850     my @ixpara = mkparaarray($ixptype,@{$ixterms{$k}});
 851     my $commapos = undef;
 852
 853     if ( defined($prefix) && $ixprefixes{$prefix} > 1 ) {
 854         # This entry has a "hanging comma"
 855         for ( $i = 0 ; $i < scalar(@ixpara)-1 ; $i++ ) {
 856             if ( substr($ixpara[$i]->[1],-1,1) eq ',' &&
 857                  $ixpara[$i+1]->[1] eq ' ' ) {
 858                 $commapos = $i;
 859                 last;
 860             }
 861         }
 862     }
 863     if ( defined($commapos) ) {
 864         if ( $ixcommafirst{$k} ) {
 865             # This is the first entry; generate the
 866             # "hanging comma" entry
 867             my @precomma = splice(@ixpara,0,$commapos);
 868             if ( $ixpara[0]->[1] eq ',' ) {
 869                 shift(@ixpara); # Discard lone comma
 870             } else {
 871                 # Discard attached comma
 872                 $ixpara[0]->[1] =~ s/\,$//;
 873                 push(@precomma,shift(@ixpara));
 874             }
 875             push(@precomma, [-6,undef]);
 876             push(@ixparas, [@precomma]);
 877             push(@ixptypes, $ixptype);
 878             shift(@ixpara);     # Remove space
 879         } else {
 880             splice(@ixpara,0,$commapos+2);
 881         }
 882         $ixptype = 'idx1';
 883     }
 884
 885     push(@ixpara, [-6,undef]);  # Left/right marker
 886     $i = 1;  $n = scalar(@{$ps_index_pages{$k}});
 887     foreach $p ( @{$ps_index_pages{$k}} ) {
 888         if ( $i++ == $n ) {
 889             push(@ixpara,[-7,$p],[0,"$p"],[-1,undef]);
 890         } else {
 891             push(@ixpara,[-7,$p],[0,"$p,"],[-1,undef],[0,' ']);
 892         }
 893     }
 894
 895     push(@ixparas, [@ixpara]);
 896     push(@ixptypes, $ixptype);
 897 }
 898
 899 #
 900 # Flow index paragraphs into lines
 901 #
 902 ps_break_lines(\@ixparas, \@ixptypes);
 903
 904 #
 905 # Format index into pages
 906 #
 907 $nlines = scalar(@pslines);
 908 ps_break_pages($startofindex, $nlines);
 909
 910 #
 911 # Push index onto bookmark list
 912 #
 913 push(@bookmarks, ['index', 0, 'Index']);
 914
 915 # Get the list of fonts used
 916 %ps_all_fonts = ();
 917 foreach $fset ( @AllFonts ) {
 918     foreach $font ( @{$fset->{fonts}} ) {
 919         $ps_all_fonts{$font->[1]->{name}}++;
 920     }
 921 }
 922
 923 # Emit the PostScript DSC header
 924 print "%!PS-Adobe-3.0\n";
 925 print "%%Pages: $curpage\n";
 926 print "%%BoundingBox: 0 0 ", $psconf{pagewidth}, ' ', $psconf{pageheight}, "\n";
 927 print "%%Creator: NASM psflow.pl\n";
 928 print "%%DocumentData: Clean7Bit\n";
 929 print "%%DocumentFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
 930 print "%%DocumentNeededFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
 931 print "%%Orientation: Portrait\n";
 932 print "%%PageOrder: Ascend\n";
 933 print "%%EndComments\n";
 934 print "%%BeginProlog\n";
 935
 936 # Emit the configurables as PostScript tokens
 937 foreach $c ( keys(%psconf) ) {
 938     print "/$c ", $psconf{$c}, " def\n";
 939 }
 940 foreach $c ( keys(%psbool) ) {
 941     print "/$c ", ($psbool{$c}?'true':'false'), " def\n";
 942 }
 943
 944 # Emit custom encoding vector
 945 $zstr = '/NASMEncoding [ ';
 946 foreach $c ( @NASMEncoding ) {
 947     my $z = '/'.(defined($c)?$c:'.notdef ').' ';
 948     if ( length($zstr)+length($z) > 72 ) {
 949         print $zstr,"\n";
 950         $zstr = ' ';
 951     }
 952     $zstr .= $z;
 953 }
 954 print $zstr, "] def\n";
 955
 956 # Font recoding routine
 957 # newname fontname --
 958 print "/nasmenc {\n";
 959 print "  findfont dup length dict begin\n";
 960 print "    { 1 index /FID ne {def}{pop pop} ifelse } forall\n";
 961 print "    /Encoding NASMEncoding def\n";
 962 print "    currentdict\n";
 963 print "  end\n";
 964 print "  definefont pop\n";
 965 print "} def\n";
 966
 967 # Emit fontset definitions
 968 foreach $fset ( @AllFonts ) {
 969     my $i = 0;
 970     my @zfonts = ();
 971     my %allfonts = ();
 972     foreach $font ( @{$fset->{fonts}} ) {
 973         $allfonts{$font->[1]->{name}}++;
 974     }
 975     foreach $font ( keys(%allfonts) ) {
 976         print '/',$font,'-NASM /',$font," nasmenc\n";
 977     }
 978     foreach $font ( @{$fset->{fonts}} ) {
 979         print '/', $fset->{name}, $i, ' ',
 980         '/', $font->[1]->{name}, '-NASM findfont ',
 981         $font->[0], " scalefont def\n";
 982         push(@zfonts, $fset->{name}.$i);
 983         $i++;
 984     }
 985     print '/', $fset->{name}, ' [', join(' ',@zfonts), "] def\n";
 986 }
 987
 988 # Emit the canned PostScript prologue
 989 open(PSHEAD, "< head.ps");
 990 while ( defined($line = <PSHEAD>) ) {
 991     print $line;
 992 }
 993 close(PSHEAD);
 994 print "%%EndProlog\n";
 995
 996 # Generate a PostScript string
 997 sub ps_string($) {
 998     my ($s) = @_;
 999     my ($i,$c);
1000     my ($o) = '(';
1001     my ($l) = length($s);
1002     for ( $i = 0 ; $i < $l ; $i++ ) {
1003         $c = substr($s,$i,1);
1004         if ( ord($c) < 32 || ord($c) > 126 ) {
1005             $o .= sprintf("\\%03o", ord($c));
1006         } elsif ( $c eq '(' || $c eq ')' || $c eq "\\" ) {
1007             $o .= "\\".$c;
1008         } else {
1009             $o .= $c;
1010         }
1011     }
1012     return $o.')';
1013 }
1014
1015 # Generate PDF bookmarks
1016 print "%%BeginSetup\n";
1017 foreach $b ( @bookmarks ) {
1018     print '[/Title ', ps_string($b->[2]), "\n";
1019     print '/Count ', $b->[1], ' ' if ( $b->[1] );
1020     print '/Dest /',$b->[0]," /OUT pdfmark\n";
1021 }
1022
1023 # Ask the PostScript interpreter for the proper size media
1024 print "setpagesize\n";
1025 print "%%EndSetup\n";
1026
1027 # Start a PostScript page
1028 sub ps_start_page() {
1029     $ps_page++;
1030     print "%%Page: $ps_page $ps_page\n";
1031     print "%%BeginPageSetup\n";
1032     print "save\n";
1033     print "%%EndPageSetup\n";
1034     print '/', $ps_page, " pa\n";
1035 }
1036
1037 # End a PostScript page
1038 sub ps_end_page($) {
1039     my($pn) = @_;
1040     if ( $pn ) {
1041         print "($ps_page)", (($ps_page & 1) ? 'pageodd' : 'pageeven'), "\n";
1042     }
1043     print "restore showpage\n";
1044 }
1045
1046 $ps_page = 0;
1047
1048 # Title page and inner cover
1049 ps_start_page();
1050 $title = $metadata{'title'};
1051 $title =~ s/ \- / $emdash /;
1052 $pstitle = ps_string($title);
1053
1054 # FIX THIS: This shouldn't be hard-coded like this
1055 print <<EOF;
1056 lmarg pageheight 2 mul 3 div moveto
1057 tfont0 setfont
1058 /title linkdest ${pstitle} show
1059 lmarg pageheight 2 mul 3 div 10 sub moveto
1060 0 setlinecap 3 setlinewidth
1061 pagewidth lmarg sub rmarg sub 0 rlineto stroke
1062 /nasmlogo {
1063 gsave 1 dict begin
1064 /sz exch def
1065 /Courier-Bold findfont sz scalefont setfont
1066 moveto
1067 0.85 1.22 scale
1068 [(-~~..~:\#;L       .-:\#;L,.-   .~:\#:;.T  -~~.~:;. .~:;. )
1069 ( E8+U    *T     +U\'   *T\#  .97     *L   E8+\'  *;T\'  *;, )
1070 ( D97     \`*L  .97     \'*L   \"T;E+:,     D9     *L    *L )
1071 ( H7       I\#  T7       I\#        \"*:.   H7     I\#    I\# )
1072 ( U:       :8  *\#+    , :8  T,      79   U:     :8    :8 )
1073 (,\#B.     .IE,  \"T;E*  .IE, J *+;\#:T*\"  ,\#B.   .IE,  .IE,)] {
1074 currentpoint 3 -1 roll
1075 sz -0.10 mul 0 3 -1 roll ashow
1076 sz 0.72 mul sub moveto
1077 } forall
1078 end grestore
1079 } def
1080 0.6 setgray
1081 pagewidth 2 div 143 sub
1082 pageheight 2 div 33 add
1083 12 nasmlogo
1084 EOF
1085 ps_end_page(0);
1086
1087 $curpage = 2;
1088 ps_start_page();
1089 foreach $line ( @pslines ) {
1090     my $linfo = $line->[0];
1091
1092     if ( $$linfo[4] != $curpage ) {
1093         ps_end_page($curpage > 2);
1094         ps_start_page();
1095         $curpage = $$linfo[4];
1096     }
1097
1098     print '[';
1099     my $curfont = 0;
1100     foreach my $c ( @{$line->[1]} ) {
1101         if ( $$c[0] >= 0 ) {
1102             if ( $curfont != $$c[0] ) {
1103                 print ($curfont = $$c[0]);
1104             }
1105             print ps_string($$c[1]);
1106         } elsif ( $$c[0] == -1 ) {
1107             print '{el}';       # End link
1108         } elsif ( $$c[0] == -2 ) {
1109             print '{/',$$c[1],' xl}'; # xref link
1110         } elsif ( $$c[0] == -3 ) {
1111             print '{',ps_string($$c[1]),'wl}'; # web link
1112         } elsif ( $$c[0] == -4 ) {
1113             # Index anchor -- ignore
1114         } elsif ( $$c[0] == -5 ) {
1115             print '{/',$$c[1],' xa}'; #xref anchor
1116         } elsif ( $$c[0] == -6 ) {
1117             print '][';         # Start a new array
1118             $curfont = 0;
1119         } elsif ( $$c[0] == -7 ) {
1120             print '{/',$$c[1],' pl}'; # page link
1121         } else {
1122             die "Unknown annotation";
1123         }
1124     }
1125     print ']';
1126     if ( defined($$linfo[2]) ) {
1127         foreach my $x ( @{$$linfo[2]} ) {
1128             if ( $$x[0] == $AuxStr ) {
1129                 print ps_string($$x[1]);
1130             } elsif ( $$x[0] == $AuxPage ) {
1131                 print $ps_xref_page{$$x[1]},' ';
1132             } elsif ( $$x[0] == $AuxPageStr ) {
1133                 print ps_string($ps_xref_page{$$x[1]});
1134             } elsif ( $$x[0] == $AuxXRef ) {
1135                 print '/',ps_xref($$x[1]),' ';
1136             } elsif ( $$x[0] == $AuxNum ) {
1137                 print $$x[1],' ';
1138             } else {
1139                 die "Unknown auxilliary data type";
1140             }
1141         }
1142     }
1143     print ($psconf{pageheight}-$psconf{topmarg}-$$linfo[5]);
1144     print ' ', $$linfo[6] if ( defined($$linfo[6]) );
1145     print ' ', $$linfo[0].$$linfo[1], "\n";
1146 }
1147
1148 ps_end_page(1);
1149 print "%%EOF\n";