bin/collateindex.pl

   1 #!/usr/bin/perl -- # -*- Perl -*-
   2 #
   3 # $Id: collateindex.pl,v 1.10 2004/10/24 17:05:41 petere78 Exp $
   4
   5 =head1 NAME
   6
   7 collateindex.pl - generate DocBook index files
   8
   9 =head1 SYNOPSIS
  10
  11 B<collateindex.pl> [B<-f>] [B<-g>] [B<-i> I<id>] [B<-I> I<scope>] [B<-N>]
  12                 [B<-o> F<file>] [B<-p>] [B<-P> F<file>] [B<-q>] [B<-s> I<name>]
  13                 [B<-S> I<scope>] [B<-t> I<name>] [B<-x>] F<file>
  14
  15 =head1 DESCRIPTION
  16
  17 B<collateindex.pl> creates index data for DocBook XML or SGML files.
  18
  19 =cut
  20
  21 use File::Basename;
  22 use Getopt::Std;
  23
  24 $me = basename($0);
  25
  26 $usage = "Usage: $0 [options] file
  27 Try \"perldoc $me\" for documentation.\n";
  28
  29 ( $version = '$Revision: 1.10 $' ) =~ s/^\$[R]evision:\s*([^ ]*)\s*\$$/$1/;
  30
  31 =head1 OPTIONS
  32
  33 =over 5
  34
  35 =item B<-f>
  36
  37 Force the output file to be written, even if it appears to have been
  38 edited by hand.
  39
  40 =item B<-g>
  41
  42 Group terms with IndexDiv based on the first letter of the term (or
  43 its SortAs attribute).  (This might not handle all language environments.)
  44
  45 =item B<-i> I<id>
  46
  47 The ID to use for the E<lt>indexE<gt> tag.
  48
  49 =item B<-I> I<scope>
  50
  51 The implied scope, must be C<all>, C<local>, or C<global>.  IndexTerms
  52 which do not specify a scope will have the implied scope.  If
  53 unspecified, C<all> is assumed.
  54
  55 =item B<-N>
  56
  57 New index (generates an empty index file).
  58
  59 =item B<-o> F<file>
  60
  61 Output to F<file>. Defaults to F<stdout>.
  62
  63 =item B<-p>
  64
  65 Link to points in the document.  The default is to link to the closest
  66 containing section.
  67
  68 =item B<-P> F<file>
  69
  70 Read a preamble from F<file>.  The contents of F<file> will be
  71 inserted before the E<lt>indexE<gt> tag.
  72
  73 =item B<-q>
  74
  75 Run quietly.
  76
  77 =item B<-s> I<name>
  78
  79 Name the IndexDiv that contains symbols.  The default is C<Symbols>.
  80 Meaningless if B<-g> is not used.
  81
  82 =item B<-S> I<scope>
  83
  84 Scope of the index, must be C<all>, C<local>, or C<global>.  If
  85 unspecified, C<all> is assumed.
  86
  87 =item B<-t> I<name>
  88
  89 Title for the index.
  90
  91 =item B<-x>
  92
  93 Make a SetIndex.
  94
  95 =item B<-V>
  96
  97 Print version number and exit.
  98
  99 =item F<file>
 100
 101 The file containing index data generated with the DocBook DSSSL
 102 HTML stylesheet (usually called F<HTML.index>).
 103
 104 =back
 105
 106 =cut
 107
 108
 109 die $usage if ! getopts('Dfgi:NpP:s:o:S:I:t:xqV');
 110
 111 $linkpoints   = $opt_p;
 112 $lettergroups = $opt_g;
 113 $symbolsname  = $opt_s || "Symbols";
 114 $title        = $opt_t;
 115 $preamble     = $opt_P;
 116 $outfile      = $opt_o || '-';
 117 $indexid      = $opt_i;
 118 $scope        = uc($opt_S) || 'ALL';
 119 $impliedscope = uc($opt_I) || 'ALL';
 120 $setindex     = $opt_x;
 121 $forceoutput  = $opt_f;
 122 $newindex     = $opt_N;
 123 $debug        = $opt_D;
 124 $quiet        = $opt_q;
 125
 126 if ( $opt_V ) {
 127     print "collateindex.pl $version\n";
 128     exit 0;
 129 }
 130
 131 $indextag     = $setindex ? 'setindex' : 'index';
 132
 133 if ($newindex) {
 134     safe_open(*OUT, $outfile);
 135     if ($indexid) {
 136         print OUT "<$indextag id='$indexid'>\n\n";
 137     } else {
 138         print OUT "<$indextag>\n\n";
 139     }
 140
 141     print OUT "<!-- This file was produced by collateindex.pl.         -->\n";
 142     print OUT "<!-- Remove this comment if you edit this file by hand! -->\n";
 143
 144     print OUT "</$indextag>\n";
 145     exit 0;
 146 }
 147
 148 $dat = shift @ARGV || die $usage;
 149 die "$me: file \"$dat\" does not exist\n" if ! -f $dat;
 150
 151 %legal_scopes = ('ALL' => 1, 'LOCAL' => 1, 'GLOBAL' => 1);
 152 if ($scope && !$legal_scopes{$scope}) {
 153     die "$me: invalid scope: $scope\n";
 154 }
 155 if ($impliedscope && !$legal_scopes{$impliedscope}) {
 156     die "$me: invalid implied scope: $impliedscope\n";
 157 }
 158
 159 @term = ();
 160 %id   = ();
 161
 162 $termcount = 0;
 163
 164 $quiet || print STDERR "Processing $dat...\n";
 165
 166 # Read the index file, creating an array of objects.  Each object
 167 # represents and indexterm and has fields for the content of the
 168 # indexterm
 169
 170 open (F, $dat);
 171 while (<F>) {
 172     chop;
 173     chop if /\r$/;
 174
 175     if (/^\/indexterm/i) {
 176         push (@term, $idx);
 177         next;
 178     }
 179
 180     if (/^indexterm (.*)$/i) {
 181         $termcount++;
 182         $idx = {};
 183         $idx->{'zone'} = {};
 184         $idx->{'href'} = $1;
 185         $idx->{'count'} = $termcount;
 186         $idx->{'scope'} = $impliedscope;
 187         next;
 188     }
 189
 190     if (/^indexpoint (.*)$/i) {
 191         $idx->{'hrefpoint'} = $1;
 192         next;
 193     }
 194
 195     if (/^title (.*)$/i) {
 196         $idx->{'title'} = $1;
 197         next;
 198     }
 199
 200     if (/^primary[\[ ](.*)$/i) {
 201         if (/^primary\[(.*?)\] (.*)$/i) {
 202             $idx->{'psortas'} = &escape($1);
 203             $idx->{'primary'} = &escape($2);
 204         } else {
 205             $idx->{'psortas'} = &escape($1);
 206             $idx->{'primary'} = &escape($1);
 207         }
 208         next;
 209     }
 210
 211     if (/^secondary[\[ ](.*)$/i) {
 212         if (/^secondary\[(.*?)\] (.*)$/i) {
 213             $idx->{'ssortas'} = &escape($1);
 214             $idx->{'secondary'} = &escape($2);
 215         } else {
 216             $idx->{'ssortas'} = &escape($1);
 217             $idx->{'secondary'} = &escape($1);
 218         }
 219         next;
 220     }
 221
 222     if (/^tertiary[\[ ](.*)$/i) {
 223         if (/^tertiary\[(.*?)\] (.*)$/i) {
 224             $idx->{'tsortas'} = &escape($1);
 225             $idx->{'tertiary'} = &escape($2);
 226         } else {
 227             $idx->{'tsortas'} = &escape($1);
 228             $idx->{'tertiary'} = &escape($1);
 229         }
 230         next;
 231     }
 232
 233     if (/^see (.*)$/i) {
 234         $idx->{'see'} = &escape($1);
 235         next;
 236     }
 237
 238     if (/^seealso (.*)$/i) {
 239         $idx->{'seealso'} = &escape($1);
 240         next;
 241     }
 242
 243     if (/^significance (.*)$/i) {
 244         $idx->{'significance'} = &escape($1);
 245         next;
 246     }
 247
 248     if (/^class (.*)$/i) {
 249         $idx->{'class'} = &escape($1);
 250         next;
 251     }
 252
 253     if (/^scope (.*)$/i) {
 254         $idx->{'scope'} = &escape(uc($1));
 255         next;
 256     }
 257
 258     if (/^startref (.*)$/i) {
 259         $idx->{'startref'} = $1;
 260         next;
 261     }
 262
 263     if (/^id (.*)$/i) {
 264         $idx->{'id'} = $1;
 265         $id{$1} = $idx;
 266         next;
 267     }
 268
 269     if (/^zone (.*)$/i) {
 270         my($href) = $1;
 271         $_ = scalar(<F>);
 272         chop;
 273         die "$me: invalid zone: $_\n" if !/^title (.*)$/i;
 274         $idx->{'zone'}->{$href} = $1;
 275         next;
 276     }
 277
 278     die "$me: unrecognized tag in input: $_\n";
 279 }
 280 close (F);
 281
 282 $quiet || print STDERR "$termcount entries loaded...\n";
 283
 284 # Fixup the startrefs...
 285 # In DocBook, STARTREF is a #CONREF attribute; support this by copying
 286 # all of the fields from the indexterm with the id specified by STARTREF
 287 # to the indexterm that has the STARTREF.
 288 foreach $idx (@term) {
 289     my($ididx, $field);
 290     if ($idx->{'startref'}) {
 291         $ididx = $id{$idx->{'startref'}};
 292         foreach $field ('primary', 'secondary', 'tertiary', 'see', 'seealso',
 293                         'psortas', 'ssortas', 'tsortas', 'significance',
 294                         'class', 'scope') {
 295             $idx->{$field} = $ididx->{$field};
 296         }
 297     }
 298 }
 299
 300 # Sort the index terms
 301 @term = sort termsort @term;
 302
 303 # Move all of the non-alphabetic entries to the front of the index.
 304 @term = sortsymbols(@term);
 305
 306 safe_open(*OUT, $outfile);
 307
 308 # Write the index...
 309 if ($indexid) {
 310     print OUT "<$indextag id='$indexid'>\n\n";
 311 } else {
 312     print OUT "<$indextag>\n\n";
 313 }
 314
 315 print OUT "<!-- This file was produced by collateindex.pl.         -->\n";
 316 print OUT "<!-- Remove this comment if you edit this file by hand! -->\n";
 317
 318 print OUT "<!-- ULINK is abused here.
 319
 320       The URL attribute holds the URL that points from the index entry
 321       back to the appropriate place in the output produced by the HTML
 322       stylesheet. (It's much easier to calculate this URL in the first
 323       pass.)
 324
 325       The Role attribute holds the ID (either real or manufactured) of
 326       the corresponding INDEXTERM.  This is used by the print backends
 327       to produce page numbers.
 328
 329       The entries below are sorted and collated into the correct order.
 330       Duplicates may be removed in the HTML backend, but in the print
 331       backends, it is impossible to suppress duplicate pages or coalesce
 332       sequences of pages into a range.
 333 -->\n\n";
 334
 335 print OUT "<title>$title</title>\n\n" if $title;
 336
 337 $last = {};     # the last indexterm we processed
 338 $first = 1;     # this is the first one
 339 $group = "";    # we're not in a group yet
 340 $lastout = "";  # we've not put anything out yet
 341 @seealsos = (); # See also stack.
 342
 343 foreach $idx (@term) {
 344     next if $idx->{'startref'}; # no way to represent spans...
 345     next if ($idx->{'scope'} eq 'LOCAL') && ($scope eq 'GLOBAL');
 346     next if ($idx->{'scope'} eq 'GLOBAL') && ($scope eq 'LOCAL');
 347     next if &same($idx, $last); # suppress duplicates
 348
 349     $termcount--;
 350
 351     # If primary changes, output a whole new index term, otherwise just
 352     # output another secondary or tertiary, as appropriate.  We know from
 353     # sorting that the terms will always be in the right order.
 354     if (!&tsame($last, $idx, 'primary')) {
 355         print "DIFF PRIM\n" if $debug;
 356         &end_entry() if not $first;
 357
 358         if ($lettergroups) {
 359             # If we're grouping, make the right indexdivs
 360             $letter = $idx->{'psortas'};
 361             $letter = $idx->{'primary'} if !$letter;
 362             $letter = uc(substr($letter, 0, 1));
 363
 364             # symbols are a special case
 365             if (($letter lt 'A') || ($letter gt 'Z')) {
 366                 if (($group eq '')
 367                     || (($group ge 'A') && ($group le 'Z'))) {
 368                     print OUT "</indexdiv>\n" if !$first;
 369                     print OUT "<indexdiv><title>$symbolsname</title>\n\n";
 370                     $group = $letter;
 371                 }
 372             } elsif (($group eq '') || ($group ne $letter)) {
 373                 print OUT "</indexdiv>\n" if !$first;
 374                 print OUT "<indexdiv><title>$letter</title>\n\n";
 375                 $group = $letter;
 376             }
 377         }
 378
 379         $first = 0; # there can only be on first ;-)
 380
 381         print OUT "<indexentry>\n";
 382         print OUT "  <primaryie>", $idx->{'primary'};
 383         $lastout = "primaryie";
 384
 385         if ($idx->{'secondary'}) {
 386             print OUT "\n  </primaryie>\n";
 387             print OUT "  <secondaryie>", $idx->{'secondary'};
 388             $lastout = "secondaryie";
 389         };
 390
 391         if ($idx->{'tertiary'}) {
 392             print OUT "\n  </secondaryie>\n";
 393             print OUT "  <tertiaryie>", $idx->{'tertiary'};
 394             $lastout = "tertiaryie";
 395         }
 396     } elsif (!&tsame($last, $idx, 'secondary')) {
 397         print "DIFF SEC\n" if $debug;
 398
 399         print OUT "\n  </$lastout>\n" if $lastout;
 400
 401         foreach (@seealsos) {
 402             # it'd be nice to make this a link...
 403             print OUT $indent, "  <seealsoie>", &escape($_), "</seealsoie>\n";
 404         }
 405         @seealsos = ();
 406
 407         print OUT "  <secondaryie>", $idx->{'secondary'};
 408         $lastout = "secondaryie";
 409         if ($idx->{'tertiary'}) {
 410             print OUT "\n  </secondaryie>\n";
 411             print OUT "  <tertiaryie>", $idx->{'tertiary'};
 412             $lastout = "tertiaryie";
 413         }
 414     } elsif (!&tsame($last, $idx, 'tertiary')) {
 415         print "DIFF TERT\n" if $debug;
 416
 417         print OUT "\n  </$lastout>\n" if $lastout;
 418
 419         foreach (@seealsos) {
 420             # it'd be nice to make this a link...
 421             print OUT $indent, "  <seealsoie>", &escape($_), "</seealsoie>\n";
 422         }
 423         @seealsos = ();
 424
 425         if ($idx->{'tertiary'}) {
 426             print OUT "  <tertiaryie>", $idx->{'tertiary'};
 427             $lastout = "tertiaryie";
 428         }
 429     }
 430
 431     &print_term($idx);
 432
 433     $last = $idx;
 434 }
 435
 436 # Termcount is > 0 iff some entries were skipped.
 437 $quiet || print STDERR "$termcount entries ignored...\n";
 438
 439 &end_entry();
 440
 441 print OUT "</indexdiv>\n" if $lettergroups;
 442 print OUT "</$indextag>\n";
 443
 444 close (OUT);
 445
 446 $quiet || print STDERR "Done.\n";
 447
 448 sub same {
 449     my($a) = shift;
 450     my($b) = shift;
 451
 452     my($aP) = $a->{'psortas'} || $a->{'primary'};
 453     my($aS) = $a->{'ssortas'} || $a->{'secondary'};
 454     my($aT) = $a->{'tsortas'} || $a->{'tertiary'};
 455
 456     my($bP) = $b->{'psortas'} || $b->{'primary'};
 457     my($bS) = $b->{'ssortas'} || $b->{'secondary'};
 458     my($bT) = $b->{'tsortas'} || $b->{'tertiary'};
 459
 460     my($same);
 461
 462     $aP =~ s/^\s*//; $aP =~ s/\s*$//; $aP = uc($aP);
 463     $aS =~ s/^\s*//; $aS =~ s/\s*$//; $aS = uc($aS);
 464     $aT =~ s/^\s*//; $aT =~ s/\s*$//; $aT = uc($aT);
 465     $bP =~ s/^\s*//; $bP =~ s/\s*$//; $bP = uc($bP);
 466     $bS =~ s/^\s*//; $bS =~ s/\s*$//; $bS = uc($bS);
 467     $bT =~ s/^\s*//; $bT =~ s/\s*$//; $bT = uc($bT);
 468
 469 #    print "[$aP]=[$bP]\n";
 470 #    print "[$aS]=[$bS]\n";
 471 #    print "[$aT]=[$bT]\n";
 472
 473     # Two index terms are the same if:
 474     # 1. the primary, secondary, and tertiary entries are the same
 475     #    (or have the same SORTAS)
 476     # AND
 477     # 2. They occur in the same titled section
 478     # AND
 479     # 3. They point to the same place
 480     #
 481     # Notes: Scope is used to suppress some entries, but can't be used
 482     #          for comparing duplicates.
 483     #        Interpretation of "the same place" depends on whether or
 484     #          not $linkpoints is true.
 485
 486     $same = (($aP eq $bP)
 487              && ($aS eq $bS)
 488              && ($aT eq $bT)
 489              && ($a->{'title'} eq $b->{'title'})
 490              && ($a->{'href'} eq $b->{'href'}));
 491
 492     # If we're linking to points, they're only the same if they link
 493     # to exactly the same spot.
 494     $same = $same && ($a->{'hrefpoint'} eq $b->{'hrefpoint'})
 495         if $linkpoints;
 496
 497     if ($same) {
 498        warn "$me: duplicated index entry found: $aP $aS $aT\n";
 499     }
 500
 501     $same;
 502 }
 503
 504 sub tsame {
 505     # Unlike same(), tsame only compares a single term
 506     my($a) = shift;
 507     my($b) = shift;
 508     my($term) = shift;
 509     my($sterm) = substr($term, 0, 1) . "sortas";
 510     my($A, $B);
 511
 512     $A = $a->{$sterm} || $a->{$term};
 513     $B = $b->{$sterm} || $b->{$term};
 514
 515     $A =~ s/^\s*//; $A =~ s/\s*$//; $A = uc($A);
 516     $B =~ s/^\s*//; $B =~ s/\s*$//; $B = uc($B);
 517
 518     return $A eq $B;
 519 }
 520
 521 sub end_entry {
 522     # End any open elements...
 523     print OUT "\n  </$lastout>\n" if $lastout;
 524
 525     foreach (@seealsos) {
 526         # it'd be nice to make this a link...
 527         print OUT $indent, "  <seealsoie>", &escape($_), "</seealsoie>\n";
 528     }
 529     @seealsos = ();
 530
 531     print OUT "</indexentry>\n\n";
 532     $lastout = "";
 533 }
 534
 535 sub print_term {
 536     # Print out the links for an indexterm.  There can be more than
 537     # one if the term has a ZONE that points to more than one place.
 538     # (do we do the right thing in that case?)
 539     my($idx) = shift;
 540     my($key, $indent, @hrefs);
 541     my(%href) = ();
 542     my(%phref) = ();
 543
 544     $indent = "    ";
 545
 546     if ($idx->{'see'}) {
 547         # it'd be nice to make this a link...
 548         if ($lastout) {
 549             print OUT "\n  </$lastout>\n";
 550             $lastout = "";
 551         }
 552         print OUT $indent, "<seeie>", &escape($idx->{'see'}), "</seeie>\n";
 553         return;
 554     }
 555
 556     if (keys %{$idx->{'zone'}}) {
 557         foreach $key (keys %{$idx->{'zone'}}) {
 558             $href{$key} = $idx->{'zone'}->{$key};
 559             $phref{$key} = $key;
 560         }
 561     } else {
 562         $href{$idx->{'href'}} = $idx->{'title'};
 563         $phref{$idx->{'href'}} = $idx->{'hrefpoint'};
 564     }
 565
 566     # We can't use <LINK> because we don't know the ID of the term in the
 567     # original source (and, in fact, it might not have one).
 568     print OUT ",\n";
 569     @hrefs = keys %href;
 570     while (@hrefs) {
 571         my($linkend) = "";
 572         my($role) = "";
 573         $key = shift @hrefs;
 574         if ($linkpoints) {
 575             $linkend = $phref{$key};
 576         } else {
 577             $linkend = $key;
 578         }
 579
 580         $role = $phref{$key};
 581         $role = $1 if $role =~ /\#(.*)$/;
 582         $role = $1 if $role =~ /(.*)\./;
 583
 584         print OUT $indent;
 585         print OUT "<ulink url=\"$linkend\" role=\"$role\">";
 586         print OUT "<emphasis>" if ($idx->{'significance'} eq 'PREFERRED');
 587         print OUT &escape($href{$key});
 588         print OUT "</emphasis>" if ($idx->{'significance'} eq 'PREFERRED');
 589         print OUT "</ulink>";
 590     }
 591
 592     if ($idx->{'seealso'}) {
 593         push @seealsos, $idx->{'seealso'};
 594     }
 595 }
 596
 597 sub termsort {
 598     my($aP) = $a->{'psortas'} || $a->{'primary'};
 599     my($aS) = $a->{'ssortas'} || $a->{'secondary'};
 600     my($aT) = $a->{'tsortas'} || $a->{'tertiary'};
 601     my($ap) = $a->{'count'};
 602
 603     my($bP) = $b->{'psortas'} || $b->{'primary'};
 604     my($bS) = $b->{'ssortas'} || $b->{'secondary'};
 605     my($bT) = $b->{'tsortas'} || $b->{'tertiary'};
 606     my($bp) = $b->{'count'};
 607
 608     $aP =~ s/^\s*//; $aP =~ s/\s*$//; $aP = uc($aP);
 609     $aS =~ s/^\s*//; $aS =~ s/\s*$//; $aS = uc($aS);
 610     $aT =~ s/^\s*//; $aT =~ s/\s*$//; $aT = uc($aT);
 611     $bP =~ s/^\s*//; $bP =~ s/\s*$//; $bP = uc($bP);
 612     $bS =~ s/^\s*//; $bS =~ s/\s*$//; $bS = uc($bS);
 613     $bT =~ s/^\s*//; $bT =~ s/\s*$//; $bT = uc($bT);
 614
 615     if ($aP eq $bP) {
 616         if ($aS eq $bS) {
 617             if ($aT eq $bT) {
 618                 # make sure seealso's always sort to the bottom
 619                 return 1 if ($a->{'seealso'});
 620                 return -1  if ($b->{'seealso'});
 621                 # if everything else is the same, keep these elements
 622                 # in document order (so the index links are in the right
 623                 # order)
 624                 return $ap <=> $bp;
 625             } else {
 626                 return $aT cmp $bT;
 627             }
 628         } else {
 629             return $aS cmp $bS;
 630         }
 631     } else {
 632         return $aP cmp $bP;
 633     }
 634 }
 635
 636 sub sortsymbols {
 637     my(@term) = @_;
 638     my(@new) = ();
 639     my(@sym) = ();
 640     my($letter);
 641     my($idx);
 642
 643     # Move the non-letter things to the front.  Should digits be thier
 644     # own group?  Maybe...
 645     foreach $idx (@term) {
 646         $letter = $idx->{'psortas'};
 647         $letter = $idx->{'primary'} if !$letter;
 648         $letter = uc(substr($letter, 0, 1));
 649
 650         if (($letter lt 'A') || ($letter gt 'Z')) {
 651             push (@sym, $idx);
 652         } else {
 653             push (@new, $idx);
 654         }
 655     }
 656
 657     return (@sym, @new);
 658 }
 659
 660 sub safe_open {
 661     local(*OUT) = shift;
 662     local(*F, $_);
 663
 664     if (($outfile ne '-') && (!$forceoutput)) {
 665         my($handedit) = 1;
 666         if (open (OUT, $outfile)) {
 667             while (<OUT>) {
 668                 if (/<!-- Remove this comment if you edit this file by hand! -->/){
 669                     $handedit = 0;
 670                     last;
 671                 }
 672             }
 673             close (OUT);
 674         } else {
 675             $handedit = 0;
 676         }
 677
 678         if ($handedit) {
 679             print STDERR "$me: file \"$outfile\" appears to have been edited by hand\n";
 680             print STDERR "Use the -f option or specify a different output file name.\n";
 681             exit 1;
 682         }
 683     }
 684
 685     open (OUT, ">$outfile") || die "$me: could not open file \"$outfile\": $!\n";
 686
 687     if ($preamble) {
 688         # Copy the preamble
 689         if (open(F, $preamble)) {
 690             while (<F>) {
 691                 print OUT $_;
 692             }
 693             close(F);
 694         } else {
 695             warn "$me: could not open preamble file \"$preamble\": $!\n";
 696         }
 697     }
 698 }
 699
 700 sub escape {
 701     # make sure & and < don't show up in the index
 702     local $_ = shift;
 703     s/&/&amp;/sg;
 704     s/</&lt;/sg;
 705     s/>/&gt;/sg; # what the heck
 706
 707     return $_;
 708 }
 709
 710
 711
 712 =head1 EXAMPLE
 713
 714 B<collateindex.pl> B<-o> F<index.sgml> F<HTML.index>
 715
 716 =head1 EXIT STATUS
 717
 718 =over 5
 719
 720 =item B<0>
 721
 722 Success
 723
 724 =item B<1>
 725
 726 Failure
 727
 728 =back
 729
 730 =head1 AUTHOR
 731
 732 Norm Walsh E<lt>ndw@nwalsh.comE<gt>
 733
 734 Minor updates by Adam Di Carlo E<lt>adam@onshore.comE<gt> and Peter Eisentraut E<lt>peter_e@gmx.netE<gt>
 735
 736 =cut
 737