cvs2cl.pl

   1 #!/bin/sh
   2 exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
   3 #!perl -w
   4
   5
   6 ##############################################################
   7 ###                                                        ###
   8 ### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
   9 ###                                                        ###
  10 ##############################################################
  11
  12 ## $Revision: 1.1 $
  13 ## $Date: 2005/10/29 14:58:02 $
  14 ## $Author: suzhe $
  15 ##
  16
  17 use strict;
  18
  19 use File::Basename qw( fileparse );
  20 use Getopt::Long   qw( GetOptions );
  21 use Text::Wrap     qw( );
  22 use Time::Local    qw( timegm );
  23 use User::pwent    qw( getpwnam );
  24
  25 # The Plan:
  26 #
  27 # Read in the logs for multiple files, spit out a nice ChangeLog that
  28 # mirrors the information entered during `cvs commit'.
  29 #
  30 # The problem presents some challenges. In an ideal world, we could
  31 # detect files with the same author, log message, and checkin time --
  32 # each <filelist, author, time, logmessage> would be a changelog entry.
  33 # We'd sort them; and spit them out.  Unfortunately, CVS is *not atomic*
  34 # so checkins can span a range of times.  Also, the directory structure
  35 # could be hierarchical.
  36 #
  37 # Another question is whether we really want to have the ChangeLog
  38 # exactly reflect commits. An author could issue two related commits,
  39 # with different log entries, reflecting a single logical change to the
  40 # source. GNU style ChangeLogs group these under a single author/date.
  41 # We try to do the same.
  42 #
  43 # So, we parse the output of `cvs log', storing log messages in a
  44 # multilevel hash that stores the mapping:
  45 #   directory => author => time => message => filelist
  46 # As we go, we notice "nearby" commit times and store them together
  47 # (i.e., under the same timestamp), so they appear in the same log
  48 # entry.
  49 #
  50 # When we've read all the logs, we twist this mapping into
  51 # a time => author => message => filelist mapping for each directory.
  52 #
  53 # If we're not using the `--distributed' flag, the directory is always
  54 # considered to be `./', even as descend into subdirectories.
  55
  56 # Call Tree
  57
  58 # name                         number of lines (10.xii.03)
  59 # parse_options                         192
  60 # derive_changelog                       13
  61 # +-maybe_grab_accumulation_date         38
  62 # +-read_changelog                      277
  63 #   +-maybe_read_user_map_file           94
  64 #     +-run_ext                           9
  65 #   +-read_file_path                     29
  66 #   +-read_symbolic_name                 43
  67 #   +-read_revision                      49
  68 #   +-read_date_author_and_state         25
  69 #     +-parse_date_author_and_state      20
  70 #   +-read_branches                      36
  71 # +-output_changelog                    424
  72 #   +-pretty_file_list                  290
  73 #     +-common_path_prefix               35
  74 #   +-preprocess_msg_text                30
  75 #     +-min                               1
  76 #   +-mywrap                             16
  77 #   +-last_line_len                       5
  78 #   +-wrap_log_entry                    177
  79 #
  80 # Utilities
  81 #
  82 # xml_escape                              6
  83 # slurp_file                             11
  84 # debug                                   5
  85 # version                                 2
  86 # usage                                 142
  87
  88 # -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
  89 #
  90 # Note about a bug-slash-opportunity:
  91 # -----------------------------------
  92 #
  93 # There's a bug in Text::Wrap, which affects cvs2cl.  This script
  94 # reveals it:
  95 #
  96 #   #!/usr/bin/perl -w
  97 #
  98 #   use Text::Wrap;
  99 #
 100 #   my $test_text =
 101 #   "This script demonstrates a bug in Text::Wrap.  The very long line
 102 #   following this paragraph will be relocated relative to the surrounding
 103 #   text:
 104 #
 105 #   ====================================================================
 106 #
 107 #   See?  When the bug happens, we'll get the line of equal signs below
 108 #   this paragraph, even though it should be above.";
 109 #
 110 #
 111 #   # Print out the test text with no wrapping:
 112 #   print "$test_text";
 113 #   print "\n";
 114 #   print "\n";
 115 #
 116 #   # Now print it out wrapped, and see the bug:
 117 #   print wrap ("\t", "        ", "$test_text");
 118 #   print "\n";
 119 #   print "\n";
 120 #
 121 # If the line of equal signs were one shorter, then the bug doesn't
 122 # happen.  Interesting.
 123 #
 124 # Anyway, rather than fix this in Text::Wrap, we might as well write a
 125 # new wrap() which has the following much-needed features:
 126 #
 127 # * initial indentation, like current Text::Wrap()
 128 # * subsequent line indentation, like current Text::Wrap()
 129 # * user chooses among: force-break long words, leave them alone, or die()?
 130 # * preserve existing indentation: chopped chunks from an indented line
 131 #   are indented by same (like this line, not counting the asterisk!)
 132 # * optional list of things to preserve on line starts, default ">"
 133 #
 134 # Note that the last two are essentially the same concept, so unify in
 135 # implementation and give a good interface to controlling them.
 136 #
 137 # And how about:
 138 #
 139 # Optionally, when encounter a line pre-indented by same as previous
 140 # line, then strip the newline and refill, but indent by the same.
 141 # Yeah...
 142
 143 # Globals --------------------------------------------------------------------
 144
 145 # In case we have to print it out:
 146 my $VERSION = '$Revision: 1.1 $';
 147 $VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;
 148
 149 ## Vars set by options:
 150
 151 # Print debugging messages?
 152 my $Debug = 0;
 153
 154 # Just show version and exit?
 155 my $Print_Version = 0;
 156
 157 # Just print usage message and exit?
 158 my $Print_Usage = 0;
 159
 160 # What file should we generate (defaults to "ChangeLog")?
 161 my $Log_File_Name = "ChangeLog";
 162
 163 # Grab most recent entry date from existing ChangeLog file, just add
 164 # to that ChangeLog.
 165 my $Cumulative = 0;
 166
 167 # `cvs log -d`, this will repeat the last entry in the old log.  This is OK,
 168 # as it guarantees at least one entry in the update changelog, which means
 169 # that there will always be a date to extract for the next update.  The repeat
 170 # entry can be removed in postprocessing, if necessary.
 171
 172 # MJP 2003-08-02
 173 # I don't think this actually does anything useful
 174 my $Update = 0;
 175
 176 # Expand usernames to email addresses based on a map file?
 177 my $User_Map_File = '';
 178 my $User_Passwd_File;
 179 my $Mail_Domain;
 180
 181 # Output log in chronological order? [default is reverse chronological order]
 182 my $Chronological_Order = 0;
 183
 184 # Grab user details via gecos
 185 my $Gecos = 0;
 186
 187 # User domain for gecos email addresses
 188 my $Domain;
 189
 190 # Output to a file or to stdout?
 191 my $Output_To_Stdout = 0;
 192
 193 # Eliminate empty log messages?
 194 my $Prune_Empty_Msgs = 0;
 195
 196 # Tags of which not to output
 197 my %ignore_tags;
 198
 199 # Show only revisions with Tags
 200 my %show_tags;
 201
 202 # Don't call Text::Wrap on the body of the message
 203 my $No_Wrap = 0;
 204
 205 # Indentation of log messages
 206 my $Indent = "\t";
 207
 208 # Don't do any pretty print processing
 209 my $Summary = 0;
 210
 211 # Separates header from log message.  Code assumes it is either " " or
 212 # "\n\n", so if there's ever an option to set it to something else,
 213 # make sure to go through all conditionals that use this var.
 214 my $After_Header = " ";
 215
 216 # XML Encoding
 217 my $XML_Encoding = '';
 218
 219 # Format more for programs than for humans.
 220 my $XML_Output = 0;
 221 my $No_XML_Namespace = 0;
 222 my $No_XML_ISO_Date = 0;
 223
 224 # Do some special tweaks for log data that was written in FSF
 225 # ChangeLog style.
 226 my $FSF_Style = 0;
 227
 228 # Show times in UTC instead of local time
 229 my $UTC_Times = 0;
 230
 231 # Show times in output?
 232 my $Show_Times = 1;
 233
 234 # Show day of week in output?
 235 my $Show_Day_Of_Week = 0;
 236
 237 # Show revision numbers in output?
 238 my $Show_Revisions = 0;
 239
 240 # Show dead files in output?
 241 my $Show_Dead = 0;
 242
 243 # Hide dead trunk files which were created as a result of additions on a
 244 # branch?
 245 my $Hide_Branch_Additions = 1;
 246
 247 # Show tags (symbolic names) in output?
 248 my $Show_Tags = 0;
 249
 250 # Show tags separately in output?
 251 my $Show_Tag_Dates = 0;
 252
 253 # Show branches by symbolic name in output?
 254 my $Show_Branches = 0;
 255
 256 # Show only revisions on these branches or their ancestors.
 257 my @Follow_Branches;
 258 # Show only revisions on these branches or their ancestors; ignore descendent
 259 # branches.
 260 my @Follow_Only;
 261
 262 # Don't bother with files matching this regexp.
 263 my @Ignore_Files;
 264
 265 # How exactly we match entries.  We definitely want "o",
 266 # and user might add "i" by using --case-insensitive option.
 267 my $Case_Insensitive = 0;
 268
 269 # Maybe only show log messages matching a certain regular expression.
 270 my $Regexp_Gate = '';
 271
 272 # Pass this global option string along to cvs, to the left of `log':
 273 my $Global_Opts = '';
 274
 275 # Pass this option string along to the cvs log subcommand:
 276 my $Command_Opts = '';
 277
 278 # Read log output from stdin instead of invoking cvs log?
 279 my $Input_From_Stdin = 0;
 280
 281 # Don't show filenames in output.
 282 my $Hide_Filenames = 0;
 283
 284 # Don't shorten directory names from filenames.
 285 my $Common_Dir = 1;
 286
 287 # Max checkin duration. CVS checkin is not atomic, so we may have checkin
 288 # times that span a range of time. We assume that checkins will last no
 289 # longer than $Max_Checkin_Duration seconds, and that similarly, no
 290 # checkins will happen from the same users with the same message less
 291 # than $Max_Checkin_Duration seconds apart.
 292 my $Max_Checkin_Duration = 180;
 293
 294 # What to put at the front of [each] ChangeLog.
 295 my $ChangeLog_Header = '';
 296
 297 # Whether to enable 'delta' mode, and for what start/end tags.
 298 my $Delta_Mode = 0;
 299 my $Delta_From = '';
 300 my $Delta_To = '';
 301
 302 my $TestCode;
 303
 304 # Whether to parse filenames from the RCS filename, and if so what
 305 # prefix to strip.
 306 my $RCS_Root;
 307
 308 # Whether to output information on the # of lines added and removed
 309 # by each file modification.
 310 my $Show_Lines_Modified = 0;
 311
 312 ## end vars set by options.
 313
 314 # latest observed times for the start/end tags in delta mode
 315 my $Delta_StartTime = 0;
 316 my $Delta_EndTime = 0;
 317
 318 my $No_Ancestors = 0;
 319
 320 my $No_Extra_Indent = 0;
 321
 322 my $GroupWithinDate = 0;
 323
 324 # ----------------------------------------------------------------------------
 325
 326 package CVS::Utils::ChangeLog::EntrySet;
 327
 328 sub new {
 329   my $class = shift;
 330   my %self;
 331   bless \%self, $class;
 332 }
 333
 334 # -------------------------------------
 335
 336 sub output_changelog {
 337   my $output_type = $XML_Output ? 'XML' : 'Text';
 338   my $output_class = "CVS::Utils::ChangeLog::EntrySet::Output::${output_type}";
 339   my $output = $output_class->new(follow_branches => \@Follow_Branches,
 340                                   follow_only     => \@Follow_Only,
 341                                   ignore_tags     => \%ignore_tags,
 342                                   show_tags       => \%show_tags,
 343                                  );
 344   $output->output_changelog(@_);
 345 }
 346
 347 # -------------------------------------
 348
 349 sub add_fileentry {
 350   my ($self, $file_full_path, $time, $revision, $state, $lines,
 351       $branch_names, $branch_roots, $branch_numbers,
 352       $symbolic_names, $author, $msg_txt) = @_;
 353
 354       my $qunk =
 355         CVS::Utils::ChangeLog::FileEntry->new($file_full_path, $time, $revision,
 356                                               $state, $lines,
 357                                               $branch_names, $branch_roots,
 358                                               $branch_numbers,
 359                                               $symbolic_names);
 360
 361       # We might be including revision numbers and/or tags and/or
 362       # branch names in the output.  Most of the code from here to
 363       # loop-end deals with organizing these in qunk.
 364
 365       unless ( $Hide_Branch_Additions
 366                and
 367                $msg_txt =~ /file .+ was initially added on branch \S+./ ) {
 368         # Add this file to the list
 369         # (We use many spoonfuls of autovivication magic. Hashes and arrays
 370         # will spring into existence if they aren't there already.)
 371
 372         &main::debug ("(pushing log msg for ". $qunk->dir_key . $qunk->filename . ")\n");
 373
 374         # Store with the files in this commit.  Later we'll loop through
 375         # again, making sure that revisions with the same log message
 376         # and nearby commit times are grouped together as one commit.
 377         $self->{$qunk->dir_key}{$author}{$time}{$msg_txt} =
 378           CVS::Utils::ChangeLog::Message->new($msg_txt)
 379               unless exists $self->{$qunk->dir_key}{$author}{$time}{$msg_txt};
 380         $self->{$qunk->dir_key}{$author}{$time}{$msg_txt}->add_fileentry($qunk);
 381       }
 382
 383 }
 384
 385 # ----------------------------------------------------------------------------
 386
 387 package CVS::Utils::ChangeLog::EntrySet::Output::Text;
 388
 389 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
 390
 391 use File::Basename qw( fileparse );
 392
 393 sub new {
 394   my $class = shift;
 395   my $self = $class->SUPER::new(@_);
 396 }
 397
 398 # -------------------------------------
 399
 400 sub wday {
 401   my $self = shift; my $class = ref $self;
 402   my ($wday) = @_;
 403
 404   return $Show_Day_Of_Week ? ' ' . $class->weekday_en($wday) : '';
 405 }
 406
 407 # -------------------------------------
 408
 409 sub header_line {
 410   my $self = shift;
 411   my ($time, $author, $lastdate) = @_;
 412
 413   my $header_line = '';
 414
 415   my (undef,$min,$hour,$mday,$mon,$year,$wday)
 416     = $UTC_Times ? gmtime($time) : localtime($time);
 417
 418   my $date = $self->fdatetime($time);
 419
 420   if ($Show_Times) {
 421     $header_line =
 422       sprintf "%s  %s\n\n", $date, $author;
 423   } else {
 424     if ( ! defined $lastdate or $date ne $lastdate or ! $GroupWithinDate ) {
 425       if ( $GroupWithinDate ) {
 426         $header_line = "$date\n\n";
 427       } else {
 428         $header_line = "$date  $author\n\n";
 429       }
 430     } else {
 431       $header_line = '';
 432     }
 433   }
 434 }
 435
 436 # -------------------------------------
 437
 438 sub preprocess_msg_text {
 439   my $self = shift;
 440   my ($text) = @_;
 441
 442   $text = $self->SUPER::preprocess_msg_text($text);
 443
 444   unless ( $No_Wrap ) {
 445     # Strip off lone newlines, but only for lines that don't begin with
 446     # whitespace or a mail-quoting character, since we want to preserve
 447     # that kind of formatting.  Also don't strip newlines that follow a
 448     # period; we handle those specially next.  And don't strip
 449     # newlines that precede an open paren.
 450     1 while $text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g;
 451
 452     # If a newline follows a period, make sure that when we bring up the
 453     # bottom sentence, it begins with two spaces.
 454     1 while $text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2  $3/g;
 455   }
 456
 457   return $text;
 458 }
 459
 460 # -------------------------------------
 461
 462 # Here we take a bunch of qunks and convert them into printed
 463 # summary that will include all the information the user asked for.
 464 sub pretty_file_list {
 465   my $self = shift;
 466
 467   return ''
 468     if $Hide_Filenames;
 469
 470   my $qunksref = shift;
 471
 472   my @filenames;
 473   my $beauty = '';          # The accumulating header string for this entry.
 474   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
 475   my %unanimous_tags;       # Tags found in all qunks
 476   my %all_branches;         # Branches found in any qunk
 477   my $fbegun = 0;           # Did we begin printing filenames yet?
 478
 479   my ($common_dir, $qunkrefs) =
 480     $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches), $qunksref);
 481
 482   my @qunkrefs = @$qunkrefs;
 483
 484   # Not XML output, so complexly compactify for chordate consumption.  At this
 485   # point we have enough global information about all the qunks to organize
 486   # them non-redundantly for output.
 487
 488   if ($common_dir) {
 489     # Note that $common_dir still has its trailing slash
 490     $beauty .= "$common_dir: ";
 491   }
 492
 493   if ($Show_Branches)
 494   {
 495     # For trailing revision numbers.
 496     my @brevisions;
 497
 498     foreach my $branch (keys (%all_branches))
 499     {
 500       foreach my $qunkref (@qunkrefs)
 501       {
 502         if ((defined ($qunkref->branch))
 503             and ($qunkref->branch eq $branch))
 504         {
 505           if ($fbegun) {
 506             # kff todo: comma-delimited in XML too?  Sure.
 507             $beauty .= ", ";
 508           }
 509           else {
 510             $fbegun = 1;
 511           }
 512           my $fname = substr ($qunkref->filename, length ($common_dir));
 513           $beauty .= $fname;
 514           $qunkref->{'printed'} = 1;  # Just setting a mark bit, basically
 515
 516           if ( $Show_Tags and defined $qunkref->tags ) {
 517             my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
 518
 519             if (@tags) {
 520               $beauty .= " (tags: ";
 521               $beauty .= join (', ', @tags);
 522               $beauty .= ")";
 523             }
 524           }
 525
 526           if ($Show_Revisions) {
 527             # Collect the revision numbers' last components, but don't
 528             # print them -- they'll get printed with the branch name
 529             # later.
 530             $qunkref->revision =~ /.+\.([\d]+)$/;
 531             push (@brevisions, $1);
 532
 533             # todo: we're still collecting branch roots, but we're not
 534             # showing them anywhere.  If we do show them, it would be
 535             # nifty to just call them revision "0" on a the branch.
 536             # Yeah, that's the ticket.
 537           }
 538         }
 539       }
 540       $beauty .= " ($branch";
 541       if (@brevisions) {
 542         if ((scalar (@brevisions)) > 1) {
 543           $beauty .= ".[";
 544           $beauty .= (join (',', @brevisions));
 545           $beauty .= "]";
 546         }
 547         else {
 548           # Square brackets are spurious here, since there's no range to
 549           # encapsulate
 550           $beauty .= ".$brevisions[0]";
 551         }
 552       }
 553       $beauty .= ")";
 554     }
 555   }
 556
 557   # Okay; any qunks that were done according to branch are taken care
 558   # of, and marked as printed.  Now print everyone else.
 559
 560   my %fileinfo_printed;
 561   foreach my $qunkref (@qunkrefs)
 562   {
 563     next if (defined ($qunkref->{'printed'}));   # skip if already printed
 564
 565     my $b = substr ($qunkref->filename, length ($common_dir));
 566     # todo: Shlomo's change was this:
 567     # $beauty .= substr ($qunkref->filename,
 568     #              (($common_dir eq "./") ? '' : length ($common_dir)));
 569     $qunkref->{'printed'} = 1;  # Set a mark bit.
 570
 571     if ($Show_Revisions || $Show_Tags || $Show_Dead)
 572     {
 573       my $started_addendum = 0;
 574
 575       if ($Show_Revisions) {
 576         $started_addendum = 1;
 577         $b .= " (";
 578         $b .= $qunkref->revision;
 579       }
 580       if ($Show_Dead && $qunkref->state =~ /dead/)
 581       {
 582         # Deliberately not using $started_addendum. Keeping it simple.
 583         $b .= "[DEAD]";
 584       }
 585       if ($Show_Tags && (defined $qunkref->tags)) {
 586         my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
 587         if ((scalar (@tags)) > 0) {
 588           if ($started_addendum) {
 589             $b .= ", ";
 590           }
 591           else {
 592             $b .= " (tags: ";
 593           }
 594           $b .= join (', ', @tags);
 595           $started_addendum = 1;
 596         }
 597       }
 598       if ($started_addendum) {
 599         $b .= ")";
 600       }
 601     }
 602
 603     unless ( exists $fileinfo_printed{$b} ) {
 604       if ($fbegun) {
 605         $beauty .= ", ";
 606       } else {
 607         $fbegun = 1;
 608       }
 609       $beauty .= $b, $fileinfo_printed{$b} = 1;
 610     }
 611   }
 612
 613   # Unanimous tags always come last.
 614   if ($Show_Tags && %unanimous_tags)
 615   {
 616     $beauty .= " (utags: ";
 617     $beauty .= join (', ', sort keys (%unanimous_tags));
 618     $beauty .= ")";
 619   }
 620
 621   # todo: still have to take care of branch_roots?
 622
 623   $beauty = "$beauty:";
 624
 625   return $beauty;
 626 }
 627
 628 # -------------------------------------
 629
 630 sub output_tagdate {
 631   my $self = shift;
 632   my ($fh, $time, $tag) = @_;
 633
 634   my $fdatetime = $self->fdatetime($time);
 635   print $fh "$fdatetime  tag $tag\n\n";
 636   return;
 637 }
 638
 639 # -------------------------------------
 640
 641 sub format_body {
 642   my $self = shift;
 643   my ($msg, $files, $qunklist) = @_;
 644
 645   my $body;
 646
 647   if ( $No_Wrap and ! $Summary ) {
 648     $msg = $self->preprocess_msg_text($msg);
 649     $files = $self->mywrap("\t", "\t  ", "* $files");
 650     $msg =~ s/\n(.+)/\n$Indent$1/g;
 651     unless ($After_Header eq " ") {
 652       $msg =~ s/^(.+)/$Indent$1/g;
 653     }
 654     if ( $Hide_Filenames ) {
 655       $body = $After_Header . $msg;
 656     } else {
 657       $body = $files . $After_Header . $msg;
 658     }
 659   } elsif ( $Summary ) {
 660     my ($filelist, $qunk);
 661     my (@DeletedQunks, @AddedQunks, @ChangedQunks);
 662
 663     $msg = $self->preprocess_msg_text($msg);
 664     #
 665     #     Sort the files (qunks) according to the operation that was
 666     # performed.  Files which were added have no line change
 667     # indicator, whereas deleted files have state dead.
 668     #
 669     foreach $qunk ( @$qunklist ) {
 670       if ( "dead" eq $qunk->state) {
 671         push @DeletedQunks, $qunk;
 672       } elsif ( ! defined $qunk->lines ) {
 673         push @AddedQunks, $qunk;
 674       } else {
 675         push @ChangedQunks, $qunk;
 676       }
 677     }
 678     #
 679     #     The qunks list was  originally in tree search order.  Let's
 680     # get that back.  The lists, if they exist, will be reversed upon
 681     # processing.
 682     #
 683
 684     #
 685     #     Now write the three sections onto $filelist
 686     #
 687     if ( @DeletedQunks ) {
 688       $filelist .= "\tDeleted:\n";
 689       foreach $qunk ( @DeletedQunks ) {
 690         $filelist .= "\t\t" . $qunk->filename;
 691         $filelist .= " (" . $qunk->revision . ")";
 692         $filelist .= "\n";
 693       }
 694       undef @DeletedQunks;
 695     }
 696
 697     if ( @AddedQunks ) {
 698       $filelist .= "\tAdded:\n";
 699       foreach $qunk (@AddedQunks) {
 700         $filelist .= "\t\t" . $qunk->filename;
 701         $filelist .= " (" . $qunk->revision . ")";
 702         $filelist .= "\n";
 703       }
 704       undef @AddedQunks ;
 705     }
 706
 707     if ( @ChangedQunks ) {
 708       $filelist .= "\tChanged:\n";
 709       foreach $qunk (@ChangedQunks) {
 710         $filelist .= "\t\t" . $qunk->filename;
 711         $filelist .= " (" . $qunk->revision . ")";
 712         $filelist .= ", \"" . $qunk->state . "\"";
 713         $filelist .= ", lines: " . $qunk->lines;
 714         $filelist .= "\n";
 715       }
 716       undef @ChangedQunks;
 717     }
 718
 719     chomp $filelist;
 720
 721     if ( $Hide_Filenames ) {
 722       $filelist = '';
 723     }
 724
 725     $msg =~ s/\n(.*)/\n$Indent$1/g;
 726     unless ( $After_Header eq " " or $FSF_Style ) {
 727       $msg =~ s/^(.*)/$Indent$1/g;
 728     }
 729
 730     unless ( $No_Wrap ) {
 731       if ( $FSF_Style ) {
 732         $msg = $self->wrap_log_entry($msg, '', 69, 69);
 733         chomp($msg);
 734         chomp($msg);
 735       } else {
 736         $msg = $self->mywrap('', $Indent, "$msg");
 737         $msg =~ s/[ \t]+\n/\n/g;
 738       }
 739     }
 740
 741     $body = $filelist . $After_Header . $msg;
 742   } else {  # do wrapping, either FSF-style or regular
 743     my $latter_wrap = $No_Extra_Indent ? $Indent : "$Indent  ";
 744
 745     if ( $FSF_Style ) {
 746       $files = $self->mywrap($Indent, $latter_wrap, "* $files");
 747
 748       my $files_last_line_len = 0;
 749       if ( $After_Header eq " " ) {
 750         $files_last_line_len = $self->last_line_len($files);
 751         $files_last_line_len += 1;  # for $After_Header
 752       }
 753
 754       $msg = $self->wrap_log_entry($msg, $latter_wrap, 69-$files_last_line_len, 69);
 755       $body = $files . $After_Header . $msg;
 756     } else {  # not FSF-style
 757       $msg = $self->preprocess_msg_text($msg);
 758       $body = $files . $After_Header . $msg;
 759       $body = $self->mywrap($Indent, $latter_wrap, "* $body");
 760       $body =~ s/[ \t]+\n/\n/g;
 761     }
 762   }
 763
 764   return $body;
 765 }
 766
 767 # ----------------------------------------------------------------------------
 768
 769 package CVS::Utils::ChangeLog::EntrySet::Output::XML;
 770
 771 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
 772
 773 use File::Basename qw( fileparse );
 774
 775 sub new {
 776   my $class = shift;
 777   my $self = $class->SUPER::new(@_);
 778 }
 779
 780 # -------------------------------------
 781
 782 sub header_line {
 783   my $self = shift;
 784   my ($time, $author, $lastdate) = @_;
 785
 786   my $header_line = '';
 787
 788   my $isoDate;
 789
 790   my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
 791
 792   # Ideally, this would honor $UTC_Times and use +HH:MM syntax
 793   $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
 794                      $y + 1900, $m + 1, $d, $H, $M, $S);
 795
 796   my (undef,$min,$hour,$mday,$mon,$year,$wday)
 797     = $UTC_Times ? gmtime($time) : localtime($time);
 798
 799   my $date = $self->fdatetime($time);
 800   $wday = $self->wday($wday);
 801
 802   $header_line =
 803     sprintf ("<date>%4u-%02u-%02u</date>\n${wday}<time>%02u:%02u</time>\n",
 804              $year+1900, $mon+1, $mday, $hour, $min);
 805   $header_line .= "<isoDate>$isoDate</isoDate>\n"
 806     unless $No_XML_ISO_Date;
 807   $header_line .= sprintf("<author>%s</author>\n" , $author);
 808 }
 809
 810 # -------------------------------------
 811
 812 sub wday {
 813   my $self = shift; my $class = ref $self;
 814   my ($wday) = @_;
 815
 816   return '<weekday>' . $class->weekday_en($wday) . "</weekday>\n";
 817 }
 818
 819 # -------------------------------------
 820
 821 sub escape {
 822   my $self = shift;
 823
 824   my $txt = shift;
 825   $txt =~ s/&/&amp;/g;
 826   $txt =~ s/</&lt;/g;
 827   $txt =~ s/>/&gt;/g;
 828   return $txt;
 829 }
 830
 831 # -------------------------------------
 832
 833 sub output_header {
 834   my $self = shift;
 835   my ($fh) = @_;
 836
 837   my $encoding    =
 838     length $XML_Encoding ? qq'encoding="$XML_Encoding"' : '';
 839   my $version     = 'version="1.0"';
 840   my $declaration =
 841     sprintf '<?xml %s?>', join ' ', grep length, $version, $encoding;
 842   my $root        =
 843     $No_XML_Namespace ?
 844       '<changelog>'     :
 845         '<changelog xmlns="http://www.red-bean.com/xmlns/cvs2cl/">';
 846   print $fh "$declaration\n\n$root\n\n";
 847 }
 848
 849 # -------------------------------------
 850
 851 sub output_footer {
 852   my $self = shift;
 853   my ($fh) = @_;
 854
 855   print $fh "</changelog>\n";
 856 }
 857
 858 # -------------------------------------
 859
 860 sub preprocess_msg_text {
 861   my $self = shift;
 862   my ($text) = @_;
 863
 864   $text = $self->SUPER::preprocess_msg_text($text);
 865
 866   $text = $self->escape($text);
 867   chomp $text;
 868   $text = "<msg>${text}</msg>\n";
 869
 870   return $text;
 871 }
 872
 873 # -------------------------------------
 874
 875 # Here we take a bunch of qunks and convert them into a printed
 876 # summary that will include all the information the user asked for.
 877 sub pretty_file_list {
 878   my $self = shift;
 879   my ($qunksref) = @_;
 880
 881   my $beauty = '';          # The accumulating header string for this entry.
 882   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
 883   my %unanimous_tags;       # Tags found in all qunks
 884   my %all_branches;         # Branches found in any qunk
 885   my $fbegun = 0;           # Did we begin printing filenames yet?
 886
 887   my ($common_dir, $qunkrefs) =
 888     $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches),
 889       $qunksref);
 890
 891   my @qunkrefs = @$qunkrefs;
 892
 893   # If outputting XML, then our task is pretty simple, because we
 894   # don't have to detect common dir, common tags, branch prefixing,
 895   # etc.  We just output exactly what we have, and don't worry about
 896   # redundancy or readability.
 897
 898   foreach my $qunkref (@qunkrefs)
 899   {
 900     my $filename    = $qunkref->filename;
 901     my $state       = $qunkref->state;
 902     my $revision    = $qunkref->revision;
 903     my $tags        = $qunkref->tags;
 904     my $branch      = $qunkref->branch;
 905     my $branchroots = $qunkref->roots;
 906     my $lines       = $qunkref->lines;
 907
 908     $filename = $self->escape($filename);   # probably paranoia
 909     $revision = $self->escape($revision);   # definitely paranoia
 910
 911     $beauty .= "<file>\n";
 912     $beauty .= "<name>${filename}</name>\n";
 913     $beauty .= "<cvsstate>${state}</cvsstate>\n";
 914     $beauty .= "<revision>${revision}</revision>\n";
 915
 916     if ($Show_Lines_Modified
 917         && $lines && $lines =~ m/\+(\d+)\s+-(\d+)/) {
 918         $beauty .= "<linesadded>$1</linesadded>\n";
 919         $beauty .= "<linesremoved>$2</linesremoved>\n";
 920     }
 921
 922     if ($branch) {
 923       $branch   = $self->escape($branch);     # more paranoia
 924       $beauty .= "<branch>${branch}</branch>\n";
 925     }
 926     foreach my $tag (@$tags) {
 927       $tag = $self->escape($tag);  # by now you're used to the paranoia
 928       $beauty .= "<tag>${tag}</tag>\n";
 929     }
 930     foreach my $root (@$branchroots) {
 931       $root = $self->escape($root);  # which is good, because it will continue
 932       $beauty .= "<branchroot>${root}</branchroot>\n";
 933     }
 934     $beauty .= "</file>\n";
 935   }
 936
 937   # Theoretically, we could go home now.  But as long as we're here,
 938   # let's print out the common_dir and utags, as a convenience to
 939   # the receiver (after all, earlier code calculated that stuff
 940   # anyway, so we might as well take advantage of it).
 941
 942   if ((scalar (keys (%unanimous_tags))) > 1) {
 943     foreach my $utag ((keys (%unanimous_tags))) {
 944       $utag = $self->escape($utag);   # the usual paranoia
 945       $beauty .= "<utag>${utag}</utag>\n";
 946     }
 947   }
 948   if ($common_dir) {
 949     $common_dir = $self->escape($common_dir);
 950     $beauty .= "<commondir>${common_dir}</commondir>\n";
 951   }
 952
 953   # That's enough for XML, time to go home:
 954   return $beauty;
 955 }
 956
 957 # -------------------------------------
 958
 959 sub output_tagdate {
 960   my $self = shift;
 961   my ($fh, $time, $tag) = @_;
 962
 963   my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
 964
 965   # Ideally, this would honor $UTC_Times and use +HH:MM syntax
 966   my $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
 967                        $y + 1900, $m + 1, $d, $H, $M, $S);
 968
 969   print $fh "<tagdate>\n";
 970   print $fh "<tagisodate>$isoDate</tagisodate>\n";
 971   print $fh "<tagdatetag>$tag</tagdatetag>\n";
 972   print $fh "</tagdate>\n\n";
 973   return;
 974 }
 975
 976 # -------------------------------------
 977
 978 sub output_entry {
 979   my $self = shift;
 980   my ($fh, $entry) = @_;
 981   print $fh "<entry>\n$entry</entry>\n\n";
 982 }
 983
 984 # -------------------------------------
 985
 986 sub format_body {
 987   my $self = shift;
 988   my ($msg, $files, $qunklist) = @_;
 989
 990   $msg = $self->preprocess_msg_text($msg);
 991   return $files . $msg;
 992 }
 993
 994 # ----------------------------------------------------------------------------
 995
 996 package CVS::Utils::ChangeLog::EntrySet::Output;
 997
 998 use Carp           qw( croak );
 999 use File::Basename qw( fileparse );
1000
1001 # Class Utility Functions -------------
1002
1003 { # form closure
1004
1005 my @weekdays = (qw(Sunday Monday Tuesday Wednesday Thursday Friday Saturday));
1006 sub weekday_en {
1007   my $class = shift;
1008   return $weekdays[$_[0]];
1009 }
1010
1011 }
1012
1013 # -------------------------------------
1014
1015 sub new {
1016   my ($proto, %args) = @_;
1017   my $class = ref $proto || $proto;
1018
1019   my $follow_branches = delete $args{follow_branches};
1020   my $follow_only     = delete $args{follow_only};
1021   my $ignore_tags     = delete $args{ignore_tags};
1022   my $show_tags       = delete $args{show_tags};
1023   die "Unrecognized arg to EntrySet::Output::new: '$_'\n"
1024     for keys %args;
1025
1026   bless +{follow_branches => $follow_branches,
1027           follow_only     => $follow_only,
1028           show_tags       => $show_tags,
1029           ignore_tags     => $ignore_tags,
1030          }, $class;
1031 }
1032
1033 # Abstract Subrs ----------------------
1034
1035 sub wday               { croak "Whoops.  Abtract method call (wday).\n" }
1036 sub pretty_file_list   { croak "Whoops.  Abtract method call (pretty_file_list).\n" }
1037 sub output_tagdate     { croak "Whoops.  Abtract method call (output_tagdate).\n" }
1038 sub header_line        { croak "Whoops.  Abtract method call (header_line).\n" }
1039
1040 # Instance Subrs ----------------------
1041
1042 sub output_header { }
1043
1044 # -------------------------------------
1045
1046 sub output_entry {
1047   my $self = shift;
1048   my ($fh, $entry) = @_;
1049   print $fh "$entry\n";
1050 }
1051
1052 # -------------------------------------
1053
1054 sub output_footer { }
1055
1056 # -------------------------------------
1057
1058 sub escape { return $_[1] }
1059
1060 # -------------------------------------
1061
1062 sub _revision_is_wanted {
1063   my ($self, $qunk) = @_;
1064
1065   my ($revision, $branch_numbers) = @{$qunk}{qw( revision branch_numbers )};
1066   my $follow_branches = $self->{follow_branches};
1067   my $follow_only     = $self->{follow_only};
1068
1069   for my $ignore_tag (keys %{$self->{ignore_tags}}) {
1070     return
1071       if defined $qunk->{tags} and grep $_ eq $ignore_tag, @{$qunk->{tags}};
1072   }
1073
1074   if ( keys %{$self->{show_tags}} ) {
1075     for my $show_tag (keys %{$self->{show_tags}}) {
1076       return
1077         if ! defined $qunk->{tags} or ! grep $_ eq $show_tag, @{$qunk->{tags}};
1078     }
1079   }
1080
1081   return 1
1082     unless @$follow_branches + @$follow_only; # no follow is follow all
1083
1084   for my $x (map([$_, 1], @$follow_branches),
1085              map([$_, 0], @$follow_only    )) {
1086     my ($branch, $followsub) = @$x;
1087
1088     # Special case for following trunk revisions
1089     return 1
1090       if $branch =~ /^trunk$/i and $revision =~ /^[0-9]+\.[0-9]+$/;
1091
1092     if ( my $branch_number = $branch_numbers->{$branch} ) {
1093       # Are we on one of the follow branches or an ancestor of same?
1094
1095       # If this revision is a prefix of the branch number, or possibly is less
1096       # in the minormost number, OR if this branch number is a prefix of the
1097       # revision, then yes.  Otherwise, no.
1098
1099       # So below, we determine if any of those conditions are met.
1100
1101       # Trivial case: is this revision on the branch?  (Compare this way to
1102       # avoid regexps that screw up Emacs indentation, argh.)
1103       if ( substr($revision, 0, (length($branch_number) + 1))
1104            eq
1105            ($branch_number . ".") ) {
1106         if ( $followsub ) {
1107           return 1;
1108 #        } elsif ( length($revision) == length($branch_number)+2 ) {
1109         } elsif ( substr($revision, length($branch_number)+1) =~ /^\d+$/ ) {
1110           return 1;
1111         }
1112       } elsif ( length($branch_number) > length($revision)
1113                 and
1114                 ! $No_Ancestors ) {
1115         # Non-trivial case: check if rev is ancestral to branch
1116
1117         # r_left still has the trailing "."
1118         my ($r_left, $r_end) = ($revision =~ /^((?:\d+\.)+)(\d+)$/);
1119
1120         # b_left still has trailing "."
1121         # b_mid has no trailing "."
1122         my ($b_left, $b_mid) = ($branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/);
1123         return 1
1124           if $r_left eq $b_left and $r_end <= $b_mid;
1125       }
1126     }
1127   }
1128
1129   return;
1130 }
1131
1132 # -------------------------------------
1133
1134 sub output_changelog {
1135 my $self = shift; my $class = ref $self;
1136   my ($grand_poobah) = @_;
1137   ### Process each ChangeLog
1138
1139   while (my ($dir,$authorhash) = each %$grand_poobah)
1140   {
1141     &main::debug ("DOING DIR: $dir\n");
1142
1143     # Here we twist our hash around, from being
1144     #   author => time => message => filelist
1145     # in %$authorhash to
1146     #   time => author => message => filelist
1147     # in %changelog.
1148     #
1149     # This is also where we merge entries.  The algorithm proceeds
1150     # through the timeline of the changelog with a sliding window of
1151     # $Max_Checkin_Duration seconds; within that window, entries that
1152     # have the same log message are merged.
1153     #
1154     # (To save space, we zap %$authorhash after we've copied
1155     # everything out of it.)
1156
1157     my %changelog;
1158     while (my ($author,$timehash) = each %$authorhash)
1159     {
1160       my %stamptime;
1161       foreach my $time (sort {$a <=> $b} (keys %$timehash))
1162       {
1163         my $msghash = $timehash->{$time};
1164         while (my ($msg,$qunklist) = each %$msghash)
1165         {
1166           my $stamptime = $stamptime{$msg};
1167           if ((defined $stamptime)
1168               and (($time - $stamptime) < $Max_Checkin_Duration)
1169               and (defined $changelog{$stamptime}{$author}{$msg}))
1170           {
1171             push(@{$changelog{$stamptime}{$author}{$msg}}, $qunklist->files);
1172           }
1173           else {
1174             $changelog{$time}{$author}{$msg} = $qunklist->files;
1175             $stamptime{$msg} = $time;
1176           }
1177         }
1178       }
1179     }
1180     undef (%$authorhash);
1181
1182     ### Now we can write out the ChangeLog!
1183
1184     my ($logfile_here, $logfile_bak, $tmpfile);
1185     my $lastdate;
1186
1187     if (! $Output_To_Stdout) {
1188       $logfile_here =  $dir . $Log_File_Name;
1189       $logfile_here =~ s/^\.\/\//\//;   # fix any leading ".//" problem
1190       $tmpfile      = "${logfile_here}.cvs2cl$$.tmp";
1191       $logfile_bak  = "${logfile_here}.bak";
1192
1193       open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\"";
1194     }
1195     else {
1196       open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
1197     }
1198
1199     print LOG_OUT $ChangeLog_Header;
1200
1201     my %tag_date_printed;
1202
1203     $self->output_header(\*LOG_OUT);
1204
1205     my @key_list = ();
1206     if($Chronological_Order) {
1207         @key_list = sort {$a <=> $b} (keys %changelog);
1208     } else {
1209         @key_list = sort {$b <=> $a} (keys %changelog);
1210     }
1211     foreach my $time (@key_list)
1212     {
1213       next if ($Delta_Mode &&
1214                (($time <= $Delta_StartTime) ||
1215                 ($time > $Delta_EndTime && $Delta_EndTime)));
1216
1217       # Set up the date/author line.
1218       # kff todo: do some more XML munging here, on the header
1219       # part of the entry:
1220       my (undef,$min,$hour,$mday,$mon,$year,$wday)
1221           = $UTC_Times ? gmtime($time) : localtime($time);
1222
1223       $wday = $self->wday($wday);
1224       # XML output includes everything else, we might as well make
1225       # it always include Day Of Week too, for consistency.
1226       my $authorhash = $changelog{$time};
1227       if ( $Show_Tag_Dates || $XML_Output ) {
1228         my %tags;
1229         while (my ($author,$mesghash) = each %$authorhash) {
1230           while (my ($msg,$qunk) = each %$mesghash) {
1231             for my $qunkref2 (@$qunk) {
1232               if (defined ($qunkref2->tags)) {
1233                 for my $tag (@{$qunkref2->tags}) {
1234                   $tags{$tag} = 1;
1235                 }
1236               }
1237             }
1238           }
1239         }
1240         # Sort here for determinism to ease testing
1241         foreach my $tag (sort keys %tags) {
1242           if ( ! defined $tag_date_printed{$tag} ) {
1243             $tag_date_printed{$tag} = $time;
1244             $self->output_tagdate(\*LOG_OUT, $time, $tag);
1245           }
1246         }
1247       }
1248       while (my ($author,$mesghash) = each %$authorhash)
1249       {
1250         # If XML, escape in outer loop to avoid compound quoting:
1251         $author = $self->escape($author);
1252
1253       FOOBIE:
1254         # We sort here to enable predictable ordering for the testing porpoises
1255         for my $msg (sort keys %$mesghash)
1256         {
1257           my $qunklist = $mesghash->{$msg};
1258
1259           my @qunklist =
1260             grep $self->_revision_is_wanted($_), @$qunklist;
1261
1262           next FOOBIE unless @qunklist;
1263
1264           my $files               = $self->pretty_file_list(\@qunklist);
1265           my $header_line;          # date and author
1266           my $wholething;           # $header_line + $body
1267
1268           my $date = $self->fdatetime($time);
1269           $header_line = $self->header_line($time, $author, $lastdate);
1270           $lastdate = $date;
1271
1272           $Text::Wrap::huge = 'overflow'
1273             if $Text::Wrap::VERSION >= 2001.0130;
1274           # Reshape the body according to user preferences.
1275           my $body = $self->format_body($msg, $files, \@qunklist);
1276
1277           $body =~ s/[ \t]+\n/\n/g;
1278           $wholething = $header_line . $body;
1279
1280           # One last check: make sure it passes the regexp test, if the
1281           # user asked for that.  We have to do it here, so that the
1282           # test can match against information in the header as well
1283           # as in the text of the log message.
1284
1285           # How annoying to duplicate so much code just because I
1286           # can't figure out a way to evaluate scalars on the trailing
1287           # operator portion of a regular expression.  Grrr.
1288           if ($Case_Insensitive) {
1289             unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/oi ) ) {
1290               $self->output_entry(\*LOG_OUT, $wholething);
1291             }
1292           }
1293           else {
1294             unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/o ) ) {
1295               $self->output_entry(\*LOG_OUT, $wholething);
1296             }
1297           }
1298         }
1299       }
1300     }
1301
1302     $self->output_footer(\*LOG_OUT);
1303
1304     close (LOG_OUT);
1305
1306     if ( ! $Output_To_Stdout ) {
1307       # If accumulating, append old data to new before renaming.  But
1308       # don't append the most recent entry, since it's already in the
1309       # new log due to CVS's idiosyncratic interpretation of "log -d".
1310       if ($Cumulative && -f $logfile_here) {
1311         open NEW_LOG, ">>$tmpfile"
1312           or die "trouble appending to $tmpfile ($!)";
1313
1314         open OLD_LOG, "<$logfile_here"
1315           or die "trouble reading from $logfile_here ($!)";
1316
1317         my $started_first_entry = 0;
1318         my $passed_first_entry = 0;
1319         while (<OLD_LOG>) {
1320           if ( ! $passed_first_entry ) {
1321             if ( ( ! $started_first_entry )
1322                 and /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/ ) {
1323               $started_first_entry = 1;
1324             } elsif ( /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/ ) {
1325               $passed_first_entry = 1;
1326               print NEW_LOG $_;
1327             }
1328           } else {
1329             print NEW_LOG $_;
1330           }
1331         }
1332
1333         close NEW_LOG;
1334         close OLD_LOG;
1335       }
1336
1337       if ( -f $logfile_here ) {
1338         rename $logfile_here, $logfile_bak;
1339       }
1340       rename $tmpfile, $logfile_here;
1341     }
1342   }
1343 }
1344
1345 # -------------------------------------
1346
1347 # Don't call this wrap, because with 5.5.3, that clashes with the
1348 # (unconditional :-( ) export of wrap() from Text::Wrap
1349 sub mywrap {
1350   my $self = shift;
1351   my ($indent1, $indent2, @text) = @_;
1352   # If incoming text looks preformatted, don't get clever
1353   my $text = Text::Wrap::wrap($indent1, $indent2, @text);
1354   if ( grep /^\s+/m, @text ) {
1355     return $text;
1356   }
1357   my @lines = split /\n/, $text;
1358   $indent2 =~ s!^((?: {8})+)!"\t" x (length($1)/8)!e;
1359   $lines[0] =~ s/^$indent1\s+/$indent1/;
1360   s/^$indent2\s+/$indent2/
1361     for @lines[1..$#lines];
1362   my $newtext = join "\n", @lines;
1363   $newtext .= "\n"
1364     if substr($text, -1) eq "\n";
1365   return $newtext;
1366 }
1367
1368 # -------------------------------------
1369
1370 sub preprocess_msg_text {
1371   my $self = shift;
1372   my ($text) = @_;
1373
1374   # Strip out carriage returns (as they probably result from DOSsy editors).
1375   $text =~ s/\r\n/\n/g;
1376   # If it *looks* like two newlines, make it *be* two newlines:
1377   $text =~ s/\n\s*\n/\n\n/g;
1378
1379   return $text;
1380 }
1381
1382 # -------------------------------------
1383
1384 sub last_line_len {
1385   my $self = shift;
1386
1387   my $files_list = shift;
1388   my @lines = split (/\n/, $files_list);
1389   my $last_line = pop (@lines);
1390   return length ($last_line);
1391 }
1392
1393 # -------------------------------------
1394
1395 # A custom wrap function, sensitive to some common constructs used in
1396 # log entries.
1397 sub wrap_log_entry {
1398   my $self = shift;
1399
1400   my $text = shift;                  # The text to wrap.
1401   my $left_pad_str = shift;          # String to pad with on the left.
1402
1403   # These do NOT take left_pad_str into account:
1404   my $length_remaining = shift;      # Amount left on current line.
1405   my $max_line_length  = shift;      # Amount left for a blank line.
1406
1407   my $wrapped_text = '';             # The accumulating wrapped entry.
1408   my $user_indent = '';              # Inherited user_indent from prev line.
1409
1410   my $first_time = 1;                # First iteration of the loop?
1411   my $suppress_line_start_match = 0; # Set to disable line start checks.
1412
1413   my @lines = split (/\n/, $text);
1414   while (@lines)   # Don't use `foreach' here, it won't work.
1415   {
1416     my $this_line = shift (@lines);
1417     chomp $this_line;
1418
1419     if ($this_line =~ /^(\s+)/) {
1420       $user_indent = $1;
1421     }
1422     else {
1423       $user_indent = '';
1424     }
1425
1426     # If it matches any of the line-start regexps, print a newline now...
1427     if ($suppress_line_start_match)
1428     {
1429       $suppress_line_start_match = 0;
1430     }
1431     elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/)
1432            || ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/)
1433            || ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/)
1434            || ($this_line =~ /^(\s+)(\S+)/)
1435            || ($this_line =~ /^(\s*)- +/)
1436            || ($this_line =~ /^()\s*$/)
1437            || ($this_line =~ /^(\s*)\*\) +/)
1438            || ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/))
1439     {
1440       # Make a line break immediately, unless header separator is set
1441       # and this line is the first line in the entry, in which case
1442       # we're getting the blank line for free already and shouldn't
1443       # add an extra one.
1444       unless (($After_Header ne " ") and ($first_time))
1445       {
1446         if ($this_line =~ /^()\s*$/) {
1447           $suppress_line_start_match = 1;
1448           $wrapped_text .= "\n${left_pad_str}";
1449         }
1450
1451         $wrapped_text .= "\n${left_pad_str}";
1452       }
1453
1454       $length_remaining = $max_line_length - (length ($user_indent));
1455     }
1456
1457     # Now that any user_indent has been preserved, strip off leading
1458     # whitespace, so up-folding has no ugly side-effects.
1459     $this_line =~ s/^\s*//;
1460
1461     # Accumulate the line, and adjust parameters for next line.
1462     my $this_len = length ($this_line);
1463     if ($this_len == 0)
1464     {
1465       # Blank lines should cancel any user_indent level.
1466       $user_indent = '';
1467       $length_remaining = $max_line_length;
1468     }
1469     elsif ($this_len >= $length_remaining) # Line too long, try breaking it.
1470     {
1471       # Walk backwards from the end.  At first acceptable spot, break
1472       # a new line.
1473       my $idx = $length_remaining - 1;
1474       if ($idx < 0) { $idx = 0 };
1475       while ($idx > 0)
1476       {
1477         if (substr ($this_line, $idx, 1) =~ /\s/)
1478         {
1479           my $line_now = substr ($this_line, 0, $idx);
1480           my $next_line = substr ($this_line, $idx);
1481           $this_line = $line_now;
1482
1483           # Clean whitespace off the end.
1484           chomp $this_line;
1485
1486           # The current line is ready to be printed.
1487           $this_line .= "\n${left_pad_str}";
1488
1489           # Make sure the next line is allowed full room.
1490           $length_remaining = $max_line_length - (length ($user_indent));
1491
1492           # Strip next_line, but then preserve any user_indent.
1493           $next_line =~ s/^\s*//;
1494
1495           # Sneak a peek at the user_indent of the upcoming line, so
1496           # $next_line (which will now precede it) can inherit that
1497           # indent level.  Otherwise, use whatever user_indent level
1498           # we currently have, which might be none.
1499           my $next_next_line = shift (@lines);
1500           if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) {
1501             $next_line = $1 . $next_line if (defined ($1));
1502             # $length_remaining = $max_line_length - (length ($1));
1503             $next_next_line =~ s/^\s*//;
1504           }
1505           else {
1506             $next_line = $user_indent . $next_line;
1507           }
1508           if (defined ($next_next_line)) {
1509             unshift (@lines, $next_next_line);
1510           }
1511           unshift (@lines, $next_line);
1512
1513           # Our new next line might, coincidentally, begin with one of
1514           # the line-start regexps, so we temporarily turn off
1515           # sensitivity to that until we're past the line.
1516           $suppress_line_start_match = 1;
1517
1518           last;
1519         }
1520         else
1521         {
1522           $idx--;
1523         }
1524       }
1525
1526       if ($idx == 0)
1527       {
1528         # We bottomed out because the line is longer than the
1529         # available space.  But that could be because the space is
1530         # small, or because the line is longer than even the maximum
1531         # possible space.  Handle both cases below.
1532
1533         if ($length_remaining == ($max_line_length - (length ($user_indent))))
1534         {
1535           # The line is simply too long -- there is no hope of ever
1536           # breaking it nicely, so just insert it verbatim, with
1537           # appropriate padding.
1538           $this_line = "\n${left_pad_str}${this_line}";
1539         }
1540         else
1541         {
1542           # Can't break it here, but may be able to on the next round...
1543           unshift (@lines, $this_line);
1544           $length_remaining = $max_line_length - (length ($user_indent));
1545           $this_line = "\n${left_pad_str}";
1546         }
1547       }
1548     }
1549     else  # $this_len < $length_remaining, so tack on what we can.
1550     {
1551       # Leave a note for the next iteration.
1552       $length_remaining = $length_remaining - $this_len;
1553
1554       if ($this_line =~ /\.$/)
1555       {
1556         $this_line .= "  ";
1557         $length_remaining -= 2;
1558       }
1559       else  # not a sentence end
1560       {
1561         $this_line .= " ";
1562         $length_remaining -= 1;
1563       }
1564     }
1565
1566     # Unconditionally indicate that loop has run at least once.
1567     $first_time = 0;
1568
1569     $wrapped_text .= "${user_indent}${this_line}";
1570   }
1571
1572   # One last bit of padding.
1573   $wrapped_text .= "\n";
1574
1575   return $wrapped_text;
1576 }
1577
1578 # -------------------------------------
1579
1580 sub _pretty_file_list {
1581   my $self = shift;
1582
1583   my ($unanimous_tags, $non_unanimous_tags, $all_branches, $qunksref) = @_;
1584
1585   my @qunkrefs =
1586     grep +( ( ! $_->tags_exists
1587               or
1588               ! grep exists $ignore_tags{$_}, @{$_->tags})
1589             and
1590             ( ! keys %show_tags
1591               or
1592               ( $_->tags_exists
1593                 and
1594                 grep exists $show_tags{$_}, @{$_->tags} )
1595             )
1596           ),
1597     @$qunksref;
1598
1599   my $common_dir;           # Dir prefix common to all files ('' if none)
1600
1601   # First, loop over the qunks gathering all the tag/branch names.
1602   # We'll put them all in non_unanimous_tags, and take out the
1603   # unanimous ones later.
1604  QUNKREF:
1605   foreach my $qunkref (@qunkrefs)
1606   {
1607     # Keep track of whether all the files in this commit were in the
1608     # same directory, and memorize it if so.  We can make the output a
1609     # little more compact by mentioning the directory only once.
1610     if ($Common_Dir && (scalar (@qunkrefs)) > 1)
1611     {
1612       if (! (defined ($common_dir)))
1613       {
1614         my ($base, $dir);
1615         ($base, $dir, undef) = fileparse ($qunkref->filename);
1616
1617         if ((! (defined ($dir)))  # this first case is sheer paranoia
1618             or ($dir eq '')
1619             or ($dir eq "./")
1620             or ($dir eq ".\\"))
1621         {
1622           $common_dir = '';
1623         }
1624         else
1625         {
1626           $common_dir = $dir;
1627         }
1628       }
1629       elsif ($common_dir ne '')
1630       {
1631         # Already have a common dir prefix, so how much of it can we preserve?
1632         $common_dir = &main::common_path_prefix ($qunkref->filename, $common_dir);
1633       }
1634     }
1635     else  # only one file in this entry anyway, so common dir not an issue
1636     {
1637       $common_dir = '';
1638     }
1639
1640     if (defined ($qunkref->branch)) {
1641       $all_branches->{$qunkref->branch} = 1;
1642     }
1643     if (defined ($qunkref->tags)) {
1644       foreach my $tag (@{$qunkref->tags}) {
1645         $non_unanimous_tags->{$tag} = 1;
1646       }
1647     }
1648   }
1649
1650   # Any tag held by all qunks will be printed specially... but only if
1651   # there are multiple qunks in the first place!
1652   if ((scalar (@qunkrefs)) > 1) {
1653     foreach my $tag (keys (%$non_unanimous_tags)) {
1654       my $everyone_has_this_tag = 1;
1655       foreach my $qunkref (@qunkrefs) {
1656         if ((! (defined ($qunkref->tags)))
1657             or (! (grep ($_ eq $tag, @{$qunkref->tags})))) {
1658           $everyone_has_this_tag = 0;
1659         }
1660       }
1661       if ($everyone_has_this_tag) {
1662         $unanimous_tags->{$tag} = 1;
1663         delete $non_unanimous_tags->{$tag};
1664       }
1665     }
1666   }
1667
1668   return $common_dir, \@qunkrefs;
1669 }
1670
1671 # -------------------------------------
1672
1673 sub fdatetime {
1674   my $self = shift;
1675
1676   my ($year, $mday, $mon, $wday, $hour, $min);
1677
1678   if ( @_ > 1 ) {
1679     ($year, $mday, $mon, $wday, $hour, $min) = @_;
1680   } else {
1681     my ($time) = @_;
1682     (undef, $min, $hour, $mday, $mon, $year, $wday) =
1683       $UTC_Times ? gmtime($time) : localtime($time);
1684
1685     $year += 1900;
1686     $mon  += 1;
1687     $wday  = $self->wday($wday);
1688   }
1689
1690   my $fdate = $self->fdate($year, $mon, $mday, $wday);
1691
1692   if ($Show_Times) {
1693     my $ftime = $self->ftime($hour, $min);
1694     return "$fdate $ftime";
1695   } else {
1696     return $fdate;
1697   }
1698 }
1699
1700 # -------------------------------------
1701
1702 sub fdate {
1703   my $self = shift;
1704
1705   my ($year, $mday, $mon, $wday);
1706
1707   if ( @_ > 1 ) {
1708     ($year, $mon, $mday, $wday) = @_;
1709   } else {
1710     my ($time) = @_;
1711     (undef, undef, undef, $mday, $mon, $year, $wday) =
1712       $UTC_Times ? gmtime($time) : localtime($time);
1713
1714     $year += 1900;
1715     $mon  += 1;
1716     $wday  = $self->wday($wday);
1717   }
1718
1719   return sprintf '%4u-%02u-%02u%s', $year, $mon, $mday, $wday;
1720 }
1721
1722 # -------------------------------------
1723
1724 sub ftime {
1725   my $self = shift;
1726
1727   my ($hour, $min);
1728
1729   if ( @_ > 1 ) {
1730     ($hour, $min) = @_;
1731   } else {
1732     my ($time) = @_;
1733     (undef, $min, $hour) = $UTC_Times ? gmtime($time) : localtime($time);
1734   }
1735
1736   return sprintf '%02u:%02u', $hour, $min;
1737 }
1738
1739 # ----------------------------------------------------------------------------
1740
1741 package CVS::Utils::ChangeLog::Message;
1742
1743 sub new {
1744   my $class = shift;
1745   my ($msg) = @_;
1746
1747   my %self = (msg => $msg, files => []);
1748
1749   bless \%self, $class;
1750 }
1751
1752 sub add_fileentry {
1753   my $self = shift;
1754   my ($fileentry) = @_;
1755
1756   die "Not a fileentry: $fileentry"
1757     unless $fileentry->isa('CVS::Utils::ChangeLog::FileEntry');
1758
1759   push @{$self->{files}}, $fileentry;
1760 }
1761
1762 sub files { wantarray ? @{$_[0]->{files}} : $_[0]->{files} }
1763
1764 # ----------------------------------------------------------------------------
1765
1766 package CVS::Utils::ChangeLog::FileEntry;
1767
1768 use File::Basename qw( fileparse );
1769
1770 # Each revision of a file has a little data structure (a `qunk')
1771 # associated with it.  That data structure holds not only the
1772 # file's name, but any additional information about the file
1773 # that might be needed in the output, such as the revision
1774 # number, tags, branches, etc.  The reason to have these things
1775 # arranged in a data structure, instead of just appending them
1776 # textually to the file's name, is that we may want to do a
1777 # little rearranging later as we write the output.  For example,
1778 # all the files on a given tag/branch will go together, followed
1779 # by the tag in parentheses (so trunk or otherwise non-tagged
1780 # files would go at the end of the file list for a given log
1781 # message).  This rearrangement is a lot easier to do if we
1782 # don't have to reparse the text.
1783 #
1784 # A qunk looks like this:
1785 #
1786 #   {
1787 #     filename    =>    "hello.c",
1788 #     revision    =>    "1.4.3.2",
1789 #     time        =>    a timegm() return value (moment of commit)
1790 #     tags        =>    [ "tag1", "tag2", ... ],
1791 #     branch      =>    "branchname" # There should be only one, right?
1792 #     roots       =>    [ "branchtag1", "branchtag2", ... ]
1793 #     lines       =>    "+x -y" # or undefined; x and y are integers
1794 #   }
1795
1796 # Single top-level ChangeLog, or one per subdirectory?
1797 my $distributed;
1798 sub distributed { $#_ ? ($distributed = $_[1]) : $distributed; }
1799
1800 sub new {
1801   my $class = shift;
1802   my ($path, $time, $revision, $state, $lines,
1803       $branch_names, $branch_roots, $branch_numbers, $symbolic_names) = @_;
1804
1805   my %self = (time     => $time,
1806               revision => $revision,
1807               state    => $state,
1808               lines    => $lines,
1809               branch_numbers => $branch_numbers,
1810              );
1811
1812   if ( $distributed ) {
1813     @self{qw(filename dir_key)} = fileparse($path);
1814   } else {
1815     @self{qw(filename dir_key)} = ($path, './');
1816   }
1817
1818   { # Scope for $branch_prefix
1819     (my ($branch_prefix) = ($revision =~ /((?:\d+\.)+)\d+/));
1820     $branch_prefix =~ s/\.$//;
1821     if ( $branch_names->{$branch_prefix} ) {
1822       my $branch_name = $branch_names->{$branch_prefix};
1823       $self{branch}   = $branch_name;
1824       $self{branches} = [$branch_name];
1825     }
1826     while ( $branch_prefix =~ s/^(\d+(?:\.\d+\.\d+)+)\.\d+\.\d+$/$1/ ) {
1827       push @{$self{branches}}, $branch_names->{$branch_prefix}
1828         if exists $branch_names->{$branch_prefix};
1829     }
1830   }
1831
1832   # If there's anything in the @branch_roots array, then this
1833   # revision is the root of at least one branch.  We'll display
1834   # them as branch names instead of revision numbers, the
1835   # substitution for which is done directly in the array:
1836   $self{'roots'} = [ map { $branch_names->{$_} } @$branch_roots ]
1837     if @$branch_roots;
1838
1839   if ( exists $symbolic_names->{$revision} ) {
1840     $self{tags} = delete $symbolic_names->{$revision};
1841     &main::delta_check($time, $self{tags});
1842   }
1843
1844   bless \%self, $class;
1845 }
1846
1847 sub filename       { $_[0]->{filename}       }
1848 sub dir_key        { $_[0]->{dir_key}        }
1849 sub revision       { $_[0]->{revision}       }
1850 sub branch         { $_[0]->{branch}         }
1851 sub state          { $_[0]->{state}          }
1852 sub lines          { $_[0]->{lines}          }
1853 sub roots          { $_[0]->{roots}          }
1854 sub branch_numbers { $_[0]->{branch_numbers} }
1855
1856 sub tags        { $_[0]->{tags}     }
1857 sub tags_exists {
1858   exists $_[0]->{tags};
1859 }
1860
1861 # This may someday be used in a more sophisticated calculation of what other
1862 # files are involved in this commit.  For now, we don't use it much except for
1863 # delta mode, because the common-commit-detection algorithm is hypothesized to
1864 # be "good enough" as it stands.
1865 sub time     { $_[0]->{time}     }
1866
1867 # ----------------------------------------------------------------------------
1868
1869 package CVS::Utils::ChangeLog::EntrySetBuilder;
1870
1871 use File::Basename qw( fileparse );
1872 use Time::Local    qw( timegm );
1873
1874 use constant MAILNAME => "/etc/mailname";
1875
1876 # In 'cvs log' output, one long unbroken line of equal signs separates files:
1877 use constant FILE_SEPARATOR => '=' x 77;# . "\n";
1878 # In 'cvs log' output, a shorter line of dashes separates log messages within
1879 # a file:
1880 use constant REV_SEPARATOR  => '-' x 28;# . "\n";
1881
1882 use constant EMPTY_LOG_MESSAGE => '*** empty log message ***';
1883
1884 # -------------------------------------
1885
1886 sub new {
1887   my ($proto) = @_;
1888   my $class = ref $proto || $proto;
1889
1890   my $poobah  = CVS::Utils::ChangeLog::EntrySet->new;
1891   my $self = bless +{ grand_poobah => $poobah }, $class;
1892
1893   $self->clear_file;
1894   $self->maybe_read_user_map_file;
1895   return $self;
1896 }
1897
1898 # -------------------------------------
1899
1900 sub clear_msg {
1901   my ($self) = @_;
1902
1903   # Make way for the next message
1904   undef $self->{rev_msg};
1905   undef $self->{rev_time};
1906   undef $self->{rev_revision};
1907   undef $self->{rev_author};
1908   undef $self->{rev_state};
1909   undef $self->{lines};
1910   $self->{rev_branch_roots} = [];       # For showing which files are branch
1911                                         # ancestors.
1912   $self->{collecting_symbolic_names} = 0;
1913 }
1914
1915 # -------------------------------------
1916
1917 sub clear_file {
1918   my ($self) = @_;
1919   $self->clear_msg;
1920
1921   undef $self->{filename};
1922   $self->{branch_names}   = +{};        # We'll grab branch names while we're
1923                                         # at it.
1924   $self->{branch_numbers} = +{};        # Save some revisions for
1925                                         # @Follow_Branches
1926   $self->{symbolic_names} = +{};        # Where tag names get stored.
1927 }
1928
1929 # -------------------------------------
1930
1931 sub grand_poobah { $_[0]->{grand_poobah} }
1932
1933 # -------------------------------------
1934
1935 sub read_changelog {
1936   my ($self, $command) = @_;
1937
1938   local (*READER, *WRITER);
1939   my $pid;
1940   if (! $Input_From_Stdin) {
1941     pipe(READER, WRITER)
1942       or die "Couldn't form pipe: $!\n";
1943     $pid = fork;
1944     die "Couldn't fork: $!\n"
1945       if ! defined $pid;
1946     if ( ! $pid ) { # child
1947       open STDOUT, '>&=' . fileno WRITER
1948         or die "Couldn't dup stderr to ", fileno WRITER, "\n";
1949       # strangely, some perls give spurious warnings about STDIN being opened
1950       # for output only these close calls precede the STDOUT reopen above.
1951       # I think they must be reusing fd 1.
1952       close READER;
1953       close STDIN;
1954
1955       exec @$command;
1956     }
1957
1958     close WRITER;
1959
1960     &main::debug ("(run \"@$command\")\n");
1961   }
1962   else {
1963     open READER, '-' or die "unable to open stdin for reading";
1964   }
1965
1966   binmode READER;
1967
1968  XX_Log_Source:
1969   while (<READER>) {
1970     chomp;
1971     s!\r$!!;
1972
1973     # If on a new file and don't see filename, skip until we find it, and
1974     # when we find it, grab it.
1975     if ( ! defined $self->{filename} ) {
1976       $self->read_file_path($_);
1977     } elsif ( /^symbolic names:$/ ) {
1978       $self->{collecting_symbolic_names} = 1;
1979     } elsif ( $self->{collecting_symbolic_names} ) {
1980       $self->read_symbolic_name($_);
1981     } elsif ( $_ eq FILE_SEPARATOR and ! defined $self->{rev_revision} ) {
1982       $self->clear_file;
1983     } elsif ( ! defined $self->{rev_revision} ) {
1984         # If have file name, but not revision, and see revision, then grab
1985         # it.  (We collect unconditionally, even though we may or may not
1986         # ever use it.)
1987       $self->read_revision($_);
1988     } elsif ( ! defined $self->{rev_time} ) { # and /^date: /) {
1989       $self->read_date_author_and_state($_);
1990     } elsif ( /^branches:\s+(.*);$/ ) {
1991       $self->read_branches($1);
1992     } elsif ( ! ( $_ eq FILE_SEPARATOR or $_ eq REV_SEPARATOR ) ) {
1993       # If have file name, time, and author, then we're just grabbing
1994       # log message texts:
1995       $self->{rev_msg} .= $_ . "\n";   # Normally, just accumulate the message...
1996     } else {
1997       my $noadd = 0;
1998       if ( ! $self->{rev_msg}
1999            or $self->{rev_msg} =~ /^\s*(\.\s*)?$/
2000            or index($self->{rev_msg}, EMPTY_LOG_MESSAGE) > -1 ) {
2001         # ... until a msg separator is encountered:
2002         # Ensure the message contains something:
2003         $self->clear_msg, $noadd = 1
2004           if $Prune_Empty_Msgs;
2005         $self->{rev_msg} = "[no log message]\n";
2006       }
2007
2008       $self->add_file_entry
2009         unless $noadd;
2010
2011       if ( $_ eq FILE_SEPARATOR ) {
2012         $self->clear_file;
2013       } else {
2014         $self->clear_msg;
2015       }
2016     }
2017   }
2018
2019   close READER
2020     or die "Couldn't close pipe reader: $!\n";
2021   if ( defined $pid ) {
2022     my $rv;
2023     waitpid $pid, 0;
2024     0 == $?
2025       or $!=1, die sprintf("Problem reading log input (pid/exit/signal/core: %d/%d/%d/%d)\n",
2026                            $pid, $? >> 8, $? & 127, $? & 128);
2027   }
2028   return;
2029 }
2030
2031 # -------------------------------------
2032
2033 sub add_file_entry {
2034   $_[0]->grand_poobah->add_fileentry(@{$_[0]}{qw(filename rev_time rev_revision
2035                                                  rev_state lines branch_names
2036                                                  rev_branch_roots
2037                                                  branch_numbers
2038                                                  symbolic_names
2039                                                  rev_author rev_msg)});
2040 }
2041
2042 # -------------------------------------
2043
2044 sub maybe_read_user_map_file {
2045   my ($self) = @_;
2046
2047   my %expansions;
2048   my $User_Map_Input;
2049
2050   if ($User_Map_File)
2051   {
2052     if ( $User_Map_File =~ m{^([-\w\@+=.,\/]+):([-\w\@+=.,\/:]+)} and
2053          !-f $User_Map_File )
2054     {
2055       my $rsh = (exists $ENV{'CVS_RSH'} ? $ENV{'CVS_RSH'} : 'ssh');
2056       $User_Map_Input = "$rsh $1 'cat $2' |";
2057       &main::debug ("(run \"${User_Map_Input}\")\n");
2058     }
2059     else
2060     {
2061       $User_Map_Input = "<$User_Map_File";
2062     }
2063
2064     open (MAPFILE, $User_Map_Input)
2065         or die ("Unable to open $User_Map_File ($!)");
2066
2067     while (<MAPFILE>)
2068     {
2069       next if /^\s*#/;  # Skip comment lines.
2070       next if not /:/;  # Skip lines without colons.
2071
2072       # It is now safe to split on ':'.
2073       my ($username, $expansion) = split ':';
2074       chomp $expansion;
2075       $expansion =~ s/^'(.*)'$/$1/;
2076       $expansion =~ s/^"(.*)"$/$1/;
2077
2078       # If it looks like the expansion has a real name already, then
2079       # we toss the username we got from CVS log.  Otherwise, keep
2080       # it to use in combination with the email address.
2081
2082       if ($expansion =~ /^\s*<{0,1}\S+@.*/) {
2083         # Also, add angle brackets if none present
2084         if (! ($expansion =~ /<\S+@\S+>/)) {
2085           $expansions{$username} = "$username <$expansion>";
2086         }
2087         else {
2088           $expansions{$username} = "$username $expansion";
2089         }
2090       }
2091       else {
2092         $expansions{$username} = $expansion;
2093       }
2094     } # fi ($User_Map_File)
2095
2096     close (MAPFILE);
2097   }
2098
2099   if (defined $User_Passwd_File)
2100   {
2101     if ( ! defined $Domain ) {
2102       if ( -e MAILNAME ) {
2103         chomp($Domain = slurp_file(MAILNAME));
2104       } else {
2105       MAILDOMAIN_CMD:
2106         for ([qw(hostname -d)], 'dnsdomainname', 'domainname') {
2107           my ($text, $exit, $sig, $core) = run_ext($_);
2108           if ( $exit == 0 && $sig == 0 && $core == 0 ) {
2109             chomp $text;
2110             if ( length $text ) {
2111               $Domain = $text;
2112               last MAILDOMAIN_CMD;
2113             }
2114           }
2115         }
2116       }
2117     }
2118
2119     die "No mail domain found\n"
2120       unless defined $Domain;
2121
2122     open (MAPFILE, "<$User_Passwd_File")
2123         or die ("Unable to open $User_Passwd_File ($!)");
2124     while (<MAPFILE>)
2125     {
2126       # all lines are valid
2127       my ($username, $pw, $uid, $gid, $gecos, $homedir, $shell) = split ':';
2128       my $expansion = '';
2129       ($expansion) = split (',', $gecos)
2130         if defined $gecos && length $gecos;
2131
2132       my $mailname = $Domain eq '' ? $username : "$username\@$Domain";
2133       $expansions{$username} = "$expansion <$mailname>";
2134     }
2135     close (MAPFILE);
2136   }
2137
2138  $self->{usermap} = \%expansions;
2139 }
2140
2141 # -------------------------------------
2142
2143 sub read_file_path {
2144   my ($self, $line) = @_;
2145
2146   my $path;
2147
2148   if ( $line =~ /^Working file: (.*)/ ) {
2149     $path = $1;
2150   } elsif ( defined $RCS_Root
2151             and
2152             $line =~ m|^RCS file: $RCS_Root[/\\](.*),v$| ) {
2153     $path = $1;
2154     $path =~ s!Attic/!!;
2155   } else {
2156     return;
2157   }
2158
2159   if ( @Ignore_Files ) {
2160     my $base;
2161     ($base, undef, undef) = fileparse($path);
2162
2163     my $xpath = $Case_Insensitive ? lc($path) : $path;
2164     return
2165       if grep $path =~ /$_/, @Ignore_Files;
2166   }
2167
2168   $self->{filename} = $path;
2169   return;
2170 }
2171
2172 # -------------------------------------
2173
2174 sub read_symbolic_name {
2175   my ($self, $line) = @_;
2176
2177   # All tag names are listed with whitespace in front in cvs log
2178   # output; so if see non-whitespace, then we're done collecting.
2179   if ( /^\S/ ) {
2180     $self->{collecting_symbolic_names} = 0;
2181     return;
2182   } else {
2183     # we're looking at a tag name, so parse & store it
2184
2185     # According to the Cederqvist manual, in node "Tags", tag names must start
2186     # with an uppercase or lowercase letter and can contain uppercase and
2187     # lowercase letters, digits, `-', and `_'.  However, it's not our place to
2188     # enforce that, so we'll allow anything CVS hands us to be a tag:
2189     my ($tag_name, $tag_rev) = ($line =~ /^\s+([^:]+): ([\d.]+)$/);
2190
2191     # A branch number either has an odd number of digit sections
2192     # (and hence an even number of dots), or has ".0." as the
2193     # second-to-last digit section.  Test for these conditions.
2194     my $real_branch_rev = '';
2195     if ( $tag_rev =~ /^(\d+\.\d+\.)+\d+$/             # Even number of dots...
2196          and
2197          $tag_rev !~ /^(1\.)+1$/ ) {                  # ...but not "1.[1.]1"
2198       $real_branch_rev = $tag_rev;
2199     } elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/) {  # Has ".0."
2200       $real_branch_rev = $1 . $3;
2201     }
2202
2203     # If we got a branch, record its number.
2204     if ( $real_branch_rev ) {
2205       $self->{branch_names}->{$real_branch_rev} = $tag_name;
2206       $self->{branch_numbers}->{$tag_name} = $real_branch_rev;
2207     } else {
2208       # Else it's just a regular (non-branch) tag.
2209       push @{$self->{symbolic_names}->{$tag_rev}}, $tag_name;
2210     }
2211   }
2212
2213   $self->{collecting_symbolic_names} = 1;
2214   return;
2215 }
2216
2217 # -------------------------------------
2218
2219 sub read_revision {
2220   my ($self, $line) = @_;
2221
2222   my ($revision) = ( $line =~ /^revision (\d+\.[\d.]+)/ );
2223
2224   return
2225     unless $revision;
2226
2227   $self->{rev_revision} = $revision;
2228   return;
2229 }
2230
2231 # -------------------------------------
2232
2233 { # Closure over %gecos_warned
2234 my %gecos_warned;
2235 sub read_date_author_and_state {
2236   my ($self, $line) = @_;
2237
2238   my ($time, $author, $state) = $self->parse_date_author_and_state($line);
2239
2240   if ( defined($self->{usermap}->{$author}) and $self->{usermap}->{$author} ) {
2241     $author = $self->{usermap}->{$author};
2242   } elsif ( defined $Domain or $Gecos == 1 ) {
2243     my $email = $author;
2244     $email = $author."@".$Domain
2245       if defined $Domain && $Domain ne '';
2246
2247     my $pw = getpwnam($author);
2248     my ($fullname, $office, $workphone, $homephone, $gcos);
2249     if ( defined $pw ) {
2250       $gcos = (getpwnam($author))[6];
2251       ($fullname, $office, $workphone, $homephone) =
2252         split /\s*,\s*/, $gcos;
2253     } else {
2254       warn "Couldn't find gecos info for author '$author'\n"
2255         unless $gecos_warned{$author}++;
2256       $fullname = '';
2257     }
2258     for (grep defined, $fullname, $office, $workphone, $homephone) {
2259       s/&/ucfirst(lc($pw->name))/ge;
2260     }
2261     $author = $fullname . "  <" . $email . ">"
2262       if $fullname ne '';
2263   }
2264
2265   $self->{rev_state}  = $state;
2266   $self->{rev_time}   = $time;
2267   $self->{rev_author} = $author;
2268   return;
2269 }
2270 }
2271
2272 # -------------------------------------
2273
2274 sub read_branches {
2275   # A "branches: ..." line here indicates that one or more branches
2276   # are rooted at this revision.  If we're showing branches, then we
2277   # want to show that fact as well, so we collect all the branches
2278   # that this is the latest ancestor of and store them in
2279   # $self->[rev_branch_roots}.  Just for reference, the format of the
2280   # line we're seeing at this point is:
2281   #
2282   #    branches:  1.5.2;  1.5.4;  ...;
2283   #
2284   # Okay, here goes:
2285   my ($self, $line) = @_;
2286
2287   # Ugh.  This really bothers me.  Suppose we see a log entry
2288   # like this:
2289   #
2290   #    ----------------------------
2291   #    revision 1.1
2292   #    date: 1999/10/17 03:07:38;  author: jrandom;  state: Exp;
2293   #    branches:  1.1.2;
2294   #    Intended first line of log message begins here.
2295   #    ----------------------------
2296   #
2297   # The question is, how we can tell the difference between that
2298   # log message and a *two*-line log message whose first line is
2299   #
2300   #    "branches:  1.1.2;"
2301   #
2302   # See the problem?  The output of "cvs log" is inherently
2303   # ambiguous.
2304   #
2305   # For now, we punt: we liberally assume that people don't
2306   # write log messages like that, and just toss a "branches:"
2307   # line if we see it but are not showing branches.  I hope no
2308   # one ever loses real log data because of this.
2309   if ( $Show_Branches ) {
2310     $line =~ s/(1\.)+1;|(1\.)+1$//;  # ignore the trivial branch 1.1.1
2311     $self->{rev_branch_roots} = [split /;\s+/, $line]
2312       if length $line;
2313   }
2314 }
2315
2316 # -------------------------------------
2317
2318 sub parse_date_author_and_state {
2319   my ($self, $line) = @_;
2320   # Parses the date/time and author out of a line like:
2321   #
2322   # date: 1999/02/19 23:29:05;  author: apharris;  state: Exp;
2323   #
2324   # or, in CVS 1.12.9:
2325   #
2326   # date: 2004-06-05 16:10:32 +0000; author: somebody; state: Exp;
2327
2328   my ($year, $mon, $mday, $hours, $min, $secs, $utcOffset, $author, $state, $rest) =
2329     $line =~
2330       m!(\d+)[-/](\d+)[-/](\d+)\s+(\d+):(\d+):(\d+)(\s+[+-]\d{4})?;\s+
2331         author:\s+([^;]+);\s+state:\s+([^;]+);(.*)!x
2332     or  die "Couldn't parse date ``$line''";
2333   die "Bad date or Y2K issues"
2334     unless $year > 1969 and $year < 2258;
2335   # Kinda arbitrary, but useful as a sanity check
2336   my $time = timegm($secs, $min, $hours, $mday, $mon-1, $year-1900);
2337   if ( defined $utcOffset ) {
2338     my ($plusminus, $hour, $minute) = ($utcOffset =~ m/([+-])(\d\d)(\d\d)/);
2339     my $offset = (($hour * 60) + $minute) * 60 * ($plusminus eq '+' ? -1 : 1);
2340     $time += $offset;
2341   }
2342   if ( $rest =~ m!\s+lines:\s+(.*)! ) {
2343     $self->{lines} = $1;
2344   }
2345
2346   return $time, $author, $state;
2347 }
2348
2349 # Subrs ----------------------------------------------------------------------
2350
2351 package main;
2352
2353 sub delta_check {
2354   my ($time, $tags) = @_;
2355
2356   # If we're in 'delta' mode, update the latest observed times for the
2357   # beginning and ending tags, and when we get around to printing output, we
2358   # will simply restrict ourselves to that timeframe...
2359   return
2360     unless $Delta_Mode;
2361
2362   $Delta_StartTime = $time
2363     if $time > $Delta_StartTime and grep { $_ eq $Delta_From } @$tags;
2364
2365   $Delta_EndTime = $time
2366     if $time > $Delta_EndTime and grep { $_ eq $Delta_To } @$tags;
2367 }
2368
2369 sub run_ext {
2370   my ($cmd) = @_;
2371   $cmd = [$cmd]
2372     unless ref $cmd;
2373   local $" = ' ';
2374   my $out = qx"@$cmd 2>&1";
2375   my $rv  = $?;
2376   my ($sig, $core, $exit) = ($? & 127, $? & 128, $? >> 8);
2377   return $out, $exit, $sig, $core;
2378 }
2379
2380 # -------------------------------------
2381
2382 # If accumulating, grab the boundary date from pre-existing ChangeLog.
2383 sub maybe_grab_accumulation_date {
2384   if (! $Cumulative || $Update) {
2385     return '';
2386   }
2387
2388   # else
2389
2390   open (LOG, "$Log_File_Name")
2391       or die ("trouble opening $Log_File_Name for reading ($!)");
2392
2393   my $boundary_date;
2394   while (<LOG>)
2395   {
2396     if (/^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/)
2397     {
2398       $boundary_date = "$1";
2399       last;
2400     }
2401   }
2402
2403   close (LOG);
2404
2405   # convert time from utc to local timezone if the ChangeLog has
2406   # dates/times in utc
2407   if ($UTC_Times && $boundary_date)
2408   {
2409     # convert the utc time to a time value
2410     my ($year,$mon,$mday,$hour,$min) = $boundary_date =~
2411       m#(\d+)-(\d+)-(\d+)\s+(\d+):(\d+)#;
2412     my $time = timegm(0,$min,$hour,$mday,$mon-1,$year-1900);
2413     # print the timevalue in the local timezone
2414     my ($ignore,$wday);
2415     ($ignore,$min,$hour,$mday,$mon,$year,$wday) = localtime($time);
2416     $boundary_date=sprintf ("%4u-%02u-%02u %02u:%02u",
2417                             $year+1900,$mon+1,$mday,$hour,$min);
2418   }
2419
2420   return $boundary_date;
2421 }
2422
2423 # -------------------------------------
2424
2425 # Fills up a ChangeLog structure in the current directory.
2426 sub derive_changelog {
2427   my ($command) = @_;
2428
2429   # See "The Plan" above for a full explanation.
2430
2431   # Might be adding to an existing ChangeLog
2432   my $accumulation_date = maybe_grab_accumulation_date;
2433   if ($accumulation_date) {
2434     # Insert -d immediately after 'cvs log'
2435     my $Log_Date_Command = "-d>${accumulation_date}";
2436
2437     my ($log_index) = grep $command->[$_] eq 'log', 0..$#$command;
2438     splice @$command, $log_index+1, 0, $Log_Date_Command;
2439     &debug ("(adding log msg starting from $accumulation_date)\n");
2440   }
2441
2442 #  output_changelog(read_changelog($command));
2443   my $builder = CVS::Utils::ChangeLog::EntrySetBuilder->new;
2444   $builder->read_changelog($command);
2445   $builder->grand_poobah->output_changelog;
2446 }
2447
2448 # -------------------------------------
2449
2450 sub min { $_[0] < $_[1] ? $_[0] : $_[1] }
2451
2452 # -------------------------------------
2453
2454 sub common_path_prefix {
2455   my ($path1, $path2) = @_;
2456
2457   # For compatibility (with older versions of cvs2cl.pl), we think in UN*X
2458   # terms, and mould windoze filenames to match.  Is this really appropriate?
2459   # If a file is checked in under UN*X, and cvs log run on windoze, which way
2460   # do the path separators slope?  Can we use fileparse as per the local
2461   # conventions?  If so, we should probably have a user option to specify an
2462   # OS to emulate to handle stdin-fed logs.  If we did this, we could avoid
2463   # the nasty \-/ transmogrification below.
2464
2465   my ($dir1, $dir2) = map +(fileparse($_))[1], $path1, $path2;
2466
2467   # Transmogrify Windows filenames to look like Unix.
2468   # (It is far more likely that someone is running cvs2cl.pl under
2469   # Windows than that they would genuinely have backslashes in their
2470   # filenames.)
2471   tr!\\!/!
2472     for $dir1, $dir2;
2473
2474   my ($accum1, $accum2, $last_common_prefix) = ('') x 3;
2475
2476   my @path1 = grep length($_), split qr!/!, $dir1;
2477   my @path2 = grep length($_), split qr!/!, $dir2;
2478
2479   my @common_path;
2480   for (0..min($#path1,$#path2)) {
2481     if ( $path1[$_] eq $path2[$_]) {
2482       push @common_path, $path1[$_];
2483     } else {
2484       last;
2485     }
2486   }
2487
2488   return join '', map "$_/", @common_path;
2489 }
2490
2491 # -------------------------------------
2492
2493 sub parse_options {
2494   # Check this internally before setting the global variable.
2495   my $output_file;
2496
2497   # If this gets set, we encountered unknown options and will exit at
2498   # the end of this subroutine.
2499   my $exit_with_admonishment = 0;
2500
2501   # command to generate the log
2502   my @log_source_command = qw( cvs log );
2503
2504   my (@Global_Opts, @Local_Opts);
2505
2506   Getopt::Long::Configure(qw( bundling permute no_getopt_compat
2507                               pass_through no_ignore_case ));
2508   GetOptions('help|usage|h'   => \$Print_Usage,
2509              'debug'          => \$Debug,        # unadvertised option, heh
2510              'version'        => \$Print_Version,
2511
2512              'file|f=s'       => \$output_file,
2513              'accum'          => \$Cumulative,
2514              'update'         => \$Update,
2515              'fsf'            => \$FSF_Style,
2516              'rcs=s'          => \$RCS_Root,
2517              'usermap|U=s'    => \$User_Map_File,
2518              'gecos'          => \$Gecos,
2519              'domain=s'       => \$Domain,
2520              'passwd=s'       => \$User_Passwd_File,
2521              'window|W=i'     => \$Max_Checkin_Duration,
2522              'chrono'         => \$Chronological_Order,
2523              'ignore|I=s'     => \@Ignore_Files,
2524              'case-insensitive|C' => \$Case_Insensitive,
2525              'regexp|R=s'     => \$Regexp_Gate,
2526              'stdin'          => \$Input_From_Stdin,
2527              'stdout'         => \$Output_To_Stdout,
2528              'distributed|d'  => sub { CVS::Utils::ChangeLog::FileEntry->distributed(1) },
2529              'prune|P'        => \$Prune_Empty_Msgs,
2530              'no-wrap'        => \$No_Wrap,
2531              'gmt|utc'        => \$UTC_Times,
2532              'day-of-week|w'  => \$Show_Day_Of_Week,
2533              'revisions|r'    => \$Show_Revisions,
2534              'show-dead'      => \$Show_Dead,
2535              'tags|t'         => \$Show_Tags,
2536              'tagdates|T'     => \$Show_Tag_Dates,
2537              'branches|b'     => \$Show_Branches,
2538              'follow|F=s'     => \@Follow_Branches,
2539              'follow-only=s'  => \@Follow_Only,
2540              'xml-encoding=s' => \$XML_Encoding,
2541              'xml'            => \$XML_Output,
2542              'noxmlns'        => \$No_XML_Namespace,
2543              'no-xml-iso-date' => \$No_XML_ISO_Date,
2544              'no-ancestors'   => \$No_Ancestors,
2545              'lines-modified' => \$Show_Lines_Modified,
2546
2547              'no-indent'    => sub {
2548                $Indent = '';
2549              },
2550
2551              'summary'      => sub {
2552                $Summary = 1;
2553                $After_Header = "\n\n"; # Summary implies --separate-header
2554              },
2555
2556              'no-times'     => sub {
2557                $Show_Times = 0;
2558              },
2559
2560              'no-hide-branch-additions' => sub {
2561                $Hide_Branch_Additions = 0;
2562              },
2563
2564              'no-common-dir'  => sub {
2565                $Common_Dir = 0;
2566              },
2567
2568              'ignore-tag=s'   => sub {
2569                $ignore_tags{$_[1]} = 1;
2570              },
2571
2572              'show-tag=s'     => sub {
2573                $show_tags{$_[1]} = 1;
2574              },
2575
2576              # Deliberately undocumented.  This is not a public interface, and
2577              # may change/disappear at any time.
2578              'test-code=s'    => \$TestCode,
2579
2580              'delta=s'        => sub {
2581                my $arg = $_[1];
2582                if ( $arg =~
2583                     /^([A-Za-z][A-Za-z0-9_\-\]\[]*):([A-Za-z][A-Za-z0-9_\-\]\[]*)$/ ) {
2584                  $Delta_From = $1;
2585                  $Delta_To = $2;
2586                  $Delta_Mode = 1;
2587                } else {
2588                  die "--delta FROM_TAG:TO_TAG is what you meant to say.\n";
2589                }
2590              },
2591
2592              'FSF'             => sub {
2593                $Show_Times = 0;
2594                $Common_Dir = 0;
2595                $No_Extra_Indent = 1;
2596                $Indent = "\t";
2597              },
2598
2599              'header=s'        => sub {
2600                my $narg = $_[1];
2601                $ChangeLog_Header = &slurp_file ($narg);
2602                if (! defined ($ChangeLog_Header)) {
2603                  $ChangeLog_Header = '';
2604                }
2605              },
2606
2607              'global-opts|g=s' => sub {
2608                my $narg = $_[1];
2609                push @Global_Opts, $narg;
2610                splice @log_source_command, 1, 0, $narg;
2611              },
2612
2613              'log-opts|l=s' => sub {
2614                my $narg = $_[1];
2615                push @Local_Opts, $narg;
2616                push @log_source_command, $narg;
2617              },
2618
2619              'mailname=s'   => sub {
2620                my $narg = $_[1];
2621                warn "--mailname is deprecated; please use --domain instead\n";
2622                $Domain = $narg;
2623              },
2624
2625              'separate-header|S' => sub {
2626                $After_Header = "\n\n";
2627                $No_Extra_Indent = 1;
2628              },
2629
2630              'group-within-date' => sub {
2631                $GroupWithinDate = 1;
2632                $Show_Times = 0;
2633              },
2634
2635              'hide-filenames' => sub {
2636                $Hide_Filenames = 1;
2637                $After_Header = '';
2638              },
2639             )
2640     or die "options parsing failed\n";
2641
2642   push @log_source_command, map "$_", @ARGV;
2643
2644   ## Check for contradictions...
2645
2646   if ($Output_To_Stdout && CVS::Utils::ChangeLog::FileEntry->distributed) {
2647     print STDERR "cannot pass both --stdout and --distributed\n";
2648     $exit_with_admonishment = 1;
2649   }
2650
2651   if ($Output_To_Stdout && $output_file) {
2652     print STDERR "cannot pass both --stdout and --file\n";
2653     $exit_with_admonishment = 1;
2654   }
2655
2656   if ($Input_From_Stdin && @Global_Opts) {
2657     print STDERR "cannot pass both --stdin and -g\n";
2658     $exit_with_admonishment = 1;
2659   }
2660
2661   if ($Input_From_Stdin && @Local_Opts) {
2662     print STDERR "cannot pass both --stdin and -l\n";
2663     $exit_with_admonishment = 1;
2664   }
2665
2666   if ($XML_Output && $Cumulative) {
2667     print STDERR "cannot pass both --xml and --accum\n";
2668     $exit_with_admonishment = 1;
2669   }
2670
2671   # Other consistency checks and option-driven logic
2672
2673   # Bleargh.  Compensate for a deficiency of custom wrapping.
2674   if ( ($After_Header ne " ") and $FSF_Style ) {
2675     $After_Header .= "\t";
2676   }
2677
2678   @Ignore_Files = map lc, @Ignore_Files
2679     if $Case_Insensitive;
2680
2681   # Or if any other error message has already been printed out, we
2682   # just leave now:
2683   if ($exit_with_admonishment) {
2684     &usage ();
2685     exit (1);
2686   }
2687   elsif ($Print_Usage) {
2688     &usage ();
2689     exit (0);
2690   }
2691   elsif ($Print_Version) {
2692     &version ();
2693     exit (0);
2694   }
2695
2696   ## Else no problems, so proceed.
2697
2698   if ($output_file) {
2699     $Log_File_Name = $output_file;
2700   }
2701
2702   return \@log_source_command;
2703 }
2704
2705 # -------------------------------------
2706
2707 sub slurp_file {
2708   my $filename = shift || die ("no filename passed to slurp_file()");
2709   my $retstr;
2710
2711   open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)");
2712   local $/ = undef;
2713   $retstr = <SLURPEE>;
2714   close (SLURPEE);
2715   return $retstr;
2716 }
2717
2718 # -------------------------------------
2719
2720 sub debug {
2721   if ($Debug) {
2722     my $msg = shift;
2723     print STDERR $msg;
2724   }
2725 }
2726
2727 # -------------------------------------
2728
2729 sub version {
2730   print "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n";
2731 }
2732
2733 # -------------------------------------
2734
2735 sub usage {
2736   &version ();
2737
2738   eval "use Pod::Usage qw( pod2usage )";
2739
2740    if ( $@ ) {
2741     print <<'END';
2742
2743 * Pod::Usage was not found.  The formatting may be suboptimal.  Consider
2744   upgrading your Perl --- Pod::Usage is standard from 5.6 onwards, and
2745   versions of perl prior to 5.6 are getting rather rusty, now.  Alternatively,
2746   install Pod::Usage direct from CPAN.
2747 END
2748
2749     local $/ = undef;
2750     my $message = <DATA>;
2751     $message =~ s/^=(head1|item) //gm;
2752     $message =~ s/^=(over|back).*\n//gm;
2753     $message =~ s/\n{3,}/\n\n/g;
2754     print $message;
2755   } else {
2756     print "\n";
2757     pod2usage( -exitval => 'NOEXIT',
2758                -verbose => 1,
2759                -output  => \*STDOUT,
2760              );
2761   }
2762
2763   return;
2764 }
2765
2766 # Main -----------------------------------------------------------------------
2767
2768 my $log_source_command = parse_options;
2769 if ( defined $TestCode ) {
2770   eval $TestCode;
2771   die "Eval failed: '$@'\n"
2772     if $@;
2773 } else {
2774   derive_changelog($log_source_command);
2775 }
2776
2777 __DATA__
2778
2779 =head1 NAME
2780
2781 cvs2cl.pl - convert cvs log messages to changelogs
2782
2783 =head1 SYNOPSIS
2784
2785 B<cvs2cl> [I<options>] [I<FILE1> [I<FILE2> ...]]
2786
2787 =head1 DESCRIPTION
2788
2789 cvs2cl produces a GNU-style ChangeLog for CVS-controlled sources by
2790 running "cvs log" and parsing the output. Duplicate log messages get
2791 unified in the Right Way.
2792
2793 The default output of cvs2cl is designed to be compact, formally unambiguous,
2794 but still easy for humans to read.  It should be largely self-explanatory; the
2795 one abbreviation that might not be obvious is "utags".  That stands for
2796 "universal tags" -- a universal tag is one held by all the files in a given
2797 change entry.
2798
2799 If you need output that's easy for a program to parse, use the B<--xml> option.
2800 Note that with XML output, just about all available information is included
2801 with each change entry, whether you asked for it or not, on the theory that
2802 your parser can ignore anything it's not looking for.
2803
2804 If filenames are given as arguments cvs2cl only shows log information for the
2805 named files.
2806
2807 =head1 OPTIONS
2808
2809 =over 4
2810
2811 =item B<-h>, B<-help>, B<--help>, B<-?>
2812
2813 Show a short help and exit.
2814
2815 =item B<--version>
2816
2817 Show version and exit.
2818
2819 =item B<-r>, B<--revisions>
2820
2821 Show revision numbers in output.
2822
2823 =item B<-b>, B<--branches>
2824
2825 Show branch names in revisions when possible.
2826
2827 =item B<-t>, B<--tags>
2828
2829 Show tags (symbolic names) in output.
2830
2831 =item B<-T>, B<--tagdates>
2832
2833 Show tags in output on their first occurance.
2834
2835 =item B<--show-dead>
2836
2837 Show dead files.
2838
2839 =item B<--stdin>
2840
2841 Read from stdin, don't run cvs log.
2842
2843 =item B<--stdout>
2844
2845 Output to stdout not to ChangeLog.
2846
2847 =item B<-d>, B<--distributed>
2848
2849 Put ChangeLogs in subdirs.
2850
2851 =item B<-f> I<FILE>, B<--file> I<FILE>
2852
2853 Write to I<FILE> instead of ChangeLog.
2854
2855 =item B<--fsf>
2856
2857 Use this if log data is in FSF ChangeLog style.
2858
2859 =item B<--FSF>
2860
2861 Attempt strict FSF-standard compatible output.
2862
2863 =item B<-W> I<SECS>, B<--window> I<SECS>
2864
2865 Window of time within which log entries unify.
2866
2867 =item -B<U> I<UFILE>, B<--usermap> I<UFILE>
2868
2869 Expand usernames to email addresses from I<UFILE>.
2870
2871 =item B<--passwd> I<PASSWORDFILE>
2872
2873 Use system passwd file for user name expansion.  If no mail domain is provided
2874 (via B<--domain>), it tries to read one from B</etc/mailname>, output of B<hostname
2875 -d>, B<dnsdomainname>, or B<domain-name>.  cvs2cl exits with an error if none of
2876 those options is successful. Use a domain of '' to prevent the addition of a
2877 mail domain.
2878
2879 =item B<--domain> I<DOMAIN>
2880
2881 Domain to build email addresses from.
2882
2883 =item B<--gecos>
2884
2885 Get user information from GECOS data.
2886
2887 =item B<-R> I<REGEXP>, B<--regexp> I<REGEXP>
2888
2889 Include only entries that match I<REGEXP>.  This option may be used multiple
2890 times.
2891
2892 =item B<-I> I<REGEXP>, B<--ignore> I<REGEXP>
2893
2894 Ignore files whose names match I<REGEXP>.  This option may be used multiple
2895 times.  The regexp is a perl regular expression.  It is matched as is; you may
2896 want to prefix with a ^ or suffix with a $ to anchor the match.
2897
2898 =item B<-C>, B<--case-insensitive>
2899
2900 Any regexp matching is done case-insensitively.
2901
2902 =item B<-F> I<BRANCH>, B<--follow> I<BRANCH>
2903
2904 Show only revisions on or ancestral to I<BRANCH>.
2905
2906 =item B<--follow-only> I<BRANCH>
2907
2908 Like --follow, but sub-branches are not followed.
2909
2910 =item B<--no-ancestors>
2911
2912 When using B<-F>, only track changes since the I<BRANCH> started.
2913
2914 =item B<--no-hide-branch-additions>
2915
2916 By default, entries generated by cvs for a file added on a branch (a dead 1.1
2917 entry) are not shown.  This flag reverses that action.
2918
2919 =item B<-S>, B<--separate-header>
2920
2921 Blank line between each header and log message.
2922
2923 =item B<--summary>
2924
2925 Add CVS change summary information.
2926
2927 =item B<--no-wrap>
2928
2929 Don't auto-wrap log message (recommend B<-S> also).
2930
2931 =item B<--no-indent>
2932
2933 Don't indent log message
2934
2935 =item B<--gmt>, B<--utc>
2936
2937 Show times in GMT/UTC instead of local time.
2938
2939 =item B<--accum>
2940
2941 Add to an existing ChangeLog (incompatible with B<--xml>).
2942
2943 =item B<-w>, B<--day-of-week>
2944
2945 Show day of week.
2946
2947 =item B<--no-times>
2948
2949 Don't show times in output.
2950
2951 =item B<--chrono>
2952
2953 Output log in chronological order (default is reverse chronological order).
2954
2955 =item B<--header> I<FILE>
2956
2957 Get ChangeLog header from I<FILE> ("B<->" means stdin).
2958
2959 =item B<--xml>
2960
2961 Output XML instead of ChangeLog format.
2962
2963 =item B<--xml-encoding> I<ENCODING.>
2964
2965 Insert encoding clause in XML header.
2966
2967 =item B<--noxmlns>
2968
2969 Don't include xmlns= attribute in root element.
2970
2971 =item B<--hide-filenames>
2972
2973 Don't show filenames (ignored for XML output).
2974
2975 =item B<--no-common-dir>
2976
2977 Don't shorten directory names from filenames.
2978
2979 =item B<--rcs> I<CVSROOT>
2980
2981 Handle filenames from raw RCS, for instance those produced by "cvs rlog"
2982 output, stripping the prefix I<CVSROOT>.
2983
2984 =item B<-P>, B<--prune>
2985
2986 Don't show empty log messages.
2987
2988 =item B<--lines-modified>
2989
2990 Output the number of lines added and the number of lines removed for
2991 each checkin (if applicable). At the moment, this only affects the
2992 XML output mode.
2993
2994 =item B<--ignore-tag> I<TAG>
2995
2996 Ignore individual changes that are associated with a given tag.
2997 May be repeated, if so, changes that are associated with any of
2998 the given tags are ignored.
2999
3000 =item B<--show-tag> I<TAG>
3001
3002 Log only individual changes that are associated with a given
3003 tag.  May be repeated, if so, changes that are associated with
3004 any of the given tags are logged.
3005
3006 =item B<--delta> I<FROM_TAG>B<:>I<TO_TAG>
3007
3008 Attempt a delta between two tags (since I<FROM_TAG> up to and
3009 including I<TO_TAG>).  The algorithm is a simple date-based one
3010 (this is a hard problem) so results are imperfect.
3011
3012 =item B<-g> I<OPTS>, B<--global-opts> I<OPTS>
3013
3014 Pass I<OPTS> to cvs like in "cvs I<OPTS> log ...".
3015
3016 =item B<-l> I<OPTS>, B<--log-opts> I<OPTS>
3017
3018 Pass I<OPTS> to cvs log like in "cvs ... log I<OPTS>".
3019
3020 =back
3021
3022 Notes about the options and arguments:
3023
3024 =over 4
3025
3026 =item *
3027
3028 The B<-I> and B<-F> options may appear multiple times.
3029
3030 =item *
3031
3032 To follow trunk revisions, use "B<-F trunk>" ("B<-F TRUNK>" also works).  This is
3033 okay because no would ever, ever be crazy enough to name a branch "trunk",
3034 right?  Right.
3035
3036 =item *
3037
3038 For the B<-U> option, the I<UFILE> should be formatted like CVSROOT/users. That is,
3039 each line of I<UFILE> looks like this:
3040
3041        jrandom:jrandom@red-bean.com
3042
3043 or maybe even like this
3044
3045        jrandom:'Jesse Q. Random <jrandom@red-bean.com>'
3046
3047 Don't forget to quote the portion after the colon if necessary.
3048
3049 =item *
3050
3051 Many people want to filter by date.  To do so, invoke cvs2cl.pl like this:
3052
3053        cvs2cl.pl -l "-d'DATESPEC'"
3054
3055 where DATESPEC is any date specification valid for "cvs log -d".  (Note that
3056 CVS 1.10.7 and below requires there be no space between -d and its argument).
3057
3058 =item *
3059
3060 Dates/times are interpreted in the local time zone.
3061
3062 =item *
3063
3064 Remember to quote the argument to `B<-l>' so that your shell doesn't interpret
3065 spaces as argument separators.
3066
3067 =item *
3068
3069 See the 'Common Options' section of the cvs manual ('info cvs' on UNIX-like
3070 systems) for more information.
3071
3072 =item *
3073
3074 Note that the rules for quoting under windows shells are different.
3075
3076 =item *
3077
3078 To run in an automated environment such as CGI or PHP, suidperl may be needed
3079 in order to execute as the correct user to enable /cvsroot read lock files to
3080 be written for the 'cvs log' command.  This is likely just a case of changing
3081 the /usr/bin/perl command to /usr/bin/suidperl, and explicitly declaring the
3082 PATH variable.
3083
3084 =back
3085
3086 =head1 EXAMPLES
3087
3088 Some examples (working on UNIX shells):
3089
3090       # logs after 6th March, 2003 (inclusive)
3091       cvs2cl.pl -l "-d'>2003-03-06'"
3092       # logs after 4:34PM 6th March, 2003 (inclusive)
3093       cvs2cl.pl -l "-d'>2003-03-06 16:34'"
3094       # logs between 4:46PM 6th March, 2003 (exclusive) and
3095       # 4:34PM 6th March, 2003 (inclusive)
3096       cvs2cl.pl -l "-d'2003-03-06 16:46>2003-03-06 16:34'"
3097
3098 Some examples (on non-UNIX shells):
3099
3100       # Reported to work on windows xp/2000
3101       cvs2cl.pl -l  "-d"">2003-10-18;today<"""
3102
3103 =head1 AUTHORS
3104
3105 =over 4
3106
3107 =item Karl Fogel
3108
3109 =item Melissa O'Neill
3110
3111 =item Martyn J. Pearce
3112
3113 =back
3114
3115 Contributions from
3116
3117 =over 4
3118
3119 =item Mike Ayers
3120
3121 =item Tim Bradshaw
3122
3123 =item Richard Broberg
3124
3125 =item Nathan Bryant
3126
3127 =item Oswald Buddenhagen
3128
3129 =item Neil Conway
3130
3131 =item Arthur de Jong
3132
3133 =item Mark W. Eichin
3134
3135 =item Dave Elcock
3136
3137 =item Reid Ellis
3138
3139 =item Simon Josefsson
3140
3141 =item Robin Hugh Johnson
3142
3143 =item Terry Kane
3144
3145 =item Pete Kempf
3146
3147 =item Akos Kiss
3148
3149 =item Claus Klein
3150
3151 =item Eddie Kohler
3152
3153 =item Richard Laager
3154
3155 =item Kevin Lilly
3156
3157 =item Karl-Heinz Marbaise
3158
3159 =item Mitsuaki Masuhara
3160
3161 =item Henrik Nordstrom
3162
3163 =item Joe Orton
3164
3165 =item Peter Palfrader
3166
3167 =item Thomas Parmelan
3168
3169 =item Jordan Russell
3170
3171 =item Jacek Sliwerski
3172
3173 =item Johannes Stezenbach
3174
3175 =item Joseph Walton
3176
3177 =item Ernie Zapata
3178
3179 =back
3180
3181 =head1 BUGS
3182
3183 Please report bugs to C<bug-cvs2cl@red-bean.com>.
3184
3185 =head1 PREREQUISITES
3186
3187 This script requires C<Text::Wrap>, C<Time::Local>, and C<File::Basename>.  It
3188 also seems to require C<Perl 5.004_04> or higher.
3189
3190 =head1 OPERATING SYSTEM COMPATIBILITY
3191
3192 Should work on any OS.
3193
3194 =head1 SCRIPT CATEGORIES
3195
3196 Version_Control/CVS
3197
3198 =head1 COPYRIGHT
3199
3200 (C) 2001,2002,2003,2004 Martyn J. Pearce <fluffy@cpan.org>, under the GNU GPL.
3201
3202 (C) 1999 Karl Fogel <kfogel@red-bean.com>, under the GNU GPL.
3203
3204 cvs2cl.pl is free software; you can redistribute it and/or modify
3205 it under the terms of the GNU General Public License as published by
3206 the Free Software Foundation; either version 2, or (at your option)
3207 any later version.
3208
3209 cvs2cl.pl is distributed in the hope that it will be useful,
3210 but WITHOUT ANY WARRANTY; without even the implied warranty of
3211 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
3212 GNU General Public License for more details.
3213
3214 You may have received a copy of the GNU General Public License
3215 along with cvs2cl.pl; see the file COPYING.  If not, write to the
3216 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
3217 Boston, MA 02111-1307, USA.
3218
3219 =head1 SEE ALSO
3220
3221 cvs(1)
3222