#!/bin/sh
exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
#!perl -w
##############################################################
### ###
### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
### ###
##############################################################
## $Revision: 2.59 $
## $Date: 2005/05/18 05:34:34 $
## $Author: kfogel $
##
use strict;
use File::Basename qw( fileparse );
use Getopt::Long qw( GetOptions );
use Text::Wrap qw( );
use Time::Local qw( timegm );
use User::pwent qw( getpwnam );
# The Plan:
#
# Read in the logs for multiple files, spit out a nice ChangeLog that
# mirrors the information entered during `cvs commit'.
#
# The problem presents some challenges. In an ideal world, we could
# detect files with the same author, log message, and checkin time --
# each <filelist, author, time, logmessage> would be a changelog entry.
# We'd sort them; and spit them out. Unfortunately, CVS is *not atomic*
# so checkins can span a range of times. Also, the directory structure
# could be hierarchical.
#
# Another question is whether we really want to have the ChangeLog
# exactly reflect commits. An author could issue two related commits,
# with different log entries, reflecting a single logical change to the
# source. GNU style ChangeLogs group these under a single author/date.
# We try to do the same.
#
# So, we parse the output of `cvs log', storing log messages in a
# multilevel hash that stores the mapping:
# directory => author => time => message => filelist
# As we go, we notice "nearby" commit times and store them together
# (i.e., under the same timestamp), so they appear in the same log
# entry.
#
# When we've read all the logs, we twist this mapping into
# a time => author => message => filelist mapping for each directory.
#
# If we're not using the `--distributed' flag, the directory is always
# considered to be `./', even as we descend into subdirectories.
# Call Tree
# name number of lines (10.xii.03)
# parse_options 192
# derive_changelog 13
# +-maybe_grab_accumulation_date 38
# +-read_changelog 277
# +-maybe_read_user_map_file 94
# +-run_ext 9
# +-read_file_path 29
# +-read_symbolic_name 43
# +-read_revision 49
# +-read_date_author_and_state 25
# +-parse_date_author_and_state 20
# +-read_branches 36
# +-output_changelog 424
# +-pretty_file_list 290
# +-common_path_prefix 35
# +-preprocess_msg_text 30
# +-min 1
# +-mywrap 16
# +-last_line_len 5
# +-wrap_log_entry 177
#
# Utilities
#
# xml_escape 6
# slurp_file 11
# debug 5
# version 2
# usage 142
# -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
#
# Note about a bug-slash-opportunity:
# -----------------------------------
#
# There's a bug in Text::Wrap, which affects cvs2cl. This script
# reveals it:
#
# #!/usr/bin/perl -w
#
# use Text::Wrap;
#
# my $test_text =
# "This script demonstrates a bug in Text::Wrap. The very long line
# following this paragraph will be relocated relative to the surrounding
# text:
#
# ====================================================================
#
# See? When the bug happens, we'll get the line of equal signs below
# this paragraph, even though it should be above.";
#
#
# # Print out the test text with no wrapping:
# print "$test_text";
# print "\n";
# print "\n";
#
# # Now print it out wrapped, and see the bug:
# print wrap ("\t", " ", "$test_text");
# print "\n";
# print "\n";
#
# If the line of equal signs were one character shorter, the bug would
# not happen. Interesting.
#
# Anyway, rather than fix this in Text::Wrap, we might as well write a
# new wrap() which has the following much-needed features:
#
# * initial indentation, like current Text::Wrap()
# * subsequent line indentation, like current Text::Wrap()
# * user chooses among: force-break long words, leave them alone, or die()?
# * preserve existing indentation: chopped chunks from an indented line
# are indented by same (like this line, not counting the asterisk!)
# * optional list of things to preserve on line starts, default ">"
#
# Note that the last two are essentially the same concept, so unify in
# implementation and give a good interface to controlling them.
#
# And how about:
#
# Optionally, when we encounter a line pre-indented by the same amount as
# the previous line, strip the newline and refill, but keep that indentation.
# Yeah...
# Globals --------------------------------------------------------------------
# File-scoped lexical state shared by every package defined below. The
# variables down to "end vars set by options" hold defaults; presumably they
# are overridden by parse_options (listed in the call tree above) -- that code
# is not part of this section.
# In case we have to print it out:
my $VERSION = '$Revision: 2.59 $';
# Reduce the RCS keyword string to its middle word, i.e. the bare revision
# number ("2.59").
$VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;
## Vars set by options:
# Print debugging messages?
my $Debug = 0;
# Just show version and exit?
my $Print_Version = 0;
# Just print usage message and exit?
my $Print_Usage = 0;
# What file should we generate (defaults to "ChangeLog")?
my $Log_File_Name = "ChangeLog";
# Grab most recent entry date from existing ChangeLog file, just add
# to that ChangeLog.
my $Cumulative = 0;
# NOTE(review): the beginning of the next comment appears to be missing; it
# presumably describes an "update" mode built on `cvs log -d` -- confirm
# against the option handling.
# `cvs log -d`, this will repeat the last entry in the old log. This is OK,
# as it guarantees at least one entry in the update changelog, which means
# that there will always be a date to extract for the next update. The repeat
# entry can be removed in postprocessing, if necessary.
# MJP 2003-08-02
# I don't think this actually does anything useful
my $Update = 0;
# Expand usernames to email addresses based on a map file?
my $User_Map_File = '';
# NOTE(review): the next two are undocumented and unused in this part of the
# file; presumably a passwd-style file to read user details from and the mail
# domain appended to bare usernames -- confirm against
# maybe_read_user_map_file.
my $User_Passwd_File;
my $Mail_Domain;
# Output log in chronological order? [default is reverse chronological order]
my $Chronological_Order = 0;
# Grab user details via gecos
my $Gecos = 0;
# User domain for gecos email addresses
my $Domain;
# Output to a file or to stdout?
my $Output_To_Stdout = 0;
# Eliminate empty log messages?
my $Prune_Empty_Msgs = 0;
# Tags to suppress: a revision carrying any tag that is a key of this hash is
# rejected by _revision_is_wanted.
my %ignore_tags;
# Tags to require: when this hash is non-empty, _revision_is_wanted rejects
# revisions that lack any of its keys.
my %show_tags;
# Don't call Text::Wrap on the body of the message
my $No_Wrap = 0;
# Indentation of log messages
my $Indent = "\t";
# Summary mode: the text formatter lists files under Deleted/Added/Changed
# headings instead of using the pretty-printed file list.
my $Summary = 0;
# Separates header from log message. Code assumes it is either " " or
# "\n\n", so if there's ever an option to set it to something else,
# make sure to go through all conditionals that use this var.
my $After_Header = " ";
# XML Encoding (emitted as the encoding="..." attribute of the XML
# declaration when non-empty)
my $XML_Encoding = '';
# Format more for programs than for humans.
my $XML_Output = 0;
# Omit the xmlns attribute from the <changelog> root element?
my $No_XML_Namespace = 0;
# Omit the <isoDate> element from XML entry headers?
my $No_XML_ISO_Date = 0;
# Do some special tweaks for log data that was written in FSF
# ChangeLog style.
my $FSF_Style = 0;
# Show times in UTC instead of local time
my $UTC_Times = 0;
# Show times in output?
my $Show_Times = 1;
# Show day of week in output?
my $Show_Day_Of_Week = 0;
# Show revision numbers in output?
my $Show_Revisions = 0;
# Show dead files in output?
my $Show_Dead = 0;
# Hide dead trunk files which were created as a result of additions on a
# branch?
my $Hide_Branch_Additions = 1;
# Show tags (symbolic names) in output?
my $Show_Tags = 0;
# Show tags separately in output?
my $Show_Tag_Dates = 0;
# Show branches by symbolic name in output?
my $Show_Branches = 0;
# Show only revisions on these branches or their ancestors.
my @Follow_Branches;
# Show only revisions on these branches or their ancestors; ignore descendent
# branches.
my @Follow_Only;
# Don't bother with files matching this regexp.
my @Ignore_Files;
# How exactly we match entries. We definitely want "o",
# and user might add "i" by using --case-insensitive option.
# NOTE(review): the remark about "o"/"i" refers to regexp modifiers; the
# matching code is not in this part of the file.
my $Case_Insensitive = 0;
# Maybe only show log messages matching a certain regular expression.
my $Regexp_Gate = '';
# Pass this global option string along to cvs, to the left of `log':
my $Global_Opts = '';
# Pass this option string along to the cvs log subcommand:
my $Command_Opts = '';
# Read log output from stdin instead of invoking cvs log?
my $Input_From_Stdin = 0;
# Don't show filenames in output.
my $Hide_Filenames = 0;
# Don't shorten directory names from filenames.
# NOTE(review): the default is 1, so a true value presumably means the common
# directory prefix IS factored out -- confirm where this is read.
my $Common_Dir = 1;
# Max checkin duration. CVS checkin is not atomic, so we may have checkin
# times that span a range of time. We assume that checkins will last no
# longer than $Max_Checkin_Duration seconds, and that similarly, no
# checkins will happen from the same users with the same message less
# than $Max_Checkin_Duration seconds apart.
my $Max_Checkin_Duration = 180;
# What to put at the front of [each] ChangeLog.
my $ChangeLog_Header = '';
# Whether to enable 'delta' mode, and for what start/end tags.
my $Delta_Mode = 0;
my $Delta_From = '';
my $Delta_To = '';
# NOTE(review): undocumented and unused in this part of the file; presumably
# code supplied for testing -- confirm.
my $TestCode;
# Whether to parse filenames from the RCS filename, and if so what
# prefix to strip.
my $RCS_Root;
# Whether to output information on the # of lines added and removed
# by each file modification.
my $Show_Lines_Modified = 0;
## end vars set by options.
# latest observed times for the start/end tags in delta mode
# (in delta mode, entries at or before the start time are skipped, as are
# entries after the end time when an end time is set)
my $Delta_StartTime = 0;
my $Delta_EndTime = 0;
# When true, _revision_is_wanted does not accept revisions merely because
# they are ancestors of a followed branch.
my $No_Ancestors = 0;
# When true, wrapped continuation lines use $Indent alone rather than
# $Indent followed by extra space.
my $No_Extra_Indent = 0;
# When true (and times are hidden), the text header carries only the date and
# is printed only when the date differs from the previous entry's.
my $GroupWithinDate = 0;
# ----------------------------------------------------------------------------
package CVS::Utils::ChangeLog::EntrySet;
# Constructor. Returns an empty hash blessed into the invoking class; entries
# are added to it later, keyed by directory.
sub new {
  my ($class) = @_;
  return bless {}, $class;
}
# -------------------------------------
# Build the output formatter selected by $XML_Output (XML or plain text),
# hand it the branch/tag filters, and delegate to its output_changelog with
# our own argument list (which includes the entry set itself when this is
# invoked as a method).
sub output_changelog {
  my $flavour = 'Text';
  $flavour = 'XML' if $XML_Output;

  my $writer_class = 'CVS::Utils::ChangeLog::EntrySet::Output::' . $flavour;
  my $writer = $writer_class->new(
    follow_branches => \@Follow_Branches,
    follow_only     => \@Follow_Only,
    ignore_tags     => \%ignore_tags,
    show_tags       => \%show_tags,
  );

  return $writer->output_changelog(@_);
}
# -------------------------------------
# Record one file revision in the entry set.
#
# A FileEntry ("qunk") is built from the per-revision data and filed under
#   directory key => author => time => message text
# where each leaf is a Message object collecting the qunks that share that
# message. Revisions whose log message says the file "was initially added on
# branch ..." are dropped while $Hide_Branch_Additions is set.
sub add_fileentry {
  my ($self, $file_full_path, $time, $revision, $state, $lines,
      $branch_names, $branch_roots, $branch_numbers,
      $symbolic_names, $author, $msg_txt) = @_;

  my $qunk = CVS::Utils::ChangeLog::FileEntry->new(
    $file_full_path, $time, $revision, $state, $lines,
    $branch_names, $branch_roots, $branch_numbers, $symbolic_names,
  );

  unless ( $Hide_Branch_Additions
           and $msg_txt =~ /file .+ was initially added on branch \S+./ ) {
    my $dir = $qunk->dir_key;
    main::debug("(pushing log msg for " . $dir . $qunk->filename . ")\n");

    # Intermediate hash levels spring into existence on first use. Grouping
    # of nearby commit times happens later, when the changelog is written.
    my $by_message = $self->{$dir}{$author}{$time} ||= {};
    $by_message->{$msg_txt} = CVS::Utils::ChangeLog::Message->new($msg_txt)
      unless exists $by_message->{$msg_txt};
    $by_message->{$msg_txt}->add_fileentry($qunk);
  }
}
# ----------------------------------------------------------------------------
package CVS::Utils::ChangeLog::EntrySet::Output::Text;
use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
use File::Basename qw( fileparse );
# Constructor: defers entirely to the base Output class.
sub new {
  my ($class, @ctor_args) = @_;
  my $self = $class->SUPER::new(@ctor_args);
  return $self;
}
# -------------------------------------
# Return the English weekday name for $wday (as numbered by localtime/gmtime)
# preceded by a space, or the empty string when $Show_Day_Of_Week is off.
sub wday {
  my ($self, $wday) = @_;
  return '' unless $Show_Day_Of_Week;

  my $class = ref $self;
  return ' ' . $class->weekday_en($wday);
}
# -------------------------------------
# Build the header for one plain-text changelog entry.
#
#   $time     - commit time (epoch seconds), formatted through fdatetime
#   $author   - author string to show next to the date
#   $lastdate - formatted date of the previous entry, or undef for the first
#
# Returns:
#   * "<date> <author>\n\n" when times are shown, or when entries are not
#     being grouped by date;
#   * "<date>\n\n" when grouping by date and the date differs from $lastdate
#     (or there is no previous date);
#   * '' when grouping by date and the date repeats, so consecutive entries
#     share one header.
#
# The result is now returned explicitly; previously it was whatever the final
# if/else happened to evaluate last. An unused gmtime/localtime breakdown has
# been removed.
sub header_line {
  my ($self, $time, $author, $lastdate) = @_;

  my $date = $self->fdatetime($time);

  return sprintf("%s %s\n\n", $date, $author)
    if $Show_Times;

  return "$date $author\n\n"
    unless $GroupWithinDate;

  # Grouping within a date: repeat the header only when the date changes.
  return ''
    if defined $lastdate and $date eq $lastdate;

  return "$date\n\n";
}
# -------------------------------------
# Prepare a log message for plain-text output.
#
# After the base-class preprocessing, and unless wrapping is disabled with
# $No_Wrap, lone newlines are folded so that the text can be refilled later:
#
#   1. A line that does not start with whitespace or ">" and does not end in
#      a period is joined to the following line with ONE space, provided the
#      following line is non-empty and does not start with ">".
#   2. The remaining candidates -- in practice lines that end in a period --
#      are joined with TWO spaces, so the next sentence starts after a
#      double space.
#
# Lines starting with whitespace or the mail-quoting character ">" are left
# alone to preserve that formatting.
#
# Fix: pass 2 used a single space, contradicting its own description and
# making it indistinguishable from pass 1.
sub preprocess_msg_text {
  my ($self, $text) = @_;

  $text = $self->SUPER::preprocess_msg_text($text);
  return $text if $No_Wrap;

  # Pass 1: join lines that do not end a sentence.
  1 while $text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g;

  # Pass 2: join what is left (sentence ends) with two spaces.
  1 while $text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2  $3/g;

  return $text;
}
# -------------------------------------
# Here we take a bunch of qunks and convert them into a printed
# summary that will include all the information the user asked for.
sub pretty_file_list {
# Render the file list of one changelog entry as a single line of text.
#
# Takes a reference to the entry's qunks (file revisions). Returns '' when
# $Hide_Filenames is set; otherwise a string of the form
#   [common_dir: ]file[, file ...][ (utags: ...)]:
# where files may carry branch, revision, [DEAD] and tag annotations
# depending on $Show_Branches, $Show_Revisions, $Show_Dead and $Show_Tags.
my $self = shift;
return ''
if $Hide_Filenames;
my $qunksref = shift;
# NOTE(review): @filenames is never used below.
my @filenames;
my $beauty = ''; # The accumulating header string for this entry.
my %non_unanimous_tags; # Tags found in a proper subset of qunks
my %unanimous_tags; # Tags found in all qunks
my %all_branches; # Branches found in any qunk
my $fbegun = 0; # Did we begin printing filenames yet?
# \(%a, %b, %c) yields a list of three hash references; the helper is handed
# these so it can fill them in, and returns the common directory prefix plus
# the list of qunks to print.
my ($common_dir, $qunkrefs) =
$self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches), $qunksref);
my @qunkrefs = @$qunkrefs;
# Not XML output, so complexly compactify for chordate consumption. At this
# point we have enough global information about all the qunks to organize
# them non-redundantly for output.
if ($common_dir) {
# Note that $common_dir still has its trailing slash
$beauty .= "$common_dir: ";
}
if ($Show_Branches)
{
# For trailing revision numbers.
# NOTE(review): declared outside the per-branch loop, so revision numbers
# collected for one branch are still present when the next branch's suffix
# is built -- confirm whether that accumulation is intended.
my @brevisions;
foreach my $branch (keys (%all_branches))
{
foreach my $qunkref (@qunkrefs)
{
if ((defined ($qunkref->branch))
and ($qunkref->branch eq $branch))
{
if ($fbegun) {
# kff todo: comma-delimited in XML too? Sure.
$beauty .= ", ";
}
else {
$fbegun = 1;
}
# Strip the common directory prefix from the displayed name.
my $fname = substr ($qunkref->filename, length ($common_dir));
$beauty .= $fname;
$qunkref->{'printed'} = 1; # Just setting a mark bit, basically
if ( $Show_Tags and defined $qunkref->tags ) {
# Only tags that are not shared by every file are shown per file;
# unanimous tags are appended once at the end.
my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
if (@tags) {
$beauty .= " (tags: ";
$beauty .= join (', ', @tags);
$beauty .= ")";
}
}
if ($Show_Revisions) {
# Collect the revision numbers' last components, but don't
# print them -- they'll get printed with the branch name
# later.
# NOTE(review): $1 is used without checking that the match succeeded.
$qunkref->revision =~ /.+\.([\d]+)$/;
push (@brevisions, $1);
# todo: we're still collecting branch roots, but we're not
# showing them anywhere. If we do show them, it would be
# nifty to just call them revision "0" on the branch.
# Yeah, that's the ticket.
}
}
}
# Close this branch's group: " (branch)", " (branch.N)" or
# " (branch.[N,M,...])".
$beauty .= " ($branch";
if (@brevisions) {
if ((scalar (@brevisions)) > 1) {
$beauty .= ".[";
$beauty .= (join (',', @brevisions));
$beauty .= "]";
}
else {
# Square brackets are spurious here, since there's no range to
# encapsulate
$beauty .= ".$brevisions[0]";
}
}
$beauty .= ")";
}
}
# Okay; any qunks that were done according to branch are taken care
# of, and marked as printed. Now print everyone else.
# Rendered strings already emitted, used to suppress exact duplicates.
my %fileinfo_printed;
foreach my $qunkref (@qunkrefs)
{
next if (defined ($qunkref->{'printed'})); # skip if already printed
# $b accumulates the rendered text for this one file.
my $b = substr ($qunkref->filename, length ($common_dir));
# todo: Shlomo's change was this:
# $beauty .= substr ($qunkref->filename,
# (($common_dir eq "./") ? '' : length ($common_dir)));
$qunkref->{'printed'} = 1; # Set a mark bit.
if ($Show_Revisions || $Show_Tags || $Show_Dead)
{
# True once an opening " (" has been written and must be closed.
my $started_addendum = 0;
if ($Show_Revisions) {
$started_addendum = 1;
$b .= " (";
$b .= $qunkref->revision;
}
if ($Show_Dead && $qunkref->state =~ /dead/)
{
# Deliberately not using $started_addendum. Keeping it simple.
$b .= "[DEAD]";
}
if ($Show_Tags && (defined $qunkref->tags)) {
my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
if ((scalar (@tags)) > 0) {
if ($started_addendum) {
# A revision was already written inside the parentheses.
$b .= ", ";
}
else {
$b .= " (tags: ";
}
$b .= join (', ', @tags);
$started_addendum = 1;
}
}
if ($started_addendum) {
$b .= ")";
}
}
unless ( exists $fileinfo_printed{$b} ) {
if ($fbegun) {
$beauty .= ", ";
} else {
$fbegun = 1;
}
$beauty .= $b, $fileinfo_printed{$b} = 1;
}
}
# Unanimous tags always come last.
if ($Show_Tags && %unanimous_tags)
{
$beauty .= " (utags: ";
$beauty .= join (', ', sort keys (%unanimous_tags));
$beauty .= ")";
}
# todo: still have to take care of branch_roots?
# The file list is terminated with a colon.
$beauty = "$beauty:";
return $beauty;
}
# -------------------------------------
# Write a "<formatted time> tag <tag>" line, followed by a blank line, to $fh.
# Always returns nothing.
sub output_tagdate {
  my ($self, $fh, $time, $tag) = @_;
  print {$fh} $self->fdatetime($time) . " tag $tag\n\n";
  return;
}
# -------------------------------------
# Combine the file list and log message of one entry into the entry body.
#
#   $msg      - raw log message
#   $files    - file list as produced by pretty_file_list
#   $qunklist - reference to the entry's qunks (only used in summary mode)
#
# Three layouts, selected by the option globals:
#   * $No_Wrap without $Summary: message indented but not refilled;
#   * $Summary: files listed under Deleted / Added / Changed headings,
#     followed by the message (wrapped unless $No_Wrap);
#   * otherwise: file list and message wrapped, FSF-style or regular.
#
# Returns the formatted body string.
#
# Changes from the previous version: the summary file list starts out as an
# empty string (it used to be undef, which triggered "uninitialized" warnings
# in chomp and concatenation when no section was emitted), and the three
# copy-pasted section loops are driven from one table.
sub format_body {
  my $self = shift;
  my ($msg, $files, $qunklist) = @_;
  my $body;

  if ( $No_Wrap and ! $Summary ) {
    $msg   = $self->preprocess_msg_text($msg);
    $files = $self->mywrap("\t", "\t ", "* $files");

    # Indent every non-empty continuation line; the first line is indented
    # only when the header separator puts it on a line of its own.
    $msg =~ s/\n(.+)/\n$Indent$1/g;
    unless ($After_Header eq " ") {
      $msg =~ s/^(.+)/$Indent$1/g;
    }

    $body = $Hide_Filenames ? $After_Header . $msg
                            : $files . $After_Header . $msg;
  }
  elsif ( $Summary ) {
    $msg = $self->preprocess_msg_text($msg);

    # Classify by operation: dead state means deleted, a missing line-change
    # indicator means added, everything else is a modification.
    my (@deleted, @added, @changed);
    foreach my $qunk ( @$qunklist ) {
      if ( "dead" eq $qunk->state ) {
        push @deleted, $qunk;
      } elsif ( ! defined $qunk->lines ) {
        push @added, $qunk;
      } else {
        push @changed, $qunk;
      }
    }

    # Each row: heading, members, and whether to append state/line counts.
    my @sections = (
      [ 'Deleted', \@deleted, 0 ],
      [ 'Added',   \@added,   0 ],
      [ 'Changed', \@changed, 1 ],
    );

    my $filelist = '';
    foreach my $section (@sections) {
      my ($heading, $members, $with_delta) = @$section;
      next unless @$members;

      $filelist .= "\t$heading:\n";
      foreach my $qunk (@$members) {
        $filelist .= "\t\t" . $qunk->filename;
        $filelist .= " (" . $qunk->revision . ")";
        if ($with_delta) {
          $filelist .= ", \"" . $qunk->state . "\"";
          $filelist .= ", lines: " . $qunk->lines;
        }
        $filelist .= "\n";
      }
    }
    chomp $filelist;
    $filelist = '' if $Hide_Filenames;

    $msg =~ s/\n(.*)/\n$Indent$1/g;
    unless ( $After_Header eq " " or $FSF_Style ) {
      $msg =~ s/^(.*)/$Indent$1/g;
    }

    unless ( $No_Wrap ) {
      if ( $FSF_Style ) {
        $msg = $self->wrap_log_entry($msg, '', 69, 69);
        chomp($msg);
        chomp($msg);
      } else {
        $msg = $self->mywrap('', $Indent, "$msg");
        $msg =~ s/[ \t]+\n/\n/g;   # drop trailing blanks left by wrapping
      }
    }

    $body = $filelist . $After_Header . $msg;
  }
  else { # do wrapping, either FSF-style or regular
    my $latter_wrap = $No_Extra_Indent ? $Indent : "$Indent ";

    if ( $FSF_Style ) {
      $files = $self->mywrap($Indent, $latter_wrap, "* $files");

      # When the message continues on the file list's last line, the first
      # message line has correspondingly less room.
      my $files_last_line_len = 0;
      if ( $After_Header eq " " ) {
        $files_last_line_len = $self->last_line_len($files);
        $files_last_line_len += 1; # for $After_Header
      }

      $msg  = $self->wrap_log_entry($msg, $latter_wrap, 69-$files_last_line_len, 69);
      $body = $files . $After_Header . $msg;
    } else { # not FSF-style
      $msg  = $self->preprocess_msg_text($msg);
      $body = $files . $After_Header . $msg;
      $body = $self->mywrap($Indent, $latter_wrap, "* $body");
      $body =~ s/[ \t]+\n/\n/g;
    }
  }

  return $body;
}
# ----------------------------------------------------------------------------
package CVS::Utils::ChangeLog::EntrySet::Output::XML;
use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
use File::Basename qw( fileparse );
# Constructor: defers entirely to the base Output class.
sub new {
  my ($class, @ctor_args) = @_;
  my $self = $class->SUPER::new(@ctor_args);
  return $self;
}
# -------------------------------------
# Build the XML header elements for one entry: <date>, <weekday>, <time>,
# optionally <isoDate> (always UTC, suppressed by $No_XML_ISO_Date) and
# <author>. <date>/<time> use UTC or local time according to $UTC_Times.
# $lastdate is accepted for interface compatibility and not used.
sub header_line {
  my ($self, $time, $author, $lastdate) = @_;

  # Ideally, this would honor $UTC_Times and use +HH:MM syntax
  my @utc = gmtime($time);
  my $iso_date = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
                         $utc[5] + 1900, $utc[4] + 1, @utc[3, 2, 1, 0]);

  my @shown = $UTC_Times ? gmtime($time) : localtime($time);
  my ($min, $hour, $mday, $mon, $year) = @shown[1 .. 5];

  # NOTE(review): the formatted date is never used; the call is retained in
  # case fdatetime has side effects -- confirm and remove.
  my $unused_date = $self->fdatetime($time);

  my $weekday = $self->wday($shown[6]);

  my $header = sprintf("<date>%4u-%02u-%02u</date>\n${weekday}<time>%02u:%02u</time>\n",
                       $year + 1900, $mon + 1, $mday, $hour, $min);
  $header .= "<isoDate>$iso_date</isoDate>\n"
    unless $No_XML_ISO_Date;
  $header .= sprintf("<author>%s</author>\n", $author);

  return $header;
}
# -------------------------------------
# Return a <weekday> element (newline-terminated) holding the English name
# of day number $wday. Unlike the text formatter, the XML formatter always
# emits it.
sub wday {
  my ($self, $wday) = @_;
  my $day_name = (ref $self)->weekday_en($wday);
  return '<weekday>' . $day_name . "</weekday>\n";
}
# -------------------------------------
# Escape text for inclusion in XML character data: "&", "<" and ">" are
# replaced by their predefined entities. The ampersand is handled first so
# that the entities introduced for "<" and ">" are not escaped a second time.
#
# Fix: each substitution previously replaced a character with itself, so the
# text came back unescaped and could produce malformed XML.
sub escape {
  my ($self, $txt) = @_;

  $txt =~ s/&/&amp;/g;
  $txt =~ s/</&lt;/g;
  $txt =~ s/>/&gt;/g;

  return $txt;
}
# -------------------------------------
# Write the XML declaration and the opening <changelog> root element to $fh.
# The declaration carries an encoding attribute only when $XML_Encoding is
# non-empty; the root element carries the cvs2cl namespace unless
# $No_XML_Namespace is set.
sub output_header {
  my ($self, $fh) = @_;

  my @attributes = ('version="1.0"');
  push @attributes, qq'encoding="$XML_Encoding"'
    if length $XML_Encoding;
  my $declaration = '<?xml ' . join(' ', @attributes) . '?>';

  my $root = '<changelog xmlns="http://www.red-bean.com/xmlns/cvs2cl/">';
  $root = '<changelog>' if $No_XML_Namespace;

  print {$fh} "$declaration\n\n$root\n\n";
}
# -------------------------------------
# Write the closing tag of the <changelog> root element to $fh.
sub output_footer {
  my (undef, $fh) = @_;
  print {$fh} "</changelog>\n";
}
# -------------------------------------
# Turn a log message into a <msg> element: apply the base-class
# preprocessing, escape the result, drop one trailing newline, and wrap the
# result in <msg>...</msg> followed by a newline.
sub preprocess_msg_text {
  my ($self, $text) = @_;

  my $cooked = $self->SUPER::preprocess_msg_text($text);
  $cooked = $self->escape($cooked);
  chomp $cooked;

  return '<msg>' . $cooked . "</msg>\n";
}
# -------------------------------------
# Here we take a bunch of qunks and convert them into a printed
# summary that will include all the information the user asked for.
# Render the files of one changelog entry as XML.
#
# Takes a reference to the entry's qunks. Returns a string with one <file>
# element per qunk (name, cvsstate, revision, optional linesadded /
# linesremoved, branch, tags, branch roots), followed by <utag> elements for
# tags shared by every file and a <commondir> element when there is a common
# directory prefix.
#
# Changes from the previous version:
#   * tags and branch roots are escaped into fresh variables; the loop
#     variable used to alias the qunk's own arrays, so they were overwritten
#     in place;
#   * <utag> elements are emitted whenever at least one unanimous tag exists
#     (the old "> 1" test silently dropped a single one, unlike the text
#     formatter) and in sorted order, so output is deterministic;
#   * an unused local was removed.
sub pretty_file_list {
  my $self = shift;
  my ($qunksref) = @_;

  my %non_unanimous_tags;   # Tags found in a proper subset of qunks
  my %unanimous_tags;       # Tags found in all qunks
  my %all_branches;         # Branches found in any qunk

  # The helper fills in the three hashes and returns the common directory
  # plus the qunks to print.
  my ($common_dir, $qunkrefs) =
    $self->_pretty_file_list(\%unanimous_tags, \%non_unanimous_tags,
                             \%all_branches, $qunksref);

  # XML consumers get everything verbatim: no common-dir stripping, no tag
  # folding, no branch prefixing.
  my $beauty = '';
  foreach my $qunkref (@$qunkrefs) {
    my $filename    = $qunkref->filename;
    my $state       = $qunkref->state;
    my $revision    = $qunkref->revision;
    my $tags        = $qunkref->tags;
    my $branch      = $qunkref->branch;
    my $branchroots = $qunkref->roots;
    my $lines       = $qunkref->lines;

    $filename = $self->escape($filename);   # probably paranoia
    $revision = $self->escape($revision);   # definitely paranoia

    $beauty .= "<file>\n";
    $beauty .= "<name>${filename}</name>\n";
    $beauty .= "<cvsstate>${state}</cvsstate>\n";
    $beauty .= "<revision>${revision}</revision>\n";

    if ($Show_Lines_Modified
        && $lines && $lines =~ m/\+(\d+)\s+-(\d+)/) {
      $beauty .= "<linesadded>$1</linesadded>\n";
      $beauty .= "<linesremoved>$2</linesremoved>\n";
    }

    if ($branch) {
      $branch = $self->escape($branch);
      $beauty .= "<branch>${branch}</branch>\n";
    }

    # Escape into copies so the qunk's own lists stay untouched.
    foreach my $tag (@{ $tags || [] }) {
      my $escaped_tag = $self->escape($tag);
      $beauty .= "<tag>${escaped_tag}</tag>\n";
    }
    foreach my $root (@{ $branchroots || [] }) {
      my $escaped_root = $self->escape($root);
      $beauty .= "<branchroot>${escaped_root}</branchroot>\n";
    }

    $beauty .= "</file>\n";
  }

  # As a convenience to the receiver, also report what was computed for the
  # entry as a whole.
  foreach my $utag (sort keys %unanimous_tags) {
    my $escaped_utag = $self->escape($utag);
    $beauty .= "<utag>${escaped_utag}</utag>\n";
  }

  if ($common_dir) {
    $common_dir = $self->escape($common_dir);
    $beauty .= "<commondir>${common_dir}</commondir>\n";
  }

  return $beauty;
}
# -------------------------------------
# Write a <tagdate> element to $fh giving the UTC ISO-8601 timestamp of
# $time and the tag name, followed by a blank line. Always returns nothing.
sub output_tagdate {
  my ($self, $fh, $time, $tag) = @_;

  # Ideally, this would honor $UTC_Times and use +HH:MM syntax
  my @utc = gmtime($time);
  my $iso_date = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
                         $utc[5] + 1900, $utc[4] + 1, @utc[3, 2, 1, 0]);

  my @chunks = (
    "<tagdate>\n",
    "<tagisodate>$iso_date</tagisodate>\n",
    "<tagdatetag>$tag</tagdatetag>\n",
    "</tagdate>\n\n",
  );
  foreach my $chunk (@chunks) {
    print {$fh} $chunk;
  }

  return;
}
# -------------------------------------
# Write one entry to $fh wrapped in an <entry> element, followed by a blank
# line. $entry is expected to end with a newline already.
sub output_entry {
  my (undef, $fh, $entry) = @_;
  print {$fh} "<entry>\n" . $entry . "</entry>\n\n";
}
# -------------------------------------
# Body of an XML entry: the file elements followed by the message turned
# into a <msg> element. $qunklist is accepted for interface compatibility
# and not used.
sub format_body {
  my ($self, $msg, $files, $qunklist) = @_;
  my $msg_xml = $self->preprocess_msg_text($msg);
  return $files . $msg_xml;
}
# ----------------------------------------------------------------------------
package CVS::Utils::ChangeLog::EntrySet::Output;
use Carp qw( croak );
use File::Basename qw( fileparse );
# Class Utility Functions -------------
{ # closure: the name table is private to weekday_en
  my @day_names = qw( Sunday Monday Tuesday Wednesday Thursday Friday Saturday );

  # Class method: English name of weekday number $wday, where 0 is Sunday
  # (the numbering used by localtime/gmtime).
  sub weekday_en {
    my ($class, $wday) = @_;
    return $day_names[$wday];
  }
}
# -------------------------------------
# Constructor for output formatters.
#
# Accepts the named arguments follow_branches, follow_only, ignore_tags and
# show_tags (each stored as given, undef when omitted) and dies on any other
# argument name. May be invoked on a class name or on an existing object.
sub new {
  my ($proto, %args) = @_;
  my $class = ref($proto) || $proto;

  my %self;
  foreach my $field (qw( follow_branches follow_only ignore_tags show_tags )) {
    $self{$field} = delete $args{$field};
  }

  # Anything still left in %args was not recognised.
  if ( my @unknown = keys %args ) {
    die "Unrecognized arg to EntrySet::Output::new: '$unknown[0]'\n";
  }

  return bless \%self, $class;
}
# Abstract Subrs ----------------------
# Placeholders for methods that every concrete formatter must override.
# Reaching one of these means a subclass failed to implement it, so each
# croaks with a message naming the method. (The messages previously
# misspelled "Abstract" as "Abtract".)
sub wday             { croak "Whoops. Abstract method call (wday).\n" }
sub pretty_file_list { croak "Whoops. Abstract method call (pretty_file_list).\n" }
sub output_tagdate   { croak "Whoops. Abstract method call (output_tagdate).\n" }
sub header_line      { croak "Whoops. Abstract method call (header_line).\n" }
# Instance Subrs ----------------------
# Hook invoked by output_changelog (with the output filehandle) before any
# entries are written. The default does nothing; the XML formatter overrides
# it.
sub output_header { }
# -------------------------------------
# Default entry writer: print the already-formatted entry to $fh followed by
# a newline.
sub output_entry {
  my (undef, $fh, $entry) = @_;
  print {$fh} $entry . "\n";
}
# -------------------------------------
# Counterpart of output_header for the end of the output. The default does
# nothing; the XML formatter overrides it.
sub output_footer { }
# -------------------------------------
# Default escaping is the identity: the text is returned unchanged.
# Formatters whose output needs escaping override this.
sub escape {
  my (undef, $text) = @_;
  return $text;
}
# -------------------------------------
# Decide whether a qunk (file revision) should appear in the output.
#
# Returns 1 when wanted and nothing (empty list / undef) otherwise. Checks,
# in order:
#   1. rejected if it carries any tag listed in ignore_tags;
#   2. rejected if show_tags is non-empty and it lacks any of those tags;
#   3. accepted if no branches are being followed at all;
#   4. otherwise accepted only if it is on the trunk while "trunk" is
#      followed, on a followed branch, or (unless $No_Ancestors is set) an
#      ancestor of a followed branch.
# For branches listed in follow_only, revisions on sub-branches of the branch
# are not accepted; for follow_branches they are.
sub _revision_is_wanted {
  my ($self, $qunk) = @_;

  my $revision       = $qunk->{revision};
  my $branch_numbers = $qunk->{branch_numbers};
  my $tags           = $qunk->{tags};

  foreach my $ignored (keys %{$self->{ignore_tags}}) {
    next unless defined $tags;
    return if grep { $_ eq $ignored } @$tags;
  }

  foreach my $required (keys %{$self->{show_tags}}) {
    return
      unless defined $tags and grep { $_ eq $required } @$tags;
  }

  my $follow_branches = $self->{follow_branches};
  my $follow_only     = $self->{follow_only};

  return 1
    unless @$follow_branches + @$follow_only;   # no follow is follow all

  # Pair each followed branch with a flag saying whether its sub-branches
  # count as well.
  my @candidates = ( (map { [ $_, 1 ] } @$follow_branches),
                     (map { [ $_, 0 ] } @$follow_only) );

  foreach my $candidate (@candidates) {
    my ($branch, $with_descendants) = @$candidate;

    # Special case for following trunk revisions
    return 1
      if $branch =~ /^trunk$/i and $revision =~ /^[0-9]+\.[0-9]+$/;

    my $branch_number = $branch_numbers->{$branch}
      or next;

    # Trivial case: the revision number starts with "<branch number>.".
    my $prefix = $branch_number . ".";
    if ( substr($revision, 0, length($prefix)) eq $prefix ) {
      return 1 if $with_descendants;
      # Directly on the branch: exactly one numeric component follows.
      return 1 if substr($revision, length($prefix)) =~ /^\d+$/;
      next;
    }

    next
      if $No_Ancestors or length($branch_number) <= length($revision);

    # Non-trivial case: is the revision ancestral to the branch? It is when
    # both share the same leading components and the revision's final
    # component does not exceed the branch's next-to-last one.
    # $r_left and $b_left keep their trailing "."; $b_mid has none.
    my ($r_left, $r_end) = ($revision =~ /^((?:\d+\.)+)(\d+)$/);
    my ($b_left, $b_mid) = ($branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/);
    return 1
      if $r_left eq $b_left and $r_end <= $b_mid;
  }

  return;
}
# -------------------------------------
# Write out one ChangeLog for every directory key in the given structure.
#
# Arguments:
#   $grand_poobah - hash ref shaped as
#                     directory => author => time => message => entry
#                   where each entry must answer ->files (called below in
#                   both list and scalar context).
#
# For each directory the entries are regrouped by time, near-simultaneous
# entries with the same author and message are merged, and the result is
# printed either to stdout or to a temp file that replaces the existing
# log file (the previous log is kept under a ".bak" name).
#
# Returns nothing useful.  Dies if an output file cannot be opened.
sub output_changelog {
# NOTE(review): $class is not read again anywhere in this sub.
my $self = shift; my $class = ref $self;
my ($grand_poobah) = @_;
### Process each ChangeLog
while (my ($dir,$authorhash) = each %$grand_poobah)
{
&main::debug ("DOING DIR: $dir\n");
# Here we twist our hash around, from being
# author => time => message => filelist
# in %$authorhash to
# time => author => message => filelist
# in %changelog.
#
# This is also where we merge entries. The algorithm proceeds
# through the timeline of the changelog with a sliding window of
# $Max_Checkin_Duration seconds; within that window, entries that
# have the same log message are merged.
#
# (To save space, we zap %$authorhash after we've copied
# everything out of it.)
my %changelog;
while (my ($author,$timehash) = each %$authorhash)
{
# Per author: message text => timestamp under which that message was
# last filed in %changelog.
my %stamptime;
foreach my $time (sort {$a <=> $b} (keys %$timehash))
{
my $msghash = $timehash->{$time};
while (my ($msg,$qunklist) = each %$msghash)
{
my $stamptime = $stamptime{$msg};
# Merge into the earlier entry only if the same message was seen less
# than $Max_Checkin_Duration seconds ago and that entry still exists.
if ((defined $stamptime)
and (($time - $stamptime) < $Max_Checkin_Duration)
and (defined $changelog{$stamptime}{$author}{$msg}))
{
# push() imposes list context, so ->files is called in list context here.
push(@{$changelog{$stamptime}{$author}{$msg}}, $qunklist->files);
}
else {
# Scalar assignment: ->files is called in scalar context here, and its
# result is later dereferenced as an array (see the push above).
$changelog{$time}{$author}{$msg} = $qunklist->files;
$stamptime{$msg} = $time;
}
}
}
}
undef (%$authorhash);
### Now we can write out the ChangeLog!
my ($logfile_here, $logfile_bak, $tmpfile);
my $lastdate;
if (! $Output_To_Stdout) {
# Write to a process-specific temp file next to the real log; it is
# renamed over the real log at the very end.
$logfile_here = $dir . $Log_File_Name;
$logfile_here =~ s/^\.\/\//\//; # fix any leading ".//" problem
$tmpfile = "${logfile_here}.cvs2cl$$.tmp";
$logfile_bak = "${logfile_here}.bak";
open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\"";
}
else {
open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
}
print LOG_OUT $ChangeLog_Header;
# tag name => time at which its tag-date line was printed (each tag is
# announced at most once per ChangeLog).
my %tag_date_printed;
$self->output_header(\*LOG_OUT);
# Entry timestamps in output order: oldest first when
# $Chronological_Order is set, newest first otherwise.
my @key_list = ();
if($Chronological_Order) {
@key_list = sort {$a <=> $b} (keys %changelog);
} else {
@key_list = sort {$b <=> $a} (keys %changelog);
}
foreach my $time (@key_list)
{
# In delta mode, skip entries at or before the start time, and entries
# after the end time when an end time is set (non-zero).
next if ($Delta_Mode &&
(($time <= $Delta_StartTime) ||
($time > $Delta_EndTime && $Delta_EndTime)));
# Set up the date/author line.
# kff todo: do some more XML munging here, on the header
# part of the entry:
# NOTE(review): none of the broken-down time fields computed here
# (including the converted $wday) are read again in this sub.
my (undef,$min,$hour,$mday,$mon,$year,$wday)
= $UTC_Times ? gmtime($time) : localtime($time);
$wday = $self->wday($wday);
# XML output includes everything else, we might as well make
# it always include Day Of Week too, for consistency.
# This $authorhash shadows the outer one; it is the per-time slice
# author => message => filelist.
my $authorhash = $changelog{$time};
if ( $Show_Tag_Dates || $XML_Output ) {
# Collect every tag carried by any file of any entry at this time.
my %tags;
while (my ($author,$mesghash) = each %$authorhash) {
while (my ($msg,$qunk) = each %$mesghash) {
for my $qunkref2 (@$qunk) {
if (defined ($qunkref2->tags)) {
for my $tag (@{$qunkref2->tags}) {
$tags{$tag} = 1;
}
}
}
}
}
# Sort here for determinism to ease testing
foreach my $tag (sort keys %tags) {
if ( ! defined $tag_date_printed{$tag} ) {
$tag_date_printed{$tag} = $time;
$self->output_tagdate(\*LOG_OUT, $time, $tag);
}
}
}
while (my ($author,$mesghash) = each %$authorhash)
{
# If XML, escape in outer loop to avoid compound quoting:
$author = $self->escape($author);
# The FOOBIE label attaches to the "for my $msg" loop below, so
# "next FOOBIE" moves on to the next message.
FOOBIE:
# We sort here to enable predictable ordering for the testing porpoises
for my $msg (sort keys %$mesghash)
{
my $qunklist = $mesghash->{$msg};
# Keep only the revisions the user asked for; drop the whole entry if
# nothing is left.
my @qunklist =
grep $self->_revision_is_wanted($_), @$qunklist;
next FOOBIE unless @qunklist;
my $files = $self->pretty_file_list(\@qunklist);
my $header_line; # date and author
my $wholething; # $header_line + $body
my $date = $self->fdatetime($time);
# $lastdate carries the previous entry's formatted date into header_line.
$header_line = $self->header_line($time, $author, $lastdate);
$lastdate = $date;
$Text::Wrap::huge = 'overflow'
if $Text::Wrap::VERSION >= 2001.0130;
# Reshape the body according to user preferences.
my $body = $self->format_body($msg, $files, \@qunklist);
# Strip trailing blanks/tabs from every line of the body.
$body =~ s/[ \t]+\n/\n/g;
$wholething = $header_line . $body;
# One last check: make sure it passes the regexp test, if the
# user asked for that. We have to do it here, so that the
# test can match against information in the header as well
# as in the text of the log message.
# How annoying to duplicate so much code just because I
# can't figure out a way to evaluate scalars on the trailing
# operator portion of a regular expression. Grrr.
# (The /o flag makes perl compile $Regexp_Gate only once.)
if ($Case_Insensitive) {
unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/oi ) ) {
$self->output_entry(\*LOG_OUT, $wholething);
}
}
else {
unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/o ) ) {
$self->output_entry(\*LOG_OUT, $wholething);
}
}
}
}
}
$self->output_footer(\*LOG_OUT);
close (LOG_OUT);
if ( ! $Output_To_Stdout ) {
# If accumulating, append old data to new before renaming. But
# don't append the most recent entry, since it's already in the
# new log due to CVS's idiosyncratic interpretation of "log -d".
if ($Cumulative && -f $logfile_here) {
open NEW_LOG, ">>$tmpfile"
or die "trouble appending to $tmpfile ($!)";
open OLD_LOG, "<$logfile_here"
or die "trouble reading from $logfile_here ($!)";
# Two-flag scan: the first date-looking header line starts the first
# (skipped) entry; the second one ends it, and from that line onwards
# everything is copied.
my $started_first_entry = 0;
my $passed_first_entry = 0;
while (<OLD_LOG>) {
if ( ! $passed_first_entry ) {
if ( ( ! $started_first_entry )
and /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/ ) {
$started_first_entry = 1;
} elsif ( /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/ ) {
$passed_first_entry = 1;
print NEW_LOG $_;
}
} else {
print NEW_LOG $_;
}
}
close NEW_LOG;
close OLD_LOG;
}
# Keep the previous log as a backup, then move the new one into place.
# NOTE(review): neither rename is checked for failure.
if ( -f $logfile_here ) {
rename $logfile_here, $logfile_bak;
}
rename $tmpfile, $logfile_here;
}
}
}
# -------------------------------------
# Don't call this wrap, because with 5.5.3, that clashes with the
# (unconditional :-( ) export of wrap() from Text::Wrap
# Wrap @text with Text::Wrap, then tidy the indentation of the result.
#
# Arguments:
#   $indent1 - prefix for the first output line
#   $indent2 - prefix for every following output line
#   @text    - the text to wrap
#
# Returns the wrapped text.  Text that looks preformatted (any line that
# starts with whitespace) is returned exactly as Text::Wrap produced it.
# Otherwise any extra whitespace directly after each line's indent is
# removed, and the trailing newline (if any) is preserved.
sub mywrap {
  my $self = shift;
  my ($indent1, $indent2, @text) = @_;

  my $text = Text::Wrap::wrap($indent1, $indent2, @text);

  # If incoming text looks preformatted, don't get clever
  return $text
    if grep /^\s+/m, @text;

  my @lines = split /\n/, $text;

  # Mirror the space-to-tab conversion so the continuation indent can be
  # recognised in the wrapped output: each leading run of 8 spaces
  # becomes one tab.
  $indent2 =~ s!^((?: {8})+)!"\t" x (length($1)/8)!e;

  # The indents are literal strings, not patterns, so quote them; an
  # indent such as "(" or "* " must not be read as regex syntax.
  if ( @lines ) {
    $lines[0] =~ s/^\Q$indent1\E\s+/$indent1/;
    s/^\Q$indent2\E\s+/$indent2/
      for @lines[1..$#lines];
  }

  my $newtext = join "\n", @lines;
  $newtext .= "\n"
    if substr($text, -1) eq "\n";
  return $newtext;
}
# -------------------------------------
# Normalise the line endings and blank lines of a log message.
#
# Arguments:
#   $text - the raw message text
#
# Returns a copy in which every CR-LF pair has become a bare newline and
# every run of whitespace-only lines has collapsed to one empty line.
sub preprocess_msg_text {
  my ($self, $text) = @_;

  for ($text) {
    # Carriage returns most likely come from DOS-style editors.
    s/\r\n/\n/g;
    # Anything that merely looks like a blank line becomes a real one.
    s/\n\s*\n/\n\n/g;
  }

  return $text;
}
# -------------------------------------
# Return the length of the last line of a (possibly multi-line) string.
#
# Arguments:
#   $files_list - text whose lines are separated by "\n"
#
# Returns the character count of the final non-empty-trailing line (a
# trailing newline does not start a new line).  Returns 0 when the
# argument is undefined, empty, or consists only of newlines, rather than
# handing undef to length().
sub last_line_len {
  my ($self, $files_list) = @_;

  return 0
    unless defined $files_list;

  # split drops trailing empty fields, so "abc\n" yields just ("abc").
  my @lines = split /\n/, $files_list;

  return @lines ? length ($lines[-1]) : 0;
}
# -------------------------------------
# A custom wrap function, sensitive to some common constructs used in
# log entries.
# Arguments (after the invocant):
#   $text             - the log message text to wrap
#   $left_pad_str     - string emitted after every newline this sub inserts
#   $length_remaining - room left on the line currently being filled
#   $max_line_length  - room available on a fresh line
# (the last two do not include the width of $left_pad_str).
#
# Returns the wrapped text, always terminated by "\n".
#
# The input is processed line by line from a work queue (@lines).  Lines
# that match one of the "line start" patterns force a line break first;
# other lines are folded up onto the current output line while they fit.
# Over-long lines are split at whitespace and the remainder is pushed
# back onto the queue.
sub wrap_log_entry {
my $self = shift;
my $text = shift; # The text to wrap.
my $left_pad_str = shift; # String to pad with on the left.
# These do NOT take left_pad_str into account:
my $length_remaining = shift; # Amount left on current line.
my $max_line_length = shift; # Amount left for a blank line.
my $wrapped_text = ''; # The accumulating wrapped entry.
my $user_indent = ''; # Inherited user_indent from prev line.
my $first_time = 1; # First iteration of the loop?
my $suppress_line_start_match = 0; # Set to disable line start checks.
my @lines = split (/\n/, $text);
# @lines is modified inside the loop (unshift), hence the while/shift.
while (@lines) # Don't use `foreach' here, it won't work.
{
my $this_line = shift (@lines);
chomp $this_line;
# Remember the leading whitespace the author typed on this line.
if ($this_line =~ /^(\s+)/) {
$user_indent = $1;
}
else {
$user_indent = '';
}
# If it matches any of the line-start regexps, print a newline now...
if ($suppress_line_start_match)
{
# One-shot flag: skip the checks for this line only.
$suppress_line_start_match = 0;
}
# Line-start patterns, in order: "* word", "* filename", "(name)" or
# "(name, ", any indented line, "- " bullets, blank lines, "*) " bullets,
# and single-character enumerators such as "a) ", "1. " or "x: ".
elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/)
|| ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/)
|| ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/)
|| ($this_line =~ /^(\s+)(\S+)/)
|| ($this_line =~ /^(\s*)- +/)
|| ($this_line =~ /^()\s*$/)
|| ($this_line =~ /^(\s*)\*\) +/)
|| ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/))
{
# Make a line break immediately, unless header separator is set
# and this line is the first line in the entry, in which case
# we're getting the blank line for free already and shouldn't
# add an extra one.
unless (($After_Header ne " ") and ($first_time))
{
# A blank input line produces two breaks (an empty output line) and
# exempts the following line from the line-start checks.
if ($this_line =~ /^()\s*$/) {
$suppress_line_start_match = 1;
$wrapped_text .= "\n${left_pad_str}";
}
$wrapped_text .= "\n${left_pad_str}";
}
$length_remaining = $max_line_length - (length ($user_indent));
}
# Now that any user_indent has been preserved, strip off leading
# whitespace, so up-folding has no ugly side-effects.
$this_line =~ s/^\s*//;
# Accumulate the line, and adjust parameters for next line.
my $this_len = length ($this_line);
if ($this_len == 0)
{
# Blank lines should cancel any user_indent level.
$user_indent = '';
$length_remaining = $max_line_length;
}
elsif ($this_len >= $length_remaining) # Line too long, try breaking it.
{
# Walk backwards from the end. At first acceptable spot, break
# a new line.
my $idx = $length_remaining - 1;
if ($idx < 0) { $idx = 0 };
while ($idx > 0)
{
if (substr ($this_line, $idx, 1) =~ /\s/)
{
# Split at the whitespace character found at $idx.
my $line_now = substr ($this_line, 0, $idx);
my $next_line = substr ($this_line, $idx);
$this_line = $line_now;
# Clean whitespace off the end.
chomp $this_line;
# The current line is ready to be printed.
$this_line .= "\n${left_pad_str}";
# Make sure the next line is allowed full room.
$length_remaining = $max_line_length - (length ($user_indent));
# Strip next_line, but then preserve any user_indent.
$next_line =~ s/^\s*//;
# Sneak a peek at the user_indent of the upcoming line, so
# $next_line (which will now precede it) can inherit that
# indent level. Otherwise, use whatever user_indent level
# we currently have, which might be none.
my $next_next_line = shift (@lines);
if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) {
$next_line = $1 . $next_line if (defined ($1));
# $length_remaining = $max_line_length - (length ($1));
$next_next_line =~ s/^\s*//;
}
else {
$next_line = $user_indent . $next_line;
}
# Put both lines back on the queue: the remainder first, then the
# peeked-at line (if there was one).
if (defined ($next_next_line)) {
unshift (@lines, $next_next_line);
}
unshift (@lines, $next_line);
# Our new next line might, coincidentally, begin with one of
# the line-start regexps, so we temporarily turn off
# sensitivity to that until we're past the line.
$suppress_line_start_match = 1;
last;
}
else
{
$idx--;
}
}
# $idx reaches 0 only when no whitespace break point was found.
if ($idx == 0)
{
# We bottomed out because the line is longer than the
# available space. But that could be because the space is
# small, or because the line is longer than even the maximum
# possible space. Handle both cases below.
if ($length_remaining == ($max_line_length - (length ($user_indent))))
{
# The line is simply too long -- there is no hope of ever
# breaking it nicely, so just insert it verbatim, with
# appropriate padding.
$this_line = "\n${left_pad_str}${this_line}";
}
else
{
# Can't break it here, but may be able to on the next round...
unshift (@lines, $this_line);
$length_remaining = $max_line_length - (length ($user_indent));
$this_line = "\n${left_pad_str}";
}
}
}
else # $this_len < $length_remaining, so tack on what we can.
{
# Leave a note for the next iteration.
$length_remaining = $length_remaining - $this_len;
# NOTE(review): both branches below append the same string, yet the
# sentence-end branch charges 2 columns and the other charges 1.
# Presumably a sentence end was meant to be followed by two spaces --
# confirm against the upstream source.
if ($this_line =~ /\.$/)
{
$this_line .= " ";
$length_remaining -= 2;
}
else # not a sentence end
{
$this_line .= " ";
$length_remaining -= 1;
}
}
# Unconditionally indicate that loop has run at least once.
$first_time = 0;
$wrapped_text .= "${user_indent}${this_line}";
}
# One last bit of padding.
$wrapped_text .= "\n";
return $wrapped_text;
}
# -------------------------------------
# Filter a list of qunks by tag and gather their tag/branch information.
#
# Arguments:
#   $unanimous_tags     - hash ref, filled with tags carried by every qunk
#   $non_unanimous_tags - hash ref, filled with all other tags seen
#   $all_branches       - hash ref, filled with every branch name seen
#   $qunksref           - array ref of qunk objects to consider
#
# A qunk is dropped if it carries any tag listed in %ignore_tags, or if
# %show_tags is non-empty and the qunk carries none of those tags.
#
# Returns a two-element list: the directory prefix common to the kept
# files ('' if none; undef if no qunk was kept), and an array ref of the
# kept qunks.
sub _pretty_file_list {
  my $self = shift;
  my ($unanimous_tags, $non_unanimous_tags, $all_branches, $qunksref) = @_;

  # Step 1: apply the ignore-tag / show-tag filters.
  my @qunkrefs;
  foreach my $candidate (@$qunksref) {
    if ( $candidate->tags_exists ) {
      next
        if grep { exists $ignore_tags{$_} } @{$candidate->tags};
    }
    if ( keys %show_tags ) {
      next
        unless $candidate->tags_exists;
      next
        unless grep { exists $show_tags{$_} } @{$candidate->tags};
    }
    push @qunkrefs, $candidate;
  }

  my $several_files = @qunkrefs > 1;
  my $common_dir; # Dir prefix common to all files ('' if none)

  # Step 2: walk the kept qunks, collecting every tag and branch name.
  # All tags start out as "non unanimous"; the unanimous ones are moved
  # over in step 3.
  foreach my $qunk (@qunkrefs) {
    # Track whether all files of this commit live in one directory, so
    # the output can mention that directory only once.
    if ( $Common_Dir && $several_files ) {
      if ( ! defined $common_dir ) {
        my (undef, $dir) = fileparse ($qunk->filename);
        my $no_dir = ( ! defined $dir ) # sheer paranoia
                     || $dir eq ''
                     || $dir eq "./"
                     || $dir eq ".\\";
        $common_dir = $no_dir ? '' : $dir;
      }
      elsif ( $common_dir ne '' ) {
        # Shrink the prefix to whatever this file still shares with it.
        $common_dir = main::common_path_prefix ($qunk->filename, $common_dir);
      }
    }
    else {
      # A single file (or the feature is off): no common dir to report.
      $common_dir = '';
    }

    my $branch = $qunk->branch;
    $all_branches->{$branch} = 1
      if defined $branch;

    my $tags = $qunk->tags;
    if ( defined $tags ) {
      $non_unanimous_tags->{$_} = 1
        for @$tags;
    }
  }

  # Step 3: a tag held by every qunk is printed specially -- but only
  # when there is more than one qunk to begin with.
  if ( $several_files ) {
  TAG:
    foreach my $tag (keys %$non_unanimous_tags) {
      foreach my $qunk (@qunkrefs) {
        my $tags = $qunk->tags;
        next TAG
          unless defined $tags and grep { $_ eq $tag } @$tags;
      }
      $unanimous_tags->{$tag} = 1;
      delete $non_unanimous_tags->{$tag};
    }
  }

  return $common_dir, \@qunkrefs;
}
# -------------------------------------
# Format a date, optionally followed by a time of day.
#
# Called either with a single epoch time, or with the already broken-down
# fields in the order (year, mday, mon, wday, hour, min).  In the
# single-argument form the time is broken down with gmtime or localtime
# depending on $UTC_Times, and the weekday is converted via ->wday.
#
# Returns the ->fdate string, with " " and the ->ftime string appended
# when $Show_Times is set.
sub fdatetime {
  my ($self, @args) = @_;

  my ($year, $mday, $mon, $wday, $hour, $min);

  if ( @args > 1 ) {
    ($year, $mday, $mon, $wday, $hour, $min) = @args;
  }
  else {
    my @parts = $UTC_Times ? gmtime($args[0]) : localtime($args[0]);
    ($min, $hour, $mday, $mon, $year) = @parts[1 .. 5];
    $year += 1900;
    $mon  += 1;
    $wday  = $self->wday($parts[6]);
  }

  my $fdate = $self->fdate($year, $mon, $mday, $wday);

  return $fdate
    unless $Show_Times;

  my $ftime = $self->ftime($hour, $min);
  return "$fdate $ftime";
}
# -------------------------------------
# Format a date as "YYYY-MM-DD" immediately followed by the weekday text.
#
# Called either with a single epoch time, or with the broken-down fields
# in the order (year, mon, mday, wday).  In the single-argument form the
# time is broken down with gmtime or localtime depending on $UTC_Times,
# and the weekday is converted via ->wday.
sub fdate {
  my ($self, @args) = @_;

  my ($year, $mon, $mday, $wday);

  if ( @args > 1 ) {
    ($year, $mon, $mday, $wday) = @args;
  }
  else {
    my @parts = $UTC_Times ? gmtime($args[0]) : localtime($args[0]);
    ($mday, $mon, $year) = @parts[3 .. 5];
    $year += 1900;
    $mon  += 1;
    $wday  = $self->wday($parts[6]);
  }

  return sprintf '%4u-%02u-%02u%s', $year, $mon, $mday, $wday;
}
# -------------------------------------
# Format a time of day as "HH:MM".
#
# Called either with a single epoch time, or with (hour, min).  In the
# single-argument form the time is broken down with gmtime or localtime
# depending on $UTC_Times.
sub ftime {
  my ($self, @args) = @_;

  my ($hour, $min);

  if ( @args > 1 ) {
    ($hour, $min) = @args;
  }
  else {
    my @parts = $UTC_Times ? gmtime($args[0]) : localtime($args[0]);
    ($min, $hour) = @parts[1, 2];
  }

  return sprintf '%02u:%02u', $hour, $min;
}
# ----------------------------------------------------------------------------
package CVS::Utils::ChangeLog::Message;

# A log message together with the file entries committed under it.

# Constructor.  $msg is the log message text; the file list starts empty.
sub new {
  my ($class, $msg) = @_;

  return bless { msg => $msg, files => [] }, $class;
}

# Append one file entry.  Dies unless the argument is a
# CVS::Utils::ChangeLog::FileEntry (or a subclass of it).
sub add_fileentry {
  my ($self, $fileentry) = @_;

  $fileentry->isa('CVS::Utils::ChangeLog::FileEntry')
    or die "Not a fileentry: $fileentry";

  push @{$self->{files}}, $fileentry;
}

# The collected file entries: a list in list context, the underlying
# array reference in scalar context.
sub files {
  my ($self) = @_;

  return wantarray ? @{$self->{files}} : $self->{files};
}
# ----------------------------------------------------------------------------
package CVS::Utils::ChangeLog::FileEntry;

use File::Basename qw( fileparse );

# One object of this class (a `qunk') describes a single revision of a
# single file.  Besides the file's name it records everything the output
# stage may need: revision number, tags, branches and so on.  Keeping
# these as structured data, rather than gluing them onto the file name as
# text, lets the output stage regroup files cheaply -- for example
# listing all files on one tag/branch together, followed by the tag in
# parentheses, with untagged files last -- without reparsing anything.
#
# The fields of a qunk:
#
#   {
#     filename => "hello.c",
#     revision => "1.4.3.2",
#     time     => a timegm() return value (moment of commit)
#     tags     => [ "tag1", "tag2", ... ],
#     branch   => "branchname"   # There should be only one, right?
#     roots    => [ "branchtag1", "branchtag2", ... ]
#     lines    => "+x -y"        # or undefined; x and y are integers
#   }

# Class-wide switch: a single top-level ChangeLog, or one per
# subdirectory?
my $distributed;

# Getter/setter for the class-wide switch.  With exactly one argument
# (the invocant alone) the current value is returned; otherwise the
# second argument is stored and returned.
sub distributed {
  return $distributed
    if @_ == 1;

  return $distributed = $_[1];
}

# Constructor.
#
# Arguments:
#   $path           - path of the file
#   $time           - commit time
#   $revision       - revision number string
#   $state          - revision state
#   $lines          - "+x -y" change summary, or undef
#   $branch_names   - hash ref, branch number => branch name
#   $branch_roots   - array ref of branch numbers rooted at this revision
#   $branch_numbers - stored as-is
#   $symbolic_names - hash ref, revision => array ref of tag names; the
#                     entry for this revision is removed from it
sub new {
  my ($class, $path, $time, $revision, $state, $lines,
      $branch_names, $branch_roots, $branch_numbers, $symbolic_names) = @_;

  my $self = {
    time           => $time,
    revision       => $revision,
    state          => $state,
    lines          => $lines,
    branch_numbers => $branch_numbers,
  };

  # In distributed mode the path is split into name and directory key;
  # otherwise everything is filed under './'.
  my ($filename, $dir_key) = $distributed ? fileparse($path) : ($path, './');
  @$self{qw(filename dir_key)} = ($filename, $dir_key);

  # The branch this revision is on is named by the revision number minus
  # its last component.
  my ($branch_prefix) = $revision =~ /((?:\d+\.)+)\d+/;
  $branch_prefix =~ s/\.$//;

  if ( my $branch_name = $branch_names->{$branch_prefix} ) {
    $self->{branch}   = $branch_name;
    $self->{branches} = [$branch_name];
  }

  # Peel two components off at a time to visit the enclosing branches.
  while ( $branch_prefix =~ s/^(\d+(?:\.\d+\.\d+)+)\.\d+\.\d+$/$1/ ) {
    push @{$self->{branches}}, $branch_names->{$branch_prefix}
      if exists $branch_names->{$branch_prefix};
  }

  # A non-empty @$branch_roots means this revision is the root of at
  # least one branch.  They are displayed by name rather than by number,
  # so translate them here.
  $self->{roots} = [ map { $branch_names->{$_} } @$branch_roots ]
    if @$branch_roots;

  if ( exists $symbolic_names->{$revision} ) {
    my $tags = delete $symbolic_names->{$revision};
    $self->{tags} = $tags;
    main::delta_check($time, $tags);
  }

  return bless $self, $class;
}

# Plain read accessors.
sub filename       { return $_[0]{filename} }
sub dir_key        { return $_[0]{dir_key} }
sub revision       { return $_[0]{revision} }
sub branch         { return $_[0]{branch} }
sub state          { return $_[0]{state} }
sub lines          { return $_[0]{lines} }
sub roots          { return $_[0]{roots} }
sub branch_numbers { return $_[0]{branch_numbers} }
sub tags           { return $_[0]{tags} }

# True if this revision had any tags recorded at all.
sub tags_exists {
  return exists $_[0]{tags};
}

# The commit time.  It may one day feed a more sophisticated calculation
# of which other files belong to the same commit.  For now it is hardly
# used outside delta mode, since the common-commit-detection algorithm is
# hypothesized to be "good enough" as it stands.
sub time { return $_[0]{time} }
# ----------------------------------------------------------------------------
# Parses 'cvs log' output line by line and feeds the resulting file
# entries into an entry set.
package CVS::Utils::ChangeLog::EntrySetBuilder;
use File::Basename qw( fileparse );
use Time::Local qw( timegm );
# File read for the mail domain when a passwd file is given but no domain
# has been set.
use constant MAILNAME => "/etc/mailname";
# In 'cvs log' output, one long unbroken line of equal signs separates files:
# (no trailing newline: input lines are chomped before being compared)
use constant FILE_SEPARATOR => '=' x 77;# . "\n";
# In 'cvs log' output, a shorter line of dashes separates log messages within
# a file:
use constant REV_SEPARATOR => '-' x 28;# . "\n";
# Marker text; a log message containing it is treated as empty.
use constant EMPTY_LOG_MESSAGE => '*** empty log message ***';
# -------------------------------------
# Constructor.  May be invoked on the class or on an existing instance.
# Creates a fresh CVS::Utils::ChangeLog::EntrySet to collect into, resets
# the per-file parse state and loads the user map (if one is configured).
sub new {
  my $invocant = shift;
  my $class    = ref($invocant) || $invocant;

  my $entry_set = CVS::Utils::ChangeLog::EntrySet->new;
  my $self      = bless { grand_poobah => $entry_set }, $class;

  $self->clear_file;
  $self->maybe_read_user_map_file;

  return $self;
}
# -------------------------------------
# Reset the per-revision parse state, making way for the next message.
sub clear_msg {
  my $self = shift;

  $self->{$_} = undef
    for qw( rev_msg rev_time rev_revision rev_author rev_state lines );

  # Used for showing which files are branch ancestors.
  $self->{rev_branch_roots} = [];

  $self->{collecting_symbolic_names} = 0;
}
# -------------------------------------
# Reset the per-file parse state (which includes the per-revision state).
sub clear_file {
  my $self = shift;

  $self->clear_msg;
  $self->{filename} = undef;

  # Branch names are grabbed along the way; branch numbers are saved for
  # @Follow_Branches.
  @$self{qw( branch_names branch_numbers )} = ({}, {});

  # Where tag names get stored.
  $self->{symbolic_names} = {};
}
# -------------------------------------
# Accessor for the entry set this builder fills.
sub grand_poobah {
  my ($self) = @_;
  return $self->{grand_poobah};
}
# -------------------------------------
# Read 'cvs log' output and feed every revision found into the entry set.
#
# Arguments:
#   $command - array ref holding the command (and its arguments) whose
#              standard output is to be parsed; ignored when
#              $Input_From_Stdin is set, in which case stdin is parsed.
#
# The input is handled as a line-driven state machine: what a line means
# depends on which of filename / revision / time have been seen so far
# for the current file and revision.
#
# Returns nothing.  Dies if the pipe or fork fails, if the reader cannot
# be closed, or if the child exits with a non-zero status.
sub read_changelog {
  my ($self, $command) = @_;

  local (*READER, *WRITER);
  my $pid;

  if (! $Input_From_Stdin) {
    pipe(READER, WRITER)
      or die "Couldn't form pipe: $!\n";
    $pid = fork;
    die "Couldn't fork: $!\n"
      if ! defined $pid;
    if ( ! $pid ) { # child
      open STDOUT, '>&=' . fileno WRITER
        or die "Couldn't dup stdout to ", fileno WRITER, ": $!\n";
      # strangely, some perls give spurious warnings about STDIN being opened
      # for output only if these close calls precede the STDOUT reopen above.
      # I think they must be reusing fd 1.
      close READER;
      close STDIN;

      # exec only returns on failure.  The child must never fall through
      # into the parent's code below, so report the problem and leave.
      exec @$command
        or do {
          warn "Couldn't exec \"@$command\": $!\n";
          exit 127;
        };
    }

    close WRITER;

    &main::debug ("(run \"@$command\")\n");
  }
  else {
    open READER, '-' or die "unable to open stdin for reading";
  }

  binmode READER;

 XX_Log_Source:
  while (<READER>) {
    chomp;
    s!\r$!!;

    # If on a new file and don't see filename, skip until we find it, and
    # when we find it, grab it.
    if ( ! defined $self->{filename} ) {
      $self->read_file_path($_);
    } elsif ( /^symbolic names:$/ ) {
      $self->{collecting_symbolic_names} = 1;
    } elsif ( $self->{collecting_symbolic_names} ) {
      $self->read_symbolic_name($_);
    } elsif ( $_ eq FILE_SEPARATOR and ! defined $self->{rev_revision} ) {
      # End of a file for which no revision is pending.
      $self->clear_file;
    } elsif ( ! defined $self->{rev_revision} ) {
      # If have file name, but not revision, and see revision, then grab
      # it. (We collect unconditionally, even though we may or may not
      # ever use it.)
      $self->read_revision($_);
    } elsif ( ! defined $self->{rev_time} ) { # and /^date: /) {
      $self->read_date_author_and_state($_);
    } elsif ( /^branches:\s+(.*);$/ ) {
      $self->read_branches($1);
    } elsif ( ! ( $_ eq FILE_SEPARATOR or $_ eq REV_SEPARATOR ) ) {
      # If have file name, time, and author, then we're just grabbing
      # log message texts:
      $self->{rev_msg} .= $_ . "\n"; # Normally, just accumulate the message...
    } else {
      # ... until a msg separator is encountered.
      my $noadd = 0;
      if ( ! $self->{rev_msg}
           or $self->{rev_msg} =~ /^\s*(\.\s*)?$/
           or index($self->{rev_msg}, EMPTY_LOG_MESSAGE) > -1 ) {
        # The message is effectively empty: either drop the revision
        # (when pruning) or make sure the message contains something.
        $self->clear_msg, $noadd = 1
          if $Prune_Empty_Msgs;
        $self->{rev_msg} = "[no log message]\n";
      }

      $self->add_file_entry
        unless $noadd;

      if ( $_ eq FILE_SEPARATOR ) {
        $self->clear_file;
      } else {
        $self->clear_msg;
      }
    }
  }

  close READER
    or die "Couldn't close pipe reader: $!\n";

  if ( defined $pid ) {
    waitpid $pid, 0;
    # Setting $! to 1 first makes die exit with status 1.
    0 == $?
      or $!=1, die sprintf("Problem reading log input (pid/exit/signal/core: %d/%d/%d/%d)\n",
                           $pid, $? >> 8, $? & 127, $? & 128);
  }

  return;
}
# -------------------------------------
# Hand the revision that has just been parsed over to the entry set.
# The fields are passed positionally, in exactly this order.
sub add_file_entry {
  my ($self) = @_;

  my @fields = qw( filename rev_time rev_revision
                   rev_state lines branch_names
                   rev_branch_roots
                   branch_numbers
                   symbolic_names
                   rev_author rev_msg );

  $self->grand_poobah->add_fileentry(@$self{@fields});
}
# -------------------------------------
# Build the username => display-name map and store it in $self->{usermap}.
#
# Two optional sources are consulted, in this order (later entries
# overwrite earlier ones for the same username):
#
#   $User_Map_File    - lines of the form "username:expansion".  If the
#                       value looks like "host:path" and is not a local
#                       file, it is fetched with $CVS_RSH (default ssh).
#   $User_Passwd_File - a passwd-format file; the name part of the gecos
#                       field plus "username@$Domain" is used.
#
# Dies if a configured file cannot be opened, or if a passwd file is
# given and no mail domain can be determined.
sub maybe_read_user_map_file {
  my ($self) = @_;

  my %expansions;
  my $User_Map_Input;

  if ($User_Map_File)
  {
    if ( $User_Map_File =~ m{^([-\w\@+=.,\/]+):([-\w\@+=.,\/:]+)} and
         !-f $User_Map_File )
    {
      my $rsh = (exists $ENV{'CVS_RSH'} ? $ENV{'CVS_RSH'} : 'ssh');
      $User_Map_Input = "$rsh $1 'cat $2' |";
      &main::debug ("(run \"${User_Map_Input}\")\n");
    }
    else
    {
      $User_Map_Input = "<$User_Map_File";
    }

    open (MAPFILE, $User_Map_Input)
      or die ("Unable to open $User_Map_File ($!)");

    while (<MAPFILE>)
    {
      next if /^\s*#/; # Skip comment lines.
      next if not /:/; # Skip lines without colons.

      # It is now safe to split on ':'.  Split at the first colon only,
      # so an expansion that itself contains a colon is kept whole.
      my ($username, $expansion) = split /:/, $_, 2;
      chomp $expansion;
      $expansion =~ s/^'(.*)'$/$1/;
      $expansion =~ s/^"(.*)"$/$1/;

      # If it looks like the expansion has a real name already, then
      # we toss the username we got from CVS log. Otherwise, keep
      # it to use in combination with the email address.
      if ($expansion =~ /^\s*<{0,1}\S+@.*/) {
        # Also, add angle brackets if none present
        if (! ($expansion =~ /<\S+@\S+>/)) {
          $expansions{$username} = "$username <$expansion>";
        }
        else {
          $expansions{$username} = "$username $expansion";
        }
      }
      else {
        $expansions{$username} = $expansion;
      }
    }

    close (MAPFILE);
  } # fi ($User_Map_File)

  if (defined $User_Passwd_File)
  {
    if ( ! defined $Domain ) {
      if ( -e MAILNAME ) {
        # NOTE(review): slurp_file is called unqualified and is not
        # defined in the visible part of this package -- confirm that it
        # resolves here.
        chomp($Domain = slurp_file(MAILNAME));
      } else {
      MAILDOMAIN_CMD:
        for ([qw(hostname -d)], 'dnsdomainname', 'domainname') {
          # run_ext lives in package main, so it must be qualified when
          # called from this package.
          my ($text, $exit, $sig, $core) = main::run_ext($_);
          if ( $exit == 0 && $sig == 0 && $core == 0 ) {
            chomp $text;
            if ( length $text ) {
              $Domain = $text;
              last MAILDOMAIN_CMD;
            }
          }
        }
      }
    }

    die "No mail domain found\n"
      unless defined $Domain;

    open (MAPFILE, "<$User_Passwd_File")
      or die ("Unable to open $User_Passwd_File ($!)");
    while (<MAPFILE>)
    {
      # all lines are valid
      my ($username, $pw, $uid, $gid, $gecos, $homedir, $shell) = split ':';
      # The display name is the first comma-separated part of the gecos.
      my $expansion = '';
      ($expansion) = split (',', $gecos)
        if defined $gecos && length $gecos;

      my $mailname = $Domain eq '' ? $username : "$username\@$Domain";
      $expansions{$username} = "$expansion <$mailname>";
    }
    close (MAPFILE);
  }

  $self->{usermap} = \%expansions;
}
# -------------------------------------
# Look for the file name in a 'cvs log' header line and remember it.
#
# Arguments:
#   $line - one line of log output
#
# Recognises "Working file: PATH", and -- when $RCS_Root is defined --
# "RCS file: ROOT/PATH,v" (the first "Attic/" is removed from PATH).
# Sets $self->{filename} unless the path matches one of the
# @Ignore_Files patterns; the patterns are matched case-insensitively
# when $Case_Insensitive is set.
#
# Returns nothing.
sub read_file_path {
  my ($self, $line) = @_;

  my $path;

  if ( $line =~ /^Working file: (.*)/ ) {
    $path = $1;
  } elsif ( defined $RCS_Root
            and
            $line =~ m|^RCS file: $RCS_Root[/\\](.*),v$| ) {
    $path = $1;
    $path =~ s!Attic/!!;
  } else {
    return;
  }

  if ( @Ignore_Files ) {
    # Honour --case-insensitive for the ignore patterns as well.  Using
    # the /i flag (rather than lower-casing the path) keeps working
    # whatever the case of the patterns themselves.
    return
      if grep { $Case_Insensitive ? $path =~ /$_/i : $path =~ /$_/ }
           @Ignore_Files;
  }

  $self->{filename} = $path;
  return;
}
# -------------------------------------
# Parse one line of the "symbolic names:" section of 'cvs log' output.
#
# Arguments:
#   $line - one line of log output
#
# A line that starts with non-whitespace ends the section
# ($self->{collecting_symbolic_names} is set to 0).  Otherwise the line
# is expected to look like "<whitespace>NAME: REVISION"; branch tags are
# recorded in $self->{branch_names} / $self->{branch_numbers}, ordinary
# tags are appended to $self->{symbolic_names}->{REVISION}.  Lines that
# cannot be parsed are skipped.
#
# Returns nothing.
sub read_symbolic_name {
  my ($self, $line) = @_;

  # All tag names are listed with whitespace in front in cvs log
  # output; so if see non-whitespace, then we're done collecting.
  # (Test the argument itself, not whatever $_ happens to hold.)
  if ( $line =~ /^\S/ ) {
    $self->{collecting_symbolic_names} = 0;
    return;
  }

  # we're looking at a tag name, so parse & store it

  # According to the Cederqvist manual, in node "Tags", tag names must start
  # with an uppercase or lowercase letter and can contain uppercase and
  # lowercase letters, digits, `-', and `_'. However, it's not our place to
  # enforce that, so we'll allow anything CVS hands us to be a tag:
  my ($tag_name, $tag_rev) = ($line =~ /^\s+([^:]+): ([\d.]+)$/);

  # Only record lines that really parsed; otherwise an undefined tag
  # would be stored under an empty revision.
  if ( defined $tag_rev ) {
    # A branch number either has an odd number of digit sections
    # (and hence an even number of dots), or has ".0." as the
    # second-to-last digit section. Test for these conditions.
    my $real_branch_rev = '';
    if ( $tag_rev =~ /^(\d+\.\d+\.)+\d+$/ # Even number of dots...
         and
         $tag_rev !~ /^(1\.)+1$/ ) { # ...but not "1.[1.]1"
      $real_branch_rev = $tag_rev;
    } elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0\.(\d+)/) { # Has ".0."
      $real_branch_rev = $1 . $3;
    }

    # If we got a branch, record its number.
    if ( $real_branch_rev ) {
      $self->{branch_names}->{$real_branch_rev} = $tag_name;
      $self->{branch_numbers}->{$tag_name} = $real_branch_rev;
    } else {
      # Else it's just a regular (non-branch) tag.
      push @{$self->{symbolic_names}->{$tag_rev}}, $tag_name;
    }
  }

  $self->{collecting_symbolic_names} = 1;
  return;
}
# -------------------------------------
# Pick the revision number out of a "revision X.Y..." line and store it
# in $self->{rev_revision}.  Any other line is ignored.
#
# Returns nothing.
sub read_revision {
  my ($self, $line) = @_;

  if ( $line =~ /^revision (\d+\.[\d.]+)/ ) {
    my $found = $1;
    $self->{rev_revision} = $found
      if $found;
  }

  return;
}
# -------------------------------------
{ # Closure over %gecos_warned
  # author => count; ensures the "no gecos info" warning is printed only
  # once per author.
  my %gecos_warned;

  # Parse a "date: ...; author: ...; state: ...;" line and store the
  # results in $self->{rev_time}, {rev_author} and {rev_state}.
  #
  # The author name is expanded as follows:
  #   - if the user map has a true entry for it, that entry is used;
  #   - otherwise, if a $Domain is defined or $Gecos is 1, the passwd
  #     database is consulted and "Full Name <email>" is used when a full
  #     name is found ("&" in the gecos fields stands for the capitalised
  #     login name).
  #
  # Returns nothing.
  sub read_date_author_and_state {
    my ($self, $line) = @_;

    my ($time, $author, $state) = $self->parse_date_author_and_state($line);

    if ( defined($self->{usermap}->{$author}) and $self->{usermap}->{$author} ) {
      $author = $self->{usermap}->{$author};
    } elsif ( defined $Domain or $Gecos == 1 ) {
      my $email = $author;
      $email = $author."@".$Domain
        if defined $Domain && $Domain ne '';

      my $pw = getpwnam($author);
      my ($fullname, $office, $workphone, $homephone, $gcos);
      if ( defined $pw ) {
        $gcos = (getpwnam($author))[6];
        ($fullname, $office, $workphone, $homephone) =
          split /\s*,\s*/, $gcos;
      } else {
        warn "Couldn't find gecos info for author '$author'\n"
          unless $gecos_warned{$author}++;
        $fullname = '';
      }

      # An empty gecos field leaves $fullname undefined.
      $fullname = ''
        unless defined $fullname;

      # "&" stands for the capitalised login name.  $author still holds
      # the login name here; $pw is only used as a found/not-found flag,
      # because in this package getpwnam is the builtin and its scalar
      # result is not an object.
      for (grep defined, $fullname, $office, $workphone, $homephone) {
        s/&/ucfirst(lc($author))/ge;
      }

      $author = $fullname . " <" . $email . ">"
        if $fullname ne '';
    }

    $self->{rev_state} = $state;
    $self->{rev_time} = $time;
    $self->{rev_author} = $author;
    return;
  }
}
# -------------------------------------
# Record the branches rooted at the current revision.
#
# Arguments:
#   $line - the payload of a "branches: ..." line, i.e. the branch
#           numbers separated by ";" (e.g. "1.5.2; 1.5.4")
#
# When $Show_Branches is set, the branch numbers are stored as an array
# ref in $self->{rev_branch_roots}; the trivial branch 1.1.1 is left out.
# When nothing remains, $self->{rev_branch_roots} is left untouched.
#
# Returns nothing.
sub read_branches {
  # A "branches: ..." line here indicates that one or more branches
  # are rooted at this revision. If we're showing branches, then we
  # want to show that fact as well, so we collect all the branches
  # that this is the latest ancestor of and store them in
  # $self->{rev_branch_roots}. Just for reference, the format of the
  # line we're seeing at this point is:
  #
  # branches: 1.5.2; 1.5.4; ...;
  #
  # Okay, here goes:
  my ($self, $line) = @_;

  # Ugh. This really bothers me. Suppose we see a log entry
  # like this:
  #
  # ----------------------------
  # revision 1.1
  # date: 1999/10/17 03:07:38; author: jrandom; state: Exp;
  # branches: 1.1.2;
  # Intended first line of log message begins here.
  # ----------------------------
  #
  # The question is, how we can tell the difference between that
  # log message and a *two*-line log message whose first line is
  #
  # "branches: 1.1.2;"
  #
  # See the problem? The output of "cvs log" is inherently
  # ambiguous.
  #
  # For now, we punt: we liberally assume that people don't
  # write log messages like that, and just toss a "branches:"
  # line if we see it but are not showing branches. I hope no
  # one ever loses real log data because of this.
  return
    unless $Show_Branches;

  # Split first and compare whole branch numbers afterwards, so that the
  # trivial-branch pattern can never eat part of another number (such as
  # the tail of "1.11.1") and no stray whitespace is left on a root.
  my @roots;
  foreach my $piece (split /;/, $line) {
    (my $root = $piece) =~ s/^\s+|\s+$//g;
    next
      unless length $root;
    next
      if $root =~ /^(?:1\.)+1$/; # ignore the trivial branch 1.1.1
    push @roots, $root;
  }

  $self->{rev_branch_roots} = \@roots
    if @roots;

  return;
}
# -------------------------------------
# Parse the date/time, author and state out of a 'cvs log' revision line.
#
# Arguments:
#   $line - a line like
#
#     date: 1999/02/19 23:29:05; author: apharris; state: Exp;
#
#   or, in CVS 1.12.9:
#
#     date: 2004-06-05 16:10:32 +0000; author: somebody; state: Exp;
#
# A trailing "lines: ..." part, if present, is stored in $self->{lines}.
#
# Returns the list (time, author, state), where time is in epoch seconds
# (UTC).  Dies if the line cannot be parsed or the year is outside
# 1970..2257.
sub parse_date_author_and_state {
  my ($self, $line) = @_;

  my ($year, $mon, $mday, $hours, $min, $secs, $utcOffset, $author, $state, $rest) =
    $line =~
      m!(\d+)[-/](\d+)[-/](\d+)\s+(\d+):(\d+):(\d+)(\s+[+-]\d{4})?;\s+
        author:\s+([^;]+);\s+state:\s+([^;]+);(.*)!x
    or die "Couldn't parse date ``$line''";
  die "Bad date or Y2K issues"
    unless $year > 1969 and $year < 2258;
  # Kinda arbitrary, but useful as a sanity check

  # Pass the full four-digit year.  A year minus 1900 in the range 0..99
  # would be read by timegm as a two-digit year and placed in the century
  # closest to today, which misdates old revisions.
  my $time = timegm($secs, $min, $hours, $mday, $mon-1, $year);

  if ( defined $utcOffset ) {
    # The stamp is local time at the given offset; shift it back to UTC.
    my ($plusminus, $hour, $minute) = ($utcOffset =~ m/([+-])(\d\d)(\d\d)/);
    my $offset = (($hour * 60) + $minute) * 60 * ($plusminus eq '+' ? -1 : 1);
    $time += $offset;
  }

  if ( $rest =~ m!\s+lines:\s+(.*)! ) {
    $self->{lines} = $1;
  }

  return $time, $author, $state;
}
# Subrs ----------------------------------------------------------------------
package main;
sub delta_check {
  my ($time, $tags) = @_;

  # In 'delta' mode we remember the latest time seen on a revision
  # carrying the starting tag ($Delta_From) and the latest time seen on
  # one carrying the ending tag ($Delta_To); output is later restricted
  # to that timeframe.  $tags is a reference to the list of tag names to
  # compare against those two.  Outside delta mode this does nothing.
  $Delta_Mode
    or return;

  if ( $time > $Delta_StartTime and grep { $_ eq $Delta_From } @$tags ) {
    $Delta_StartTime = $time;
  }

  if ( $time > $Delta_EndTime and grep { $_ eq $Delta_To } @$tags ) {
    $Delta_EndTime = $time;
  }
}
sub run_ext {
  my ($cmd) = @_;

  # Runs an external command and returns the list
  #   (output, exit code, signal number, core-dump flag).
  # $cmd is either a plain string or a reference to a list of words; the
  # words are joined with single spaces.  Standard error is folded into
  # the captured output.
  my @words        = ref $cmd ? @$cmd : ($cmd);
  my $command_line = join ' ', @words;

  my $out    = `$command_line 2>&1`;
  my $status = $?;    # snapshot the wait status before anything can reset it

  # Low 7 bits: terminating signal; bit 7: core dumped; high byte: exit code.
  return $out, $status >> 8, $status & 127, $status & 128;
}
# -------------------------------------
# If accumulating, grab the boundary date from pre-existing ChangeLog.
sub maybe_grab_accumulation_date {
  # Returns the first date/time stamp found at the start of a line in the
  # existing ChangeLog ($Log_File_Name), e.g. "2005-05-18 05:34", possibly
  # with a weekday word between the date and the time.
  #
  # Returns '' when not accumulating ($Cumulative unset) or when updating
  # ($Update set); returns undef if the file holds no such stamp.  When
  # $UTC_Times is set, the stamp is read as UTC and returned re-expressed
  # in the local timezone as "YYYY-MM-DD HH:MM".
  #
  # Dies if the ChangeLog cannot be opened for reading.
  if (! $Cumulative || $Update) {
    return '';
  }

  # Three-argument open with a lexical handle: the file name is taken
  # literally (no mode characters or pipes are interpreted) and no global
  # bareword handle is left behind.
  open (my $log_fh, '<', $Log_File_Name)
    or die ("trouble opening $Log_File_Name for reading ($!)");

  my $boundary_date;
  # Read into a lexical so the caller's $_ is not clobbered.
  while (my $entry = <$log_fh>) {
    if ($entry =~ /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/) {
      $boundary_date = $1;
      last;
    }
  }
  close ($log_fh);

  # convert time from utc to local timezone if the ChangeLog has
  # dates/times in utc
  if ($UTC_Times && $boundary_date) {
    # The stamp may carry a weekday between date and time (the scan above
    # accepts one), so skip over it here as well; otherwise the fields
    # would come back undefined and timegm would be handed garbage.
    if (my ($year, $mon, $mday, $hour, $min) =
          $boundary_date =~ m#(\d+)-(\d+)-(\d+)\s+(?:\w+\s+)?(\d+):(\d+)#) {
      # Pass the four-digit year as is: Time::Local takes years > 999
      # literally, whereas "$year - 1900" yields a two-digit value for
      # 1970..1999 that is resolved relative to the current date.
      my $time = timegm(0, $min, $hour, $mday, $mon-1, $year);

      # print the timevalue in the local timezone
      my ($lmin, $lhour, $lmday, $lmon, $lyear) = (localtime($time))[1..5];
      $boundary_date = sprintf ("%4u-%02u-%02u %02u:%02u",
                                $lyear+1900, $lmon+1, $lmday, $lhour, $lmin);
    }
  }

  return $boundary_date;
}
# -------------------------------------
# Fills up a ChangeLog structure in the current directory.
sub derive_changelog {
  my ($command) = @_;

  # See "The Plan" above for a full explanation.
  #
  # $command is a reference to the list of words making up the log
  # command.  It is modified in place when we are adding to an existing
  # ChangeLog: a "-d>DATE" argument is spliced in right after the 'log'
  # word so that only newer log messages are requested.
  if ( my $since = maybe_grab_accumulation_date() ) {
    my ($log_pos) = grep { $command->[$_] eq 'log' } 0 .. $#$command;
    splice @$command, $log_pos + 1, 0, "-d>${since}";
    &debug ("(adding log msg starting from $since)\n");
  }

  # Read the logs into an entry set, then have it write the ChangeLog.
  my $entry_set_builder = CVS::Utils::ChangeLog::EntrySetBuilder->new;
  $entry_set_builder->read_changelog($command);
  return $entry_set_builder->grand_poobah->output_changelog;
}
# -------------------------------------
sub min {
  # Returns the numerically smaller of the two arguments (the second one
  # when they compare equal).
  my ($left, $right) = @_;
  return $left
    if $left < $right;
  return $right;
}
# -------------------------------------
sub common_path_prefix {
  my ($path1, $path2) = @_;

  # Returns the leading directory components shared by the directory
  # parts of $path1 and $path2, each followed by '/', joined into one
  # string ('' when nothing is shared).  The file name portion of each
  # path, as split off by fileparse, takes no part in the comparison.
  #
  # For compatibility (with older versions of cvs2cl.pl), we think in UN*X
  # terms, and mould windoze filenames to match.  Is this really appropriate?
  # If a file is checked in under UN*X, and cvs log run on windoze, which way
  # do the path separators slope?  Can we use fileparse as per the local
  # conventions?  If so, we should probably have a user option to specify an
  # OS to emulate to handle stdin-fed logs.  If we did this, we could avoid
  # the nasty \-/ transmogrification below.
  my @dir_parts;
  for my $path ($path1, $path2) {
    my $dir = (fileparse($path))[1];

    # Transmogrify Windows filenames to look like Unix.
    # (It is far more likely that someone is running cvs2cl.pl under
    # Windows than that they would genuinely have backslashes in their
    # filenames.)
    $dir =~ tr!\\!/!;

    # Empty components (from doubled or leading slashes) are dropped.
    push @dir_parts, [ grep { length } split m!/!, $dir ];
  }

  # Walk both component lists in step until they diverge or one runs out.
  my ($first, $second) = @dir_parts;
  my @common_path;
  while ( @$first and @$second and $first->[0] eq $second->[0] ) {
    push @common_path, shift @$first;
    shift @$second;
  }

  return join '', map { "$_/" } @common_path;
}
# -------------------------------------
sub parse_options {
# Check this internally before setting the global variable.
my $output_file;
# If this gets set, we encountered unknown options and will exit at
# the end of this subroutine.
my $exit_with_admonishment = 0;
# command to generate the log
my @log_source_command = qw( cvs log );
my (@Global_Opts, @Local_Opts);
Getopt::Long::Configure(qw( bundling permute no_getopt_compat
pass_through no_ignore_case ));
GetOptions('help|usage|h' => \$Print_Usage,
'debug' => \$Debug, # unadvertised option, heh
'version' => \$Print_Version,
'file|f=s' => \$output_file,
'accum' => \$Cumulative,
'update' => \$Update,
'fsf' => \$FSF_Style,
'rcs=s' => \$RCS_Root,
'usermap|U=s' => \$User_Map_File,
'gecos' => \$Gecos,
'domain=s' => \$Domain,
'passwd=s' => \$User_Passwd_File,
'window|W=i' => \$Max_Checkin_Duration,
'chrono' => \$Chronological_Order,
'ignore|I=s' => \@Ignore_Files,
'case-insensitive|C' => \$Case_Insensitive,
'regexp|R=s' => \$Regexp_Gate,
'stdin' => \$Input_From_Stdin,
's