From 0e70e83dfd40cac47e1fc3e2f1c7b893ea0cd2f8 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 21 Sep 2009 17:04:20 -0700 Subject: [PATCH] scripts/get_maintainer.pl: better email routines, use perl not shell where possible Added format_email and parse_email routines to reduce inline use. Added email_address_inuse to eliminate multiple maintainer entries for the same email address, the first name encountered is used. Used internal perl equivalents of shell cmd use of grep|cut|sort|uniq Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 152 +++++++++++++++++++++++++++++----------------- 1 file changed, 96 insertions(+), 56 deletions(-) diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 5132949..1200d72 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -13,7 +13,7 @@ use strict; my $P = $0; -my $V = '0.19'; +my $V = '0.20'; use Getopt::Long qw(:config no_auto_abbrev); @@ -258,11 +258,8 @@ if ($email) { foreach my $chief (@penguin_chief) { if ($chief =~ m/^(.*):(.*)/) { my $email_address; - if ($email_usename) { - $email_address = format_email($1, $2); - } else { - $email_address = $2; - } + + $email_address = format_email($1, $2); if ($email_git_penguin_chiefs) { push(@email_to, $email_address); } else { @@ -400,21 +397,57 @@ sub top_of_kernel_tree { return 0; } -sub format_email { - my ($name, $email) = @_; +sub parse_email { + my ($formatted_email) = @_; + + my $name = ""; + my $address = ""; + + if ($formatted_email =~ /^([^<]+)<(.*\@.*)>$/) { + $name = $1; + $address = $2; + } elsif ($formatted_email =~ /^<(.*\@.*)>$/) { + $address = $1; + } elsif ($formatted_email =~ /^(.*\@.*)$/) { + $address = $1; + } $name =~ s/^\s+|\s+$//g; $name =~ s/^\"|\"$//g; - $email =~ s/^\s+|\s+$//g; + $address =~ s/^\s+|\s+$//g; - my $formatted_email = ""; + if ($name =~ /[^a-z0-9 \.\-]/i) { ##has "must quote" chars + $name =~ s/(?"; + $name = "\"$name\""; + } + + if ($email_usename) { + if ("$name" eq "") { + $formatted_email = "$address"; + } else { + $formatted_email = "$name <${address}>"; + } } else { - $formatted_email = "${name} \<${email}\>"; + $formatted_email = $address; } + return $formatted_email; } @@ -444,19 +477,18 @@ sub add_categories { } } } elsif ($ptype eq "M") { - my $p_used = 0; - if ($index >= 0) { - my $tv = $typevalue[$index - 1]; - if ($tv =~ m/^(\C):\s*(.*)/) { - if ($1 eq "P") { - if ($email_usename) { - push_email_address(format_email($2, $pvalue)); - $p_used = 1; + my ($name, $address) = parse_email($pvalue); + if ($name eq "") { + if ($index >= 0) { + my $tv = $typevalue[$index - 1]; + if ($tv =~ m/^(\C):\s*(.*)/) { + if ($1 eq "P") { + $name = $2; } } } } - if (!$p_used) { + if ($email_maintainer) { push_email_addresses($pvalue); } } elsif ($ptype eq "T") { @@ -475,26 +507,24 @@ sub add_categories { } } +sub email_address_inuse { + my ($test_address) = @_; + + foreach my $line (@email_to) { + my ($name, $address) = parse_email($line); + + return 1 if ($address eq $test_address); + } + return 0; +} + sub push_email_address { - my ($email_address) = @_; + my ($line) = @_; - my $email_name = ""; + my ($name, $address) = parse_email($line); - if ($email_maintainer) { - if ($email_address =~ m/([^<]+)<(.*\@.*)>$/) { - $email_name = $1; - $email_address = $2; - if ($email_usename) { - push(@email_to, format_email($email_name, $email_address)); - } else { - push(@email_to, $email_address); - } - } elsif ($email_address =~ m/<(.+)>/) { - $email_address = $1; - push(@email_to, $email_address); - } else { - push(@email_to, $email_address); - } + if (!email_address_inuse($address)) { + push(@email_to, format_email($name, $address)); } } @@ -535,6 +565,7 @@ sub recent_git_signoffs { my $output = ""; my $count = 0; my @lines = (); + my %hash; my $total_sign_offs; if (which("git") eq "") { @@ -548,25 +579,31 @@ sub recent_git_signoffs { } $cmd = "git log --since=${email_git_since} -- ${file}"; - $cmd .= " | grep -Ei \"^[-_ a-z]+by:.*\\\@.*\$\""; - if (!$email_git_penguin_chiefs) { - $cmd .= " | grep -Ev \"${penguin_chiefs}\""; - } - $cmd .= " | cut -f2- -d\":\""; - $cmd .= " | sort | uniq -c | sort -rn"; $output = `${cmd}`; $output =~ s/^\s*//gm; @lines = split("\n", $output); - $total_sign_offs = 0; + @lines = grep(/^[-_ a-z]+by:.*\@.*$/i, @lines); + if (!$email_git_penguin_chiefs) { + @lines = grep(!/${penguin_chiefs}/i, @lines); + } + # cut -f2- -d":" + s/.*:\s*(.+)\s*/$1/ for (@lines); + + @lines = mailmap(@lines); + + $total_sign_offs = @lines; + @lines = sort(@lines); + # uniq -c foreach my $line (@lines) { - if ($line =~ m/([0-9]+)\s+(.*)/) { - $total_sign_offs += $1; - } else { - die("$P: Unexpected git output: ${line}\n"); - } + $hash{$line}++; + } + # sort -rn + @lines = (); + foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) { + push(@lines,"$hash{$line} $line"); } foreach my $line (@lines) { @@ -579,8 +616,8 @@ sub recent_git_signoffs { $sign_offs * 100 / $total_sign_offs < $email_git_min_percent) { last; } + push_email_address($line); } - push_email_address($line); } } @@ -632,15 +669,18 @@ sub git_assign_blame { @commits = uniq(@commits); foreach my $commit (@commits) { $cmd = "git log -1 ${commit}"; - $cmd .= " | grep -Ei \"^[-_ a-z]+by:.*\\\@.*\$\""; - if (!$email_git_penguin_chiefs) { - $cmd .= " | grep -Ev \"${penguin_chiefs}\""; - } - $cmd .= " | cut -f2- -d\":\""; $output = `${cmd}`; $output =~ s/^\s*//gm; @lines = split("\n", $output); + + @lines = grep(/^[-_ a-z]+by:.*\@.*$/i, @lines); + if (!$email_git_penguin_chiefs) { + @lines = grep(!/${penguin_chiefs}/i, @lines); + } + # cut -f2- -d":" + s/.*:\s*(.+)\s*/$1/ for (@lines); + $hash{$_}++ for @lines; $total_sign_offs += @lines; } -- 2.7.4