From 2639772a11c860628c5f7007842eca52a1c34d78 Mon Sep 17 00:00:00 2001 From: Alvin Šipraga Date: Tue, 19 Dec 2023 02:25:15 +0100 Subject: get_maintainer: remove stray punctuation when cleaning file emails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When parsing emails from .yaml files in particular, stray punctuation such as a leading '-' can end up in the name. For example, consider a common YAML section such as: maintainers: - devicetree@vger.kernel.org This would previously be processed by get_maintainer.pl as: - Make the logic in clean_file_emails more robust by deleting any sub-names which consist of common single punctuation marks before proceeding to the best-effort name extraction logic. The output is then correct: devicetree@vger.kernel.org Some additional comments are added to the function to make things clearer to future readers. Link: https://lore.kernel.org/all/0173e76a36b3a9b4e7f324dd3a36fd4a9757f302.camel@perches.com/ Suggested-by: Joe Perches Signed-off-by: Alvin Šipraga Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index dac38c6e3b1c..ee1aed7e090c 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -2462,11 +2462,17 @@ sub clean_file_emails { foreach my $email (@file_emails) { $email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g; my ($name, $address) = parse_email($email); - if ($name eq '"[,\.]"') { - $name = ""; - } + # Strip quotes for easier processing, format_email will add them back + $name =~ s/^"(.*)"$/$1/; + + # Split into name-like parts and remove stray punctuation particles my @nw = split(/[^\p{L}\'\,\.\+-]/, $name); + @nw = grep(!/^[\'\,\.\+-]$/, @nw); + + # Make a best effort to extract the name, and only the name, by taking + # only the last two names, or in the case of obvious initials, the last + # three names. if (@nw > 2) { my $first = $nw[@nw - 3]; my $middle = $nw[@nw - 2]; @@ -2480,18 +2486,16 @@ sub clean_file_emails { } else { $name = "$middle $last"; } + } else { + $name = "@nw"; } if (substr($name, -1) =~ /[,\.]/) { $name = substr($name, 0, length($name) - 1); - } elsif (substr($name, -2) =~ /[,\.]"/) { - $name = substr($name, 0, length($name) - 2) . '"'; } if (substr($name, 0, 1) =~ /[,\.]/) { $name = substr($name, 1, length($name) - 1); - } elsif (substr($name, 0, 2) =~ /"[,\.]/) { - $name = '"' . substr($name, 2, length($name) - 2); } my $fmt_email = format_email($name, $address, $email_usename); -- cgit v1.2.3-58-ga151