From d77b8594dceae702cb3c2edf9fcbe54e8b2835e2 Mon Sep 17 00:00:00 2001
From: Matthew Somerville <matthew-github@dracos.co.uk>
Date: Fri, 13 May 2016 17:12:45 +0100
Subject: [PATCH 1/7] Allow non-ASCII characters in email addresses.

Keep the default behaviour of previous versions, add a UNICODE variable
to switch it on.

This is from RFC 5335 / 6532, "Internationalized Email Headers".

Fixes #12.
---
 lib/Email/Address.pm | 14 +++++---
 t/ascii.t            | 83 +++++++++++++++++++++++++-------------------
 2 files changed, 57 insertions(+), 40 deletions(-)

diff --git a/lib/Email/Address.pm b/lib/Email/Address.pm
index e92dd59..47f0f00 100644
--- a/lib/Email/Address.pm
+++ b/lib/Email/Address.pm
@@ -6,6 +6,7 @@ package Email::Address;
 our $COMMENT_NEST_LEVEL ||= 1;
 our $STRINGIFY          ||= 'format';
 our $COLLAPSE_SPACES      = 1 unless defined $COLLAPSE_SPACES; # I miss //=
+our $UNICODE            ||= 0;
 
 =head1 SYNOPSIS
 
@@ -172,9 +173,10 @@ collapse multiple spaces into a single space, which avoids this problem.  To
 prevent this behavior, set C<$Email::Address::COLLAPSE_SPACES> to zero.  This
 variable will go away when the bug is resolved properly.
 
-In accordance with RFC 822 and its descendants, this module demands that email
-addresses be ASCII only.  Any non-ASCII content in the parsed addresses will
-cause the parser to return no results.
+By default, this module mandates that email addresses be ASCII only, and any
+non-ASCII content will cause a blank result. This matches RFCs 822, 2822, and
+5322. If you wish to allow UTF-8 characters in email, as per RFCs 5335 and
+6532, set C<$Email:Address::UNICODE> to 1.
 
 =cut
 
@@ -223,8 +225,10 @@ sub parse {
           ($user, $host) = ($1, $2);
       }
 
-      next if $user =~ /\P{ASCII}/;
-      next if $host =~ /\P{ASCII}/;
+      unless ($UNICODE) {
+          next if $user =~ /\P{ASCII}/;
+          next if $host =~ /\P{ASCII}/;
+      }
 
       my ($phrase)       = /($display_name)/o;
 
diff --git a/t/ascii.t b/t/ascii.t
index 6219f62..a143641 100644
--- a/t/ascii.t
+++ b/t/ascii.t
@@ -10,50 +10,63 @@ my $ascii = q{admin@mozilla.org};
 my $utf_8 = q{аdmin@mozilla.org};
 my $text  = decode('utf-8', $utf_8, Encode::LEAVE_SRC);
 
-my $ok_mixed  = qq{"$text" <$ascii>};
-my $bad_mixed = qq{"$text" <$text>};
+my $ascii_mixed = qq{"$text" <$ascii>};
+my $utf8_mixed = qq{"$text" <$text>};
 
-{
-  my (@addr) = Email::Address->parse($ascii);
-  is(@addr, 1, "an ascii address is a-ok");
+for (0..1) {
+  local $Email::Address::UNICODE = $_;
 
-  # ok( $ascii =~ $Email::Address::addr_spec, "...it =~ addr_spec");
-}
+  {
+    my (@addr) = Email::Address->parse($ascii);
+    is(@addr, 1, "an ascii address is a-ok");
 
-{
-  my (@addr) = Email::Address->parse($ok_mixed);
-  is(@addr, 1, "a quoted non-ascii phrase is a-ok with ascii email");
-}
+    # ok( $ascii =~ $Email::Address::addr_spec, "...it =~ addr_spec");
+  }
 
-{
-  my (@addr) = Email::Address->parse($bad_mixed);
-  is(@addr, 0, "a quoted non-ascii phrase is not okay with non-ascii email");
-}
+  {
+    my (@addr) = Email::Address->parse($ascii_mixed);
+    is(@addr, 1, "a quoted non-ascii phrase is a-ok with ascii email");
+  }
 
-{
-  my (@addr) = Email::Address->parse($utf_8);
-  is(@addr, 0, "utf-8 octet address: not ok");
+  {
+    my (@addr) = Email::Address->parse($utf8_mixed);
+    is(@addr, $Email::Address::UNICODE, "a quoted non-ascii phrase with non-ascii email");
+  }
 
-  # ok( $utf_8 !~ $Email::Address::addr_spec, "...it !~ addr_spec");
-}
+  {
+    my (@addr) = Email::Address->parse($utf_8);
+    is(@addr, $Email::Address::UNICODE, "utf-8 octet address");
 
-{
-  my (@addr) = Email::Address->parse($text);
-  is(@addr, 0, "unicode (decoded) address: not ok");
+    # ok( $utf_8 !~ $Email::Address::addr_spec, "...it !~ addr_spec");
+  }
 
-  # ok( $text =~ $Email::Address::addr_spec, "...it !~ addr_spec");
-}
+  {
+    my (@addr) = Email::Address->parse($text);
+    is(@addr, $Email::Address::UNICODE, "unicode (decoded) address");
+
+    # ok( $text =~ $Email::Address::addr_spec, "...it !~ addr_spec");
+  }
+
+  {
+    my @addr = Email::Address->parse(qq{
+      "Not ascii phras\x{e9}" <good\@email>,
+      b\x{e3}d\@user,
+      bad\@d\x{f6}main,
+      not.bad\@again
+    });
+    is scalar @addr, $Email::Address::UNICODE ? 4 : 2, "correct number of good emails";
+    is "$addr[0]", qq{"Not ascii phras\x{e9}" <good\@email>}, "expected email";
+    if ($Email::Address::UNICODE) {
+      is "$addr[1]", qq{b\x{e3}d\@user}, "expected email";
+      is "$addr[2]", qq{bad\@d\x{f6}main}, "expected email";
+      is "$addr[3]", qq{not.bad\@again}, "expected email";
+    } else {
+      is "$addr[1]", qq{not.bad\@again}, "expected email";
+    }
+  }
+
+  Email::Address->purge_cache;
 
-{
-  my @addr = Email::Address->parse(qq{
-    "Not ascii phras\x{e9}" <good\@email>,
-    b\x{e3}d\@user,
-    bad\@d\x{f6}main,
-    not.bad\@again
-  });
-  is scalar @addr, 2, "correct number of good emails";
-  is "$addr[0]", qq{"Not ascii phras\x{e9}" <good\@email>}, "expected email";
-  is "$addr[1]", qq{not.bad\@again}, "expected email";
 }
 
 done_testing;

From fa81a51ac932adf182629c44a613d722dc680f58 Mon Sep 17 00:00:00 2001
From: Matthew Somerville <matthew-github@dracos.co.uk>
Date: Fri, 13 May 2016 13:51:37 +0100
Subject: [PATCH 2/7] Bring regexes more in line with RFC5322.

FWS is still brought in as \s+ for simplicity. The obs_phrase test in
comment passes fine, so remove that code; space backtracking problems
will be fixed in the next commit.

This commit actually fixes the original example in #10 as $comment is
much simplified.

Update test to pass; test is actually incorrect in that the "comment"
should not be removed (RT#80665).
---
 lib/Email/Address.pm | 33 ++++++++-------------------------
 t/tests.t            |  2 +-
 2 files changed, 9 insertions(+), 26 deletions(-)

diff --git a/lib/Email/Address.pm b/lib/Email/Address.pm
index 47f0f00..4b1f54c 100644
--- a/lib/Email/Address.pm
+++ b/lib/Email/Address.pm
@@ -29,15 +29,13 @@ to be correct, and very very fast.
 my $CTL            = q{\x00-\x1F\x7F};
 my $special        = q{()<>\\[\\]:;@\\\\,."};
 
-my $text           = qr/[^\x0A\x0D]/;
+my $quoted_pair    = qr/\\[[:graph:] \t]/;
 
-my $quoted_pair    = qr/\\$text/;
-
-my $ctext          = qr/(?>[^()\\]+)/;
+my $ctext          = qr/[^$CTL()\\]/;
 my ($ccontent, $comment) = (q{})x2;
 for (1 .. $COMMENT_NEST_LEVEL) {
   $ccontent = qr/$ctext|$quoted_pair|$comment/;
-  $comment  = qr/\s*\((?:\s*$ccontent)*\s*\)\s*/;
+  $comment  = qr/\($ccontent*\)/;
 }
 my $cfws           = qr/$comment|\s+/;
 
@@ -46,33 +44,18 @@ my $atom           = qr/$cfws*$atext+$cfws*/;
 my $dot_atom_text  = qr/$atext+(?:\.$atext+)*/;
 my $dot_atom       = qr/$cfws*$dot_atom_text$cfws*/;
 
-my $qtext          = qr/[^\\"]/;
+my $qtext          = qr/[^$CTL\\"]/;
 my $qcontent       = qr/$qtext|$quoted_pair/;
 my $quoted_string  = qr/$cfws*"$qcontent*"$cfws*/;
 
 my $word           = qr/$atom|$quoted_string/;
 
-# XXX: This ($phrase) used to just be: my $phrase = qr/$word+/; It was changed
-# to resolve bug 22991, creating a significant slowdown.  Given current speed
-# problems.  Once 16320 is resolved, this section should be dealt with.
-# -- rjbs, 2006-11-11
-#my $obs_phrase     = qr/$word(?:$word|\.|$cfws)*/;
-
-# XXX: ...and the above solution caused endless problems (never returned) when
-# examining this address, now in a test:
-#   admin+=E6=96=B0=E5=8A=A0=E5=9D=A1_Weblog-- ATAT --test.socialtext.com
-# So we disallow the hateful CFWS in this context for now.  Of modern mail
-# agents, only Apple Web Mail 2.0 is known to produce obs-phrase.
-# -- rjbs, 2006-11-19
-my $simple_word    = qr/$atom|\.|\s*"$qcontent+"\s*/;
-my $obs_phrase     = qr/$simple_word+/;
-
+my $obs_phrase     = qr/$word(?:$word|\.|$cfws)*/;
 my $phrase         = qr/$obs_phrase|(?:$word+)/;
 
 my $local_part     = qr/$dot_atom|$quoted_string/;
-my $dtext          = qr/[^\[\]\\]/;
-my $dcontent       = qr/$dtext|$quoted_pair/;
-my $domain_literal = qr/$cfws*\[(?:\s*$dcontent)*\s*\]$cfws*/;
+my $dtext          = qr/[^$CTL\[\]\\]/;
+my $domain_literal = qr/$cfws*\[$dtext*\]$cfws*/;
 my $domain         = qr/$dot_atom|$domain_literal/;
 
 my $display_name   = $phrase;
@@ -127,7 +110,7 @@ following comment.
 our $addr_spec  = qr/$local_part\@$domain/;
 our $angle_addr = qr/$cfws*<$addr_spec>$cfws*/;
 our $name_addr  = qr/(?>$display_name?)$angle_addr/;
-our $mailbox    = qr/(?:$name_addr|$addr_spec)$comment*/;
+our $mailbox    = qr/$name_addr|$addr_spec/;
 
 sub _PHRASE   () { 0 }
 sub _ADDRESS  () { 1 }
diff --git a/t/tests.t b/t/tests.t
index b590337..268a1de 100644
--- a/t/tests.t
+++ b/t/tests.t
@@ -1488,7 +1488,7 @@ my @list = (
     '"Greg Norris (humble visionary genius)" <nextrightmove-- ATAT --bang.example.net>, <advocacy-- ATAT --p.example.org>',
     [
       [
-        'Greg Norris',
+        'Greg Norris ',
         'nextrightmove-- ATAT --bang.example.net',
         '(humble visionary genius)'
       ],

From b142fb678bb26be312a8a6eedbb7e5db7e8f971b Mon Sep 17 00:00:00 2001
From: Matthew Somerville <matthew-github@dracos.co.uk>
Date: Fri, 13 May 2016 15:28:51 +0100
Subject: [PATCH 3/7] Try and prevent backtracking regex explosions.

COLLAPSE_SPACES is no longer necessary.

Fixes the second example given in #10.
---
 lib/Email/Address.pm | 25 ++++++++-----------------
 t/speed.t            | 15 +++++++++++++++
 2 files changed, 23 insertions(+), 17 deletions(-)
 create mode 100644 t/speed.t

diff --git a/lib/Email/Address.pm b/lib/Email/Address.pm
index 4b1f54c..72d23b7 100644
--- a/lib/Email/Address.pm
+++ b/lib/Email/Address.pm
@@ -5,7 +5,6 @@ package Email::Address;
 
 our $COMMENT_NEST_LEVEL ||= 1;
 our $STRINGIFY          ||= 'format';
-our $COLLAPSE_SPACES      = 1 unless defined $COLLAPSE_SPACES; # I miss //=
 our $UNICODE            ||= 0;
 
 =head1 SYNOPSIS
@@ -37,25 +36,25 @@ for (1 .. $COMMENT_NEST_LEVEL) {
   $ccontent = qr/$ctext|$quoted_pair|$comment/;
   $comment  = qr/\($ccontent*\)/;
 }
-my $cfws           = qr/$comment|\s+/;
+my $cfws           = qr/(?>$comment|\s+)/;
 
 my $atext          = qq/[^$CTL$special\\s]/;
-my $atom           = qr/$cfws*$atext+$cfws*/;
+my $atom           = qr/$cfws*(?>$atext+)$cfws*/;
 my $dot_atom_text  = qr/$atext+(?:\.$atext+)*/;
-my $dot_atom       = qr/$cfws*$dot_atom_text$cfws*/;
+my $dot_atom       = qr/$cfws*(?>$dot_atom_text)$cfws*/;
 
 my $qtext          = qr/[^$CTL\\"]/;
 my $qcontent       = qr/$qtext|$quoted_pair/;
-my $quoted_string  = qr/$cfws*"$qcontent*"$cfws*/;
+my $quoted_string  = qr/$cfws*"(?>$qcontent*)"$cfws*/;
 
 my $word           = qr/$atom|$quoted_string/;
 
-my $obs_phrase     = qr/$word(?:$word|\.|$cfws)*/;
-my $phrase         = qr/$obs_phrase|(?:$word+)/;
+my $obs_phrase     = qr/$word(?>(?:$word|\.|$cfws)*)/;
+my $phrase         = qr/$obs_phrase|(?>$word+)/;
 
 my $local_part     = qr/$dot_atom|$quoted_string/;
 my $dtext          = qr/[^$CTL\[\]\\]/;
-my $domain_literal = qr/$cfws*\[$dtext*\]$cfws*/;
+my $domain_literal = qr/$cfws*\[(?>$dtext*)\]$cfws*/;
 my $domain         = qr/$dot_atom|$domain_literal/;
 
 my $display_name   = $phrase;
@@ -109,7 +108,7 @@ following comment.
 
 our $addr_spec  = qr/$local_part\@$domain/;
 our $angle_addr = qr/$cfws*<$addr_spec>$cfws*/;
-our $name_addr  = qr/(?>$display_name?)$angle_addr/;
+our $name_addr  = qr/$display_name?$angle_addr/;
 our $mailbox    = qr/$name_addr|$addr_spec/;
 
 sub _PHRASE   () { 0 }
@@ -150,12 +149,6 @@ C<$Email::Address::COMMENT_NEST_LEVEL> package variable to allow more.
 
 The reason for this hardly-limiting limitation is simple: efficiency.
 
-Long strings of whitespace can be problematic for this module to parse, a bug
-which has not yet been adequately addressed.  The default behavior is now to
-collapse multiple spaces into a single space, which avoids this problem.  To
-prevent this behavior, set C<$Email::Address::COLLAPSE_SPACES> to zero.  This
-variable will go away when the bug is resolved properly.
-
 By default, this module mandates that email addresses be ASCII only, and any
 non-ASCII content will cause a blank result. This matches RFCs 822, 2822, and
 5322. If you wish to allow UTF-8 characters in email, as per RFCs 5335 and
@@ -187,8 +180,6 @@ sub parse {
     my ($class, $line) = @_;
     return unless $line;
 
-    $line =~ s/[ \t]+/ /g if $COLLAPSE_SPACES;
-
     if (my @cached = $class->__get_cached_parse($line)) {
         return @cached;
     }
diff --git a/t/speed.t b/t/speed.t
new file mode 100644
index 0000000..1092f49
--- /dev/null
+++ b/t/speed.t
@@ -0,0 +1,15 @@
+#!perl
+use strict;
+
+use Email::Address;
+use Test::More tests => 1;
+
+my $email = "\"Hello\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\" <\@m>";
+my ($ea) = Email::Address->parse($email);
+
+is(
+  $ea,
+  undef,
+  'Bad address does not parse, but is not really slow'
+);
+

From 9066167ba421d59786b16308f3b43e58cce0d488 Mon Sep 17 00:00:00 2001
From: Matthew Somerville <matthew-github@dracos.co.uk>
Date: Fri, 13 May 2016 16:23:58 +0100
Subject: [PATCH 4/7] Use regex recusion for comment nesting.

Due to use of recursive regex and named backpatterns, this can not
support perl 5.8.

Fixes #11.
---
 lib/Email/Address.pm | 32 +++++++++-----------------------
 t/speed.t            |  8 +++++++-
 t/tests.t            | 10 ++++++++++
 3 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/lib/Email/Address.pm b/lib/Email/Address.pm
index 72d23b7..7255a4d 100644
--- a/lib/Email/Address.pm
+++ b/lib/Email/Address.pm
@@ -3,9 +3,8 @@ use warnings;
 package Email::Address;
 # ABSTRACT: RFC 2822 Address Parsing and Creation
 
-our $COMMENT_NEST_LEVEL ||= 1;
-our $STRINGIFY          ||= 'format';
-our $UNICODE            ||= 0;
+our $STRINGIFY ||= 'format';
+our $UNICODE   ||= 0;
 
 =head1 SYNOPSIS
 
@@ -31,11 +30,7 @@ my $special        = q{()<>\\[\\]:;@\\\\,."};
 my $quoted_pair    = qr/\\[[:graph:] \t]/;
 
 my $ctext          = qr/[^$CTL()\\]/;
-my ($ccontent, $comment) = (q{})x2;
-for (1 .. $COMMENT_NEST_LEVEL) {
-  $ccontent = qr/$ctext|$quoted_pair|$comment/;
-  $comment  = qr/\($ccontent*\)/;
-}
+my $comment        = qr/(?<comment>\((?:$ctext|$quoted_pair|(?&comment))*\))/;
 my $cfws           = qr/(?>$comment|\s+)/;
 
 my $atext          = qq/[^$CTL$special\\s]/;
@@ -140,15 +135,6 @@ This method returns a list of C<Email::Address> objects it finds in the input
 string.  B<Please note> that it returns a list, and expects that it may find
 multiple addresses.  The behavior in scalar context is undefined.
 
-The specification for an email address allows for infinitely nestable comments.
-That's nice in theory, but a little over done.  By default this module allows
-for one (C<1>) level of nested comments. If you think you need more, modify the
-C<$Email::Address::COMMENT_NEST_LEVEL> package variable to allow more.
-
-  $Email::Address::COMMENT_NEST_LEVEL = 10; # I'm deep
-
-The reason for this hardly-limiting limitation is simple: efficiency.
-
 By default, this module mandates that email addresses be ASCII only, and any
 non-ASCII content will cause a blank result. This matches RFCs 822, 2822, and
 5322. If you wish to allow UTF-8 characters in email, as per RFCs 5335 and
@@ -184,19 +170,19 @@ sub parse {
         return @cached;
     }
 
-    my (@mailboxes) = ($line =~ /$mailbox/go);
     my @addrs;
-    foreach (@mailboxes) {
+    while ($line =~ /(?<mailbox>$mailbox)/go) {
+      local $_ = $+{mailbox};
       my $original = $_;
 
-      my @comments = /($comment)/go;
+      my @comments = /$comment/go;
       s/$comment//go if @comments;
 
       my ($user, $host, $com);
-      ($user, $host) = ($1, $2) if s/<($local_part)\@($domain)>\s*\z//o;
+      ($user, $host) = ($+{user}, $+{host}) if s/<(?<user>$local_part)\@(?<host>$domain)>\s*\z//o;
       if (! defined($user) || ! defined($host)) {
-          s/($local_part)\@($domain)//o;
-          ($user, $host) = ($1, $2);
+          s/(?<user>$local_part)\@(?<host>$domain)//o;
+          ($user, $host) = ($+{user}, $+{host});
       }
 
       unless ($UNICODE) {
diff --git a/t/speed.t b/t/speed.t
index 1092f49..c8b15f3 100644
--- a/t/speed.t
+++ b/t/speed.t
@@ -2,7 +2,7 @@
 use strict;
 
 use Email::Address;
-use Test::More tests => 1;
+use Test::More tests => 2;
 
 my $email = "\"Hello\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\" <\@m>";
 my ($ea) = Email::Address->parse($email);
@@ -13,3 +13,9 @@ is(
   'Bad address does not parse, but is not really slow'
 );
 
+my $email = '\(¯¯`·.¥«P®ÎÑç€ØfTh€ÐÅ®K»¥.·`¯¯\) <email () example com>, "(> \" \" <)                              ( =\'o\'= )                              (\")___(\")  sWeEtAnGeLtHePrInCeSsOfThEsKy" <email2 () example com>, "(i)cRiStIaN(i)" <email3 () example com>, "(S)MaNu_vuOLeAmMazZaReNimOe(*)MiAo(@)" <email4 () example com>';
+my $email2 = ", $email";
+$email = $email . ($email2 x 10);
+my @emails = Email::Address->parse($email);
+
+is(@emails, 0, 'Bad addresses do not parse, but do not take for ever');
diff --git a/t/tests.t b/t/tests.t
index 268a1de..6f3adf9 100644
--- a/t/tests.t
+++ b/t/tests.t
@@ -1629,6 +1629,16 @@ my @list = (
       ],
     ],
   ],
+  [
+    q{"Matthew" <matthew-- ATAT --example.org> (Matthew (GSC))},
+    [
+      [
+        'Matthew',
+        'matthew-- ATAT --example.org',
+        'Matthew (GSC)',
+      ],
+    ],
+  ],
 );
 
 my $tests = 1;

From 7205fc9e972eeaf13ba975fdfdf28904ccb3c72b Mon Sep 17 00:00:00 2001
From: Matthew Somerville <matthew-github@dracos.co.uk>
Date: Sat, 14 May 2016 10:40:46 +0100
Subject: [PATCH 5/7] Don't extract "comments" from in quoted strings.

Fixes https://rt.cpan.org/Public/Bug/Display.html?id=80665
---
 lib/Email/Address.pm | 16 ++++++++++++++--
 t/tests.t            | 14 ++++++++++++--
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/lib/Email/Address.pm b/lib/Email/Address.pm
index 7255a4d..c9824bc 100644
--- a/lib/Email/Address.pm
+++ b/lib/Email/Address.pm
@@ -54,6 +54,10 @@ my $domain         = qr/$dot_atom|$domain_literal/;
 
 my $display_name   = $phrase;
 
+# This is for extracting comments, but not from inside quoted strings or domain
+# literals.
+my $parts = qr/("(?>$qcontent*)")|(\[(?>$dtext*)\])|$comment|([^\["(]+)/;
+
 =head2 Package Variables
 
 B<ACHTUNG!>  Email isn't easy (if even possible) to parse with a regex, I<at
@@ -175,8 +179,16 @@ sub parse {
       local $_ = $+{mailbox};
       my $original = $_;
 
-      my @comments = /$comment/go;
-      s/$comment//go if @comments;
+      my @comments;
+      my $new = '';
+      while (/$parts/go) {
+          my ($q, $d, $c, $o) = ($1, $2, $3, $4);
+          $new .= $q, next if $q;
+          $new .= $d, next if $d;
+          $new .= $o, next if $o;
+          push @comments, $c;
+      }
+      $_ = $new;
 
       my ($user, $host, $com);
       ($user, $host) = ($+{user}, $+{host}) if s/<(?<user>$local_part)\@(?<host>$domain)>\s*\z//o;
diff --git a/t/tests.t b/t/tests.t
index 6f3adf9..dda6414 100644
--- a/t/tests.t
+++ b/t/tests.t
@@ -1488,9 +1488,9 @@ my @list = (
     '"Greg Norris (humble visionary genius)" <nextrightmove-- ATAT --bang.example.net>, <advocacy-- ATAT --p.example.org>',
     [
       [
-        'Greg Norris ',
+        'Greg Norris (humble visionary genius)',
         'nextrightmove-- ATAT --bang.example.net',
-        '(humble visionary genius)'
+        undef,
       ],
       [
         undef,
@@ -1639,6 +1639,16 @@ my @list = (
       ],
     ],
   ],
+  [
+    q{"John (imperator) Doe" <john.doe-- ATAT --example.com> (Comment with "quotes"!)},
+    [
+      [
+        'John (imperator) Doe',
+        'john.doe-- ATAT --example.com',
+        'Comment with "quotes"!',
+      ],
+    ]
+  ],
 );
 
 my $tests = 1;

From c61bb289fdbbc645775c41b185c44bc8e5e9456d Mon Sep 17 00:00:00 2001
From: Matthew Somerville <matthew-github@dracos.co.uk>
Date: Sat, 14 May 2016 11:35:38 +0100
Subject: [PATCH 6/7] Fetch phrase/user/host from main regex.

---
 lib/Email/Address.pm | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/lib/Email/Address.pm b/lib/Email/Address.pm
index c9824bc..dd85f16 100644
--- a/lib/Email/Address.pm
+++ b/lib/Email/Address.pm
@@ -105,9 +105,9 @@ following comment.
 
 =cut
 
-our $addr_spec  = qr/$local_part\@$domain/;
+our $addr_spec  = qr/(?<local_part>$local_part)\@(?<domain>$domain)/;
 our $angle_addr = qr/$cfws*<$addr_spec>$cfws*/;
-our $name_addr  = qr/$display_name?$angle_addr/;
+our $name_addr  = qr/(?<display_name>$display_name?)$angle_addr/;
 our $mailbox    = qr/$name_addr|$addr_spec/;
 
 sub _PHRASE   () { 0 }
@@ -178,6 +178,14 @@ sub parse {
     while ($line =~ /(?<mailbox>$mailbox)/go) {
       local $_ = $+{mailbox};
       my $original = $_;
+      my $phrase = $+{display_name};
+      my $user = $+{local_part};
+      my $host = $+{domain};
+
+      unless ($UNICODE) {
+          next if $user =~ /\P{ASCII}/;
+          next if $host =~ /\P{ASCII}/;
+      }
 
       my @comments;
       my $new = '';
@@ -190,20 +198,6 @@ sub parse {
       }
       $_ = $new;
 
-      my ($user, $host, $com);
-      ($user, $host) = ($+{user}, $+{host}) if s/<(?<user>$local_part)\@(?<host>$domain)>\s*\z//o;
-      if (! defined($user) || ! defined($host)) {
-          s/(?<user>$local_part)\@(?<host>$domain)//o;
-          ($user, $host) = ($+{user}, $+{host});
-      }
-
-      unless ($UNICODE) {
-          next if $user =~ /\P{ASCII}/;
-          next if $host =~ /\P{ASCII}/;
-      }
-
-      my ($phrase)       = /($display_name)/o;
-
       for ( $phrase, $host, $user, @comments ) {
         next unless defined $_;
         s/^\s+//;

From 5182577ff2f3282f33e460b145e396316d9d7354 Mon Sep 17 00:00:00 2001
From: Matthew Somerville <matthew-github@dracos.co.uk>
Date: Sat, 14 May 2016 12:03:29 +0100
Subject: [PATCH 7/7] Parse display name for multiple quoted strings.

Tidy up handling of phrase (display name) to be consistent throughout; a
phrase will be treated as is unless it starts and ends with double quote
marks, in which case it will be treated as a quoted string, unquoted and
unescaped.

Fixes comment in #13.
---
 lib/Email/Address.pm | 33 ++++++++++++++++++++++++++-------
 t/tests.t            | 20 ++++++++++++++++++++
 2 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/lib/Email/Address.pm b/lib/Email/Address.pm
index dd85f16..da64411 100644
--- a/lib/Email/Address.pm
+++ b/lib/Email/Address.pm
@@ -55,7 +55,7 @@ my $domain         = qr/$dot_atom|$domain_literal/;
 my $display_name   = $phrase;
 
 # This is for extracting comments, but not from inside quoted strings or domain
-# literals.
+# literals; or quoted strings from in phrases.
 my $parts = qr/("(?>$qcontent*)")|(\[(?>$dtext*)\])|$comment|([^\["(]+)/;
 
 =head2 Package Variables
@@ -205,7 +205,17 @@ sub parse {
         $_ = undef unless length $_;
       }
 
-      $phrase =~ s/\\(.)/$1/g if $phrase;
+      $new = '';
+      while ($phrase && $phrase =~ /$parts/go) {
+        my ($q, $d, $c, $o) = ($1, $2, $3, $4);
+        $new .= $d, next if $d; # Shouldn't be any
+        $new .= $c, next if $c; # Shouldn't be any
+        $new .= $o, next if $o;
+        $q =~ s/\A"(.+)"\z/$1/;
+        $q =~ s/\\(.)/$1/g;
+        $new .= $q;
+      }
+      $phrase = $new if $new;
 
       my $new_comment = join q{ }, @comments;
       push @addrs,
@@ -226,17 +236,28 @@ sub parse {
 Constructs and returns a new C<Email::Address> object. Takes four
 positional arguments: phrase, email, and comment, and original string.
 
+If phrase starts and ends with quotes, the phrase will be assumed to be a
+quoted string. Otherwise it will be treated as is.
+
 The original string should only really be set using C<parse>.
 
 =cut
 
 sub new {
   my ($class, $phrase, $email, $comment, $orig) = @_;
-  $phrase =~ s/\A"(.+)"\z/$1/ if $phrase;
+  $phrase = _dephrase($phrase) if $phrase;
 
   bless [ $phrase, $email, $comment, $orig ] => $class;
 }
 
+sub _dephrase {
+    my $phrase = shift;
+    return $phrase unless $phrase =~ /\A"(.+)"\z/;
+    $phrase =~ s/\A"(.+)"\z/$1/;
+    $phrase =~ s/($quoted_pair)/substr $1, -1/goe;
+    return $phrase;
+}
+
 =item purge_cache
 
   Email::Address->purge_cache;
@@ -408,7 +429,7 @@ sub _enquoted_phrase {
   # if it's encoded -- rjbs, 2007-02-28
   return $phrase if $phrase =~ /\A=\?.+\?=\z/;
 
-  $phrase =~ s/\A"(.+)"\z/$1/;
+  $phrase = _dephrase($phrase);
   $phrase =~ s/([\\"])/\\$1/g;
 
   return qq{"$phrase"};
@@ -436,9 +457,7 @@ sub name {
     my ($self) = @_;
     my $name = q{};
     if ( $name = $self->[_PHRASE] ) {
-        $name =~ s/^"//;
-        $name =~ s/"$//;
-        $name =~ s/($quoted_pair)/substr $1, -1/goe;
+        $name = _dephrase($name);
     } elsif ( $name = $self->[_COMMENT] ) {
         $name =~ s/^\(//;
         $name =~ s/\)$//;
diff --git a/t/tests.t b/t/tests.t
index dda6414..0038b70 100644
--- a/t/tests.t
+++ b/t/tests.t
@@ -1649,6 +1649,26 @@ my @list = (
       ],
     ]
   ],
+  [
+    q{Matthew "the" Example <matthew-- ATAT --example.org>},
+    [
+      [
+        'Matthew the Example',
+        'matthew-- ATAT --example.org',
+        undef,
+      ],
+    ],
+  ],
+  [
+    q{"Matthew" the "Example" <matthew-- ATAT --example.org>},
+    [
+      [
+        'Matthew the Example',
+        'matthew-- ATAT --example.org',
+        undef,
+      ],
+    ],
+  ],
 );
 
 my $tests = 1;