Skip to content

Commit

Permalink
gh-80222: Fix email address header folding with long quoted-string (python#122753)
Browse files Browse the repository at this point in the history

Email generators using email.policy.default could incorrectly omit the
quote ('"') characters from a quoted-string during header refolding,
leading to invalid address headers and enabling header spoofing. This
change restores the quote characters on a bare-quoted-string as the
header is refolded, and escapes backslash and quote chars in the string.
  • Loading branch information
medmunds authored Jan 19, 2025
1 parent 61b35f7 commit 5aaf416
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 3 deletions.
19 changes: 18 additions & 1 deletion Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,16 @@
# The two newline characters: line feed and carriage return.
NLSET = {'\n', '\r'}
# SPECIALS extended with the newline characters above.
SPECIALSNL = SPECIALS | NLSET


def make_quoted_pairs(value):
    """Escape dquote and backslash for use within a quoted-string.

    *value* is converted with ``str()`` first.  Every backslash is doubled
    and every double quote gets a backslash in front of it.  The enclosing
    quote characters are not added.
    """
    # Backslashes are doubled before the quotes are escaped; doing it the
    # other way round would also double the backslash added for each quote.
    return str(value).replace('\\', '\\\\').replace('"', '\\"')


def quote_string(value):
    """Return *value* as a double-quoted string.

    Backslashes and double quotes in *value* are escaped by
    make_quoted_pairs(); the result is then wrapped in a pair of
    double quote characters.
    """
    escaped = make_quoted_pairs(value)
    return f'"{escaped}"'


# Match a RFC 2047 word, looks like =?utf-8?q?someword?=
rfc2047_matcher = re.compile(r'''
Expand Down Expand Up @@ -2905,6 +2913,15 @@ def _refold_parse_tree(parse_tree, *, policy):
if not hasattr(part, 'encode'):
# It's not a terminal, try folding the subparts.
newparts = list(part)
if part.token_type == 'bare-quoted-string':
# To fold a quoted string we need to create a list of terminal
# tokens that will render the leading and trailing quotes
# and use quoted pairs in the value as appropriate.
newparts = (
[ValueTerminal('"', 'ptext')] +
[ValueTerminal(make_quoted_pairs(p), 'ptext')
for p in newparts] +
[ValueTerminal('"', 'ptext')])
if not part.as_ew_allowed:
wrap_as_ew_blocked += 1
newparts.append(end_ew_not_allowed)
Expand Down
31 changes: 29 additions & 2 deletions Lib/test/test_email/test__header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3082,13 +3082,40 @@ def test_address_list_with_list_separator_after_fold(self):
self._test(parser.get_address_list(to)[0],
f'{a},\n =?utf-8?q?H=C3=BCbsch?= Kaktus <[email protected]>\n')

a = '.' * 79
a = '.' * 79 # ('.' is a special, so must be in quoted-string.)
to = f'"{a}" <[email protected]>, "Hübsch Kaktus" <[email protected]>'
self._test(parser.get_address_list(to)[0],
f'{a}\n'
f'"{a}"\n'
' <[email protected]>, =?utf-8?q?H=C3=BCbsch?= Kaktus '
'<[email protected]>\n')

# Check that quoted-strings in an address list keep their surrounding quotes
# and quoted-pairs when the header is folded at a short line length.
def test_address_list_with_specials_in_long_quoted_string(self):
# Regression for gh-80222.
# A 40-character limit forces the longer inputs below onto two lines.
policy = self.policy.clone(max_line_length=40)
cases = [
# (to, folded)
# NOTE(review): "[email protected]" looks like a placeholder left by
# email-address obfuscation in the page this text was captured from, not
# the literal used by the real test -- confirm against the upstream file.
# Specials ('<', '>', '(') inside the quoted display name.
('"Exfiltrator <[email protected]> (unclosed comment?" <[email protected]>',
'"Exfiltrator <[email protected]> (unclosed\n'
' comment?" <[email protected]>\n'),
# Quoted-pairs in the input are still escaped in the folded output.
('"Escaped \\" chars \\\\ in quoted-string stay escaped" <[email protected]>',
'"Escaped \\" chars \\\\ in quoted-string\n'
' stay escaped" <[email protected]>\n'),
# An unquoted display name is folded without gaining quotes.
('This long display name does not need quotes <[email protected]>',
'This long display name does not need\n'
' quotes <[email protected]>\n'),
# Quotes present in the input are kept in the expected output.
('"Quotes are not required but are retained here" <[email protected]>',
'"Quotes are not required but are\n'
' retained here" <[email protected]>\n'),
# A quoted-string used as the local-part, folded inside the quotes.
('"A quoted-string, it can be a valid local-part"@example.com',
'"A quoted-string, it can be a valid\n'
' local-part"@example.com\n'),
# Expected output is a single line: no fold is inserted here.
('"[email protected]"@example.com',
'"[email protected]"@example.com\n'),
]
# Run each pair as its own subtest, labelled by the input header value.
for (to, folded) in cases:
with self.subTest(to=to):
# The first element of get_address_list()'s result is what gets folded
# and compared with the expected text under the short-line policy.
self._test(parser.get_address_list(to)[0], folded, policy=policy)

# XXX Need tests with comments on various sides of a unicode token,
# and with unicode tokens in the comments. Spaces inside the quotes
# currently don't do the right thing.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Fix bug in the folding of quoted strings when flattening an email message using
a modern email policy. Previously when a quoted string was folded so that
it spanned more than one line, the surrounding quotes and internal escapes
would be omitted. This could theoretically be used to spoof header lines
using a carefully constructed quoted string if the resulting rendered email
was transmitted or re-parsed.

0 comments on commit 5aaf416

Please sign in to comment.