@@ -2835,6 +2835,30 @@ def _steal_trailing_WSP_if_exists(lines):
28352835 lines .pop ()
28362836 return wsp
28372837
def _steal_all_trailing_WSP_if_exists(lines):
    """Strip the trailing run of WSP from the end of *lines* and return it.

    Works backwards from the last line.  The trailing WSP characters of the
    last line are removed; if that leaves the line empty, the line itself is
    dropped and the scan continues with the line before it.  The scan stops
    at the first line that is empty, has no trailing WSP, or still holds
    non-WSP text after stripping.

    *lines* is modified in place.  If at least one line was dropped, a
    single-space line is appended afterwards.

    Returns the removed whitespace, concatenated in its original order, or
    '' when nothing was removed.
    """
    stolen = []  # collected last-to-first; reversed when joined
    removed_a_line = False
    while lines and lines[-1]:
        current = lines[-1]
        # Find where the trailing WSP run of this line begins.
        cut = len(current)
        while cut > 0 and current[cut - 1] in WSP:
            cut -= 1
        if cut == len(current):
            # No trailing whitespace on this line: nothing more to take.
            break
        stolen.append(current[cut:])
        if cut:
            # Non-WSP text remains, so the whitespace run ends here.
            lines[-1] = current[:cut]
            break
        # The whole line was whitespace: drop it and look further back.
        lines.pop()
        removed_a_line = True

    if removed_a_line:
        lines.append(' ')
    return ''.join(reversed(stolen))
2861+
28382862def _refold_parse_tree (parse_tree , * , policy ):
28392863 """Return string of contents of parse_tree folded according to RFC rules.
28402864
@@ -2843,9 +2867,7 @@ def _refold_parse_tree(parse_tree, *, policy):
28432867 maxlen = policy .max_line_length or sys .maxsize
28442868 encoding = 'utf-8' if policy .utf8 else 'us-ascii'
28452869 lines = ['' ] # Folded lines to be output
2846- leading_whitespace = '' # When we have whitespace between two encoded
2847- # words, we may need to encode the whitespace
2848- # at the beginning of the second word.
2870+ last_word_is_ew = False
28492871 last_ew = None # Points to the last encoded character if there's an ew on
28502872 # the line
28512873 last_charset = None
@@ -2882,6 +2904,7 @@ def _refold_parse_tree(parse_tree, *, policy):
28822904 if part .token_type == 'mime-parameters' :
28832905 # Mime parameter folding (using RFC2231) is extra special.
28842906 _fold_mime_parameters (part , lines , maxlen , encoding )
2907+ last_word_is_ew = False
28852908 continue
28862909
28872910 if want_encoding and not wrap_as_ew_blocked :
@@ -2898,6 +2921,7 @@ def _refold_parse_tree(parse_tree, *, policy):
28982921 # XXX what if encoded_part has no leading FWS?
28992922 lines .append (newline )
29002923 lines [- 1 ] += encoded_part
2924+ last_word_is_ew = False
29012925 continue
29022926 # Either this is not a major syntactic break, so we don't
29032927 # want it on a line by itself even if it fits, or it
@@ -2917,10 +2941,8 @@ def _refold_parse_tree(parse_tree, *, policy):
29172941 last_charset == 'utf-8' and charset != 'us-ascii' )):
29182942 last_ew = None
29192943 last_ew = _fold_as_ew (tstr , lines , maxlen , last_ew ,
2920- part .ew_combine_allowed , charset , leading_whitespace )
2921- # This whitespace has been added to the lines in _fold_as_ew()
2922- # so clear it now.
2923- leading_whitespace = ''
2944+ part .ew_combine_allowed , charset , last_word_is_ew )
2945+ last_word_is_ew = True
29242946 last_charset = charset
29252947 want_encoding = False
29262948 continue
@@ -2933,28 +2955,20 @@ def _refold_parse_tree(parse_tree, *, policy):
29332955
29342956 if len (tstr ) <= maxlen - len (lines [- 1 ]):
29352957 lines [- 1 ] += tstr
2958+ if any (char not in WSP for char in tstr ):
2959+ last_word_is_ew = False
29362960 continue
29372961
29382962 # This part is too long to fit. The RFC wants us to break at
29392963 # "major syntactic breaks", so unless we don't consider this
29402964 # to be one, check if it will fit on the next line by itself.
2941- leading_whitespace = ''
29422965 if (part .syntactic_break and
29432966 len (tstr ) + 1 <= maxlen ):
29442967 newline = _steal_trailing_WSP_if_exists (lines )
29452968 if newline or part .startswith_fws ():
2946- # We're going to fold the data onto a new line here. Due to
2947- # the way encoded strings handle continuation lines, we need to
2948- # be prepared to encode any whitespace if the next line turns
2949- # out to start with an encoded word.
29502969 lines .append (newline + tstr )
2951-
2952- whitespace_accumulator = []
2953- for char in lines [- 1 ]:
2954- if char not in WSP :
2955- break
2956- whitespace_accumulator .append (char )
2957- leading_whitespace = '' .join (whitespace_accumulator )
2970+ if not all (char in WSP for char in lines [- 1 ]):
2971+ last_word_is_ew = False
29582972 last_ew = None
29592973 continue
29602974 if not hasattr (part , 'encode' ):
@@ -2994,10 +3008,12 @@ def _refold_parse_tree(parse_tree, *, policy):
29943008 else :
29953009 # We can't fold it onto the next line either...
29963010 lines [- 1 ] += tstr
3011+ if any (char not in WSP for char in tstr ):
3012+ last_word_is_ew = False
29973013
29983014 return policy .linesep .join (lines ) + policy .linesep
29993015
3000- def _fold_as_ew (to_encode , lines , maxlen , last_ew , ew_combine_allowed , charset , leading_whitespace ):
3016+ def _fold_as_ew (to_encode , lines , maxlen , last_ew , ew_combine_allowed , charset , last_word_is_ew ):
30013017 """Fold string to_encode into lines as encoded word, combining if allowed.
30023018 Return the new value for last_ew, or None if ew_combine_allowed is False.
30033019
@@ -3012,14 +3028,22 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
30123028 to_encode = str (
30133029 get_unstructured (lines [- 1 ][last_ew :] + to_encode ))
30143030 lines [- 1 ] = lines [- 1 ][:last_ew ]
3015- elif to_encode [0 ] in WSP :
3031+ elif to_encode [0 ] in WSP and not last_word_is_ew :
30163032 # We're joining this to non-encoded text, so don't encode
30173033 # the leading blank.
30183034 leading_wsp = to_encode [0 ]
30193035 to_encode = to_encode [1 :]
30203036 if (len (lines [- 1 ]) == maxlen ):
30213037 lines .append (_steal_trailing_WSP_if_exists (lines ))
30223038 lines [- 1 ] += leading_wsp
3039+ elif last_word_is_ew :
3040+ # If we are following up an encoded word with another encoded word,
3041+ # any white space between the two will be ignored when decoded.
3042+ # Therefore, we encode all to-be-displayed whitespace in the second
3043+ # encoded word.
3044+ leading_whitespace = _steal_all_trailing_WSP_if_exists (lines )
3045+ to_encode = leading_whitespace + to_encode
3046+ lines [- 1 ] = ' '
30233047
30243048 trailing_wsp = ''
30253049 if to_encode [- 1 ] in WSP :
@@ -3040,20 +3064,11 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
30403064
30413065 while to_encode :
30423066 remaining_space = maxlen - len (lines [- 1 ])
3043- text_space = remaining_space - chrome_len - len ( leading_whitespace )
3067+ text_space = remaining_space - chrome_len
30443068 if text_space <= 0 :
30453069 lines .append (' ' )
30463070 continue
30473071
3048- # If we are at the start of a continuation line, prepend whitespace
3049- # (we only want to do this when the line starts with an encoded word
3050- # but if we're folding in this helper function, then we know that we
3051- # are going to be writing out an encoded word.)
3052- if len (lines ) > 1 and len (lines [- 1 ]) == 1 and leading_whitespace :
3053- encoded_word = _ew .encode (leading_whitespace , charset = encode_as )
3054- lines [- 1 ] += encoded_word
3055- leading_whitespace = ''
3056-
30573072 to_encode_word = to_encode [:text_space ]
30583073 encoded_word = _ew .encode (to_encode_word , charset = encode_as )
30593074 excess = len (encoded_word ) - remaining_space
@@ -3065,7 +3080,6 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
30653080 excess = len (encoded_word ) - remaining_space
30663081 lines [- 1 ] += encoded_word
30673082 to_encode = to_encode [len (to_encode_word ):]
3068- leading_whitespace = ''
30693083
30703084 if to_encode :
30713085 lines .append (' ' )
0 commit comments