diff --git a/email_reply_parser/__init__.py b/email_reply_parser/__init__.py index 063f65b..1623471 100644 --- a/email_reply_parser/__init__.py +++ b/email_reply_parser/__init__.py @@ -47,7 +47,7 @@ class EmailMessage(object): def __init__(self, text): self.fragments = [] self.fragment = None - self.text = text.replace('\r\n', '\n') + self.text = '\n'.join(text.splitlines()) self.found_visible = False def read(self): @@ -94,18 +94,20 @@ def _scan_line(self, line): line - a row of text from an email message """ - is_quote_header = self.QUOTE_HDR_REGEX.match(line) is not None + stripped_line = line.strip() + + is_quote_header = self.QUOTE_HDR_REGEX.match(stripped_line) is not None is_quoted = self.QUOTED_REGEX.match(line) is not None is_header = is_quote_header or self.HEADER_REGEX.match(line) is not None - if self.fragment and len(line.strip()) == 0: + if self.fragment and len(stripped_line) == 0: if self.SIG_REGEX.match(self.fragment.lines[-1].strip()): self.fragment.signature = True self._finish_fragment() if self.fragment \ and ((self.fragment.headers == is_header and self.fragment.quoted == is_quoted) or - (self.fragment.quoted and (is_quote_header or len(line.strip()) == 0))): + (self.fragment.quoted and (is_quote_header or len(stripped_line) == 0))): self.fragment.lines.append(line) else: diff --git a/email_reply_parser/version.py b/email_reply_parser/version.py index eaf6e6a..3139180 100644 --- a/email_reply_parser/version.py +++ b/email_reply_parser/version.py @@ -1 +1 @@ -VERSION = '0.5.11' +VERSION = '0.5.13' diff --git a/test/emails/email_1_10.txt b/test/emails/email_1_10.txt new file mode 100644 index 0000000..2b9ac3b --- /dev/null +++ b/test/emails/email_1_10.txt @@ -0,0 +1,7 @@ +Base tax cost environment side. May house most director treatment call heavy. +Forward professional woman institution happen. Tell girl hope to. Wrong perhaps apply anything expert main indeed. + +On Monday, April 13, 2020, 06:49:16 PM GMT+3, Paige Lee wrote: + +Thank experience bag memory hundred understand of. Environmental lose probably majority peace behind. When produce ask tough. +Institution thought system class nice instead speak. diff --git a/test/emails/email_1_11.txt b/test/emails/email_1_11.txt new file mode 100644 index 0000000..11988f7 --- /dev/null +++ b/test/emails/email_1_11.txt @@ -0,0 +1,9 @@ +Admit high represent movement. +Everything car rest perform late either among. Available help threat across spring necessary. +Develop line class impact pick generation. Join day design simply. + +On Tue, Apr 14, 2020 at 6:13 PM Alexandru via Sailo +wrote: + +Music easy though onto form top run agency. Arrive senior away total help. Foot partner between store energy out. +Water stock garden just. Skill design condition after why ten executive. diff --git a/test/emails/email_1_9.txt b/test/emails/email_1_9.txt new file mode 100644 index 0000000..393ce0b --- /dev/null +++ b/test/emails/email_1_9.txt @@ -0,0 +1,9 @@ +Resource popular local capital doctor. Wish with think north shoulder stand catch. Decade many production food view only green. + +Believe concern floor treatment admit keep maintain put. + On Friday, April 3, 2020, 06:05:24 PM EDT, Vicki Davis wrote: + + +Example myself effect understand miss idea. Tonight work home policy arm time report. + +Against rest concern each hotel. Person care policy sea. Attack realize suggest save all everything scientist. diff --git a/test/test_email_reply_parser.py b/test/test_email_reply_parser.py index 8d2849b..197fb06 100644 --- a/test/test_email_reply_parser.py +++ b/test/test_email_reply_parser.py @@ -90,6 +90,129 @@ def test_complex_body_with_one_fragment(self): self.assertEqual(1, len(message.fragments)) + def test_whitespace_before_header(self): + '''Header has whitespace at the beginning of the line. + + Seen in Yahoo! Mail (April 2020) with rich text reply. + ''' + + message = self.get_email('email_1_9') + + self.assertEqual( + 3, + len(message.fragments) + ) + + self.assertEqual( + [False, False, False], + [f.quoted for f in message.fragments] + ) + + self.assertEqual( + [False, False, False], + [f.signature for f in message.fragments] + ) + + self.assertEqual( + [False, True, False], + [f.headers for f in message.fragments] + ) + + self.assertEqual( + [False, True, True], + [f.hidden for f in message.fragments] + ) + + self.assertEqual( + ("Resource popular local capital doctor. " + "Wish with think north shoulder stand catch. " + "Decade many production food view only green.\n" + "\n" + "Believe concern floor treatment admit keep maintain put."), + message.reply) + + def test_quote_not_quoted(self): + '''Original email is not quoted at all. + + Seen in Yahoo! Mail (April 2020) with plain text reply. + ''' + + message = self.get_email('email_1_10') + + self.assertEqual( + 3, + len(message.fragments) + ) + + self.assertEqual( + [False, False, False], + [f.quoted for f in message.fragments] + ) + + self.assertEqual( + [False, False, False], + [f.signature for f in message.fragments] + ) + + self.assertEqual( + [False, True, False], + [f.headers for f in message.fragments] + ) + + self.assertEqual( + [False, True, True], + [f.hidden for f in message.fragments] + ) + + self.assertEqual( + ("Base tax cost environment side. " + "May house most director treatment call heavy.\n" + "Forward professional woman institution happen. " + "Tell girl hope to. " + "Wrong perhaps apply anything expert main indeed."), + message.reply) + + def test_header_on_multiple_lines(self): + '''Header is split into multiple lines + + Seen in GMail (April 2020); line length was 78 fwiw + ''' + + message = self.get_email('email_1_11') + + self.assertEqual( + 3, + len(message.fragments) + ) + + self.assertEqual( + [False, False, False], + [f.quoted for f in message.fragments] + ) + + self.assertEqual( + [False, False, False], + [f.signature for f in message.fragments] + ) + + self.assertEqual( + [False, True, False], + [f.headers for f in message.fragments] + ) + + self.assertEqual( + [False, True, True], + [f.hidden for f in message.fragments] + ) + + self.assertEqual( + ("Admit high represent movement.\n" + "Everything car rest perform late either among. " + "Available help threat across spring necessary.\n" + "Develop line class impact pick generation. " + "Join day design simply."), + message.reply) + def test_verify_reads_signature_correct(self): message = self.get_email('correct_sig') self.assertEqual(2, len(message.fragments)) @@ -166,17 +289,17 @@ def test_multiple_on(self): self.assertTrue(re.match('^On 9 Jan 2014', message.fragments[1].content)) self.assertEqual( - [False, True, False], + [False, True], [fragment.quoted for fragment in message.fragments] ) self.assertEqual( - [False, False, False], + [False, False], [fragment.signature for fragment in message.fragments] ) self.assertEqual( - [False, True, True], + [False, True], [fragment.hidden for fragment in message.fragments] )