is metadata which we don't want
+ } else {
+ self.add_text
+ }
+ }
+}
+
+#[derive(Debug, PartialEq, Clone, Copy)]
enum AddText {
No,
YesRemoveLineEnds,
@@ -41,6 +74,8 @@ pub fn dehtml_quick_xml(buf: &str) -> String {
strbuilder: String::with_capacity(buf.len()),
add_text: AddText::YesRemoveLineEnds,
last_href: None,
+ divs_since_quote_div: 0,
+ divs_since_quoted_content_div: 0,
};
let mut reader = quick_xml::Reader::from_str(buf);
@@ -79,13 +114,16 @@ pub fn dehtml_quick_xml(buf: &str) -> String {
}
fn dehtml_text_cb(event: &BytesText, dehtml: &mut Dehtml) {
- if dehtml.add_text == AddText::YesPreserveLineEnds
- || dehtml.add_text == AddText::YesRemoveLineEnds
+ if dehtml.get_add_text() == AddText::YesPreserveLineEnds
+ || dehtml.get_add_text() == AddText::YesRemoveLineEnds
{
let last_added = escaper::decode_html_buf_sloppy(event.escaped()).unwrap_or_default();
- if dehtml.add_text == AddText::YesRemoveLineEnds {
+ if dehtml.get_add_text() == AddText::YesRemoveLineEnds {
dehtml.strbuilder += LINE_RE.replace_all(&last_added, "\r").as_ref();
+ } else if !dehtml.line_prefix().is_empty() {
+ let l = dehtml.append_prefix("\n");
+ dehtml.strbuilder += LINE_RE.replace_all(&last_added, l.as_str()).as_ref();
} else {
dehtml.strbuilder += &last_added;
}
@@ -93,13 +131,16 @@ fn dehtml_text_cb(event: &BytesText, dehtml: &mut Dehtml) {
}
fn dehtml_cdata_cb(event: &BytesText, dehtml: &mut Dehtml) {
- if dehtml.add_text == AddText::YesPreserveLineEnds
- || dehtml.add_text == AddText::YesRemoveLineEnds
+ if dehtml.get_add_text() == AddText::YesPreserveLineEnds
+ || dehtml.get_add_text() == AddText::YesRemoveLineEnds
{
let last_added = escaper::decode_html_buf_sloppy(event.escaped()).unwrap_or_default();
- if dehtml.add_text == AddText::YesRemoveLineEnds {
+ if dehtml.get_add_text() == AddText::YesRemoveLineEnds {
dehtml.strbuilder += LINE_RE.replace_all(&last_added, "\r").as_ref();
+ } else if !dehtml.line_prefix().is_empty() {
+ let l = dehtml.append_prefix("\n");
+ dehtml.strbuilder += LINE_RE.replace_all(&last_added, l.as_str()).as_ref();
} else {
dehtml.strbuilder += &last_added;
}
@@ -110,8 +151,15 @@ fn dehtml_endtag_cb(event: &BytesEnd, dehtml: &mut Dehtml) {
let tag = String::from_utf8_lossy(event.name()).trim().to_lowercase();
match tag.as_str() {
- "p" | "div" | "table" | "td" | "style" | "script" | "title" | "pre" => {
- dehtml.strbuilder += "\n\n";
+ "p" | "table" | "td" | "style" | "script" | "title" | "pre" => {
+ dehtml.strbuilder += &dehtml.append_prefix("\n\n");
+ dehtml.add_text = AddText::YesRemoveLineEnds;
+ }
+ "div" => {
+ pop_tag(&mut dehtml.divs_since_quote_div);
+ pop_tag(&mut dehtml.divs_since_quoted_content_div);
+
+ dehtml.strbuilder += &dehtml.append_prefix("\n\n");
dehtml.add_text = AddText::YesRemoveLineEnds;
}
"a" => {
@@ -122,10 +170,14 @@ fn dehtml_endtag_cb(event: &BytesEnd, dehtml: &mut Dehtml) {
}
}
"b" | "strong" => {
- dehtml.strbuilder += "*";
+ if dehtml.get_add_text() != AddText::No {
+ dehtml.strbuilder += "*";
+ }
}
"i" | "em" => {
- dehtml.strbuilder += "_";
+ if dehtml.get_add_text() != AddText::No {
+ dehtml.strbuilder += "_";
+ }
}
_ => {}
}
@@ -139,19 +191,27 @@ fn dehtml_starttag_cb(
let tag = String::from_utf8_lossy(event.name()).trim().to_lowercase();
match tag.as_str() {
- "p" | "div" | "table" | "td" => {
- dehtml.strbuilder += "\n\n";
+ "p" | "table" | "td" => {
+ dehtml.strbuilder += &dehtml.append_prefix("\n\n");
+ dehtml.add_text = AddText::YesRemoveLineEnds;
+ }
+ #[rustfmt::skip]
+ "div" => {
+ maybe_push_tag(event, reader, "quote", &mut dehtml.divs_since_quote_div);
+ maybe_push_tag(event, reader, "quoted-content", &mut dehtml.divs_since_quoted_content_div);
+
+ dehtml.strbuilder += &dehtml.append_prefix("\n\n");
dehtml.add_text = AddText::YesRemoveLineEnds;
}
"br" => {
- dehtml.strbuilder += "\n";
+ dehtml.strbuilder += &dehtml.append_prefix("\n");
dehtml.add_text = AddText::YesRemoveLineEnds;
}
"style" | "script" | "title" => {
dehtml.add_text = AddText::No;
}
"pre" => {
- dehtml.strbuilder += "\n\n";
+ dehtml.strbuilder += &dehtml.append_prefix("\n\n");
dehtml.add_text = AddText::YesPreserveLineEnds;
}
"a" => {
@@ -172,15 +232,51 @@ fn dehtml_starttag_cb(
}
}
"b" | "strong" => {
- dehtml.strbuilder += "*";
+ if dehtml.get_add_text() != AddText::No {
+ dehtml.strbuilder += "*";
+ }
}
"i" | "em" => {
- dehtml.strbuilder += "_";
+ if dehtml.get_add_text() != AddText::No {
+ dehtml.strbuilder += "_";
+ }
}
_ => {}
}
}
+/// Notes a closing tag: decrements the open-tag `count`, leaving it untouched when already zero.
+/// Such `count`s are stored in the `Dehtml` struct and tell us when a specific tag gets closed.
+fn pop_tag(count: &mut u32) {
+ if *count > 0 { // guard: an unmatched closing tag must not underflow the `u32`
+ *count -= 1;
+ }
+}
+
+/// Notes an opening tag: increments `count` if it is already non-zero, or if one of the tag's
+/// attribute values equals `tag_name`. Such `count`s are stored in the `Dehtml` struct.
+fn maybe_push_tag(
+ event: &BytesStart,
+ reader: &Reader,
+ tag_name: &str,
+ count: &mut u32,
+) {
+ if *count > 0 || tag_contains_attr(event, reader, tag_name) { // already counting, or a match
+ *count += 1;
+ }
+}
+
+fn tag_contains_attr(event: &BytesStart, reader: &Reader, name: &str) -> bool {
+ event.attributes().any(|r| { // true if any attribute's decoded value equals `name`
+ r.map(|a| {
+ a.unescape_and_decode_value(reader) // only the value is compared; the key is not inspected
+ .map(|v| v == name)
+ .unwrap_or(false) // a value that fails to decode counts as "no match"
+ })
+ .unwrap_or(false) // an attribute that could not be read counts as "no match"
+ })
+}
+
pub fn dehtml_manually(buf: &str) -> String {
// Just strip out everything between "<" and ">"
let mut strbuilder = String::new();
@@ -288,4 +384,15 @@ mod tests {
let txt = dehtml(input).unwrap();
assert_eq!(txt.trim(), "lots of text");
}
+
+ #[async_std::test]
+ async fn test_quote_div() {
+ let input = include_str!("../test-data/message/gmx-quote-body.eml"); // HTML body fixture
+ let dehtml = dehtml(input).unwrap(); // convert the HTML to plain text first
+ println!("{}", dehtml);
+ let (msg, forwawded, top_quote) = simplify(dehtml, false); // second argument: `is_chat_message`
+ assert_eq!(msg, "Test");
+ assert_eq!(forwawded, false);
+ assert_eq!(top_quote.as_deref(), Some("test")); // the quoted text is reported as the top quote
+ }
}
diff --git a/src/mimeparser.rs b/src/mimeparser.rs
index 2be629b1b..6a7767d8c 100644
--- a/src/mimeparser.rs
+++ b/src/mimeparser.rs
@@ -2513,4 +2513,13 @@ On 2020-10-25, Bob wrote:
);
assert_eq!(mimeparser.parts[0].typ, Viewtype::File);
}
+
+ #[async_std::test]
+ async fn test_quote_div() {
+ let t = TestContext::new().await;
+ let raw = include_bytes!("../test-data/message/gmx-quote.eml"); // complete message fixture
+ let mimeparser = MimeMessage::from_bytes(&t.ctx, raw).await.unwrap();
+ assert_eq!(mimeparser.parts[0].msg, "YIPPEEEEEE\n\nMulti-line"); // message text without the quote
+ assert_eq!(mimeparser.parts[0].param.get(Param::Quote).unwrap(), "Now?"); // quote kept in `Param::Quote`
+ }
}
diff --git a/src/simplify.rs b/src/simplify.rs
index 792a7c887..6c46414e1 100644
--- a/src/simplify.rs
+++ b/src/simplify.rs
@@ -91,6 +91,14 @@ pub fn simplify(mut input: String, is_chat_message: bool) -> (String, bool, Opti
render_message(lines, has_nonstandard_footer || bottom_quote.is_some())
}
};
+
+ if !is_chat_message {
+ top_quote = top_quote.map(|quote| {
+ let quote_lines = split_lines(&quote);
+ let quote_lines = remove_message_footer(&quote_lines);
+ render_message(quote_lines, false)
+ });
+ }
(text, is_forwarded, top_quote)
}
@@ -218,19 +226,10 @@ fn render_message(lines: &[&str], is_cut_at_end: bool) -> String {
* Tools
*/
fn is_empty_line(buf: &str) -> bool {
- // XXX: can it be simplified to buf.chars().all(|c| c.is_whitespace())?
- //
- // Strictly speaking, it is not equivalent (^A is not whitespace, but less than ' '),
- // but having control sequences in email body?!
- //
- // See discussion at: https://github.com/deltachat/deltachat-core-rust/pull/402#discussion_r317062392
- for c in buf.chars() {
- if c > ' ' {
- return false;
- }
- }
-
- true
+ buf.chars().all(char::is_whitespace)
+ // for some time, this checked for `char <= ' '`,
+ // see discussion at: https://github.com/deltachat/deltachat-core-rust/pull/402#discussion_r317062392
+ // and https://github.com/deltachat/deltachat-core-rust/pull/2104/files#r538973613
}
fn is_quoted_headline(buf: &str) -> bool {
diff --git a/test-data/message/blockquote-tag.eml b/test-data/message/blockquote-tag.eml
new file mode 100644
index 000000000..6943ffcb4
--- /dev/null
+++ b/test-data/message/blockquote-tag.eml
@@ -0,0 +1,47 @@
+Return-Path:
+User-Agent: K-9 Mail for Android
+In-Reply-To:
+MIME-Version: 1.0
+Content-Type: multipart/alternative; boundary="----MLV7YOLJ7ED4UZKNGQYQ63O0RJGHU0"
+Content-Transfer-Encoding: 7bit
+Subject: Re: Test
+To: Alice
+From: Bob
+Message-ID:
+
+------MLV7YOLJ7ED4UZKNGQYQ63O0RJGHU0
+Content-Type: text/definitelynotplainthiswouldbetooeasy;
+ charset=utf-8
+Content-Transfer-Encoding: quoted-printable
+
+Hi Alice,
+
+some text.
+
+Am 21=2E Juni 2020 10:38:44 MESZ schrieb Alice :
+>Dear Bob,
+>
+>let's meet
+>
+>Alice
+
+--=20
+Diese Nachricht wurde von meinem Android-Ger=C3=A4t mit K-9 Mail gesendet=
+=2E
+------MLV7YOLJ7ED4UZKNGQYQ63O0RJGHU0
+Content-Type: text/html;
+ charset=utf-8
+Content-Transfer-Encoding: quoted-printable
+
+Hi Alice,
some text.
+Am 21=2E Juni 2020 10:38:44 M=
+ESZ schrieb Alice <jonathanschmiederer@gmx=2Ede>:
+Sehr geehrte/r Frau/Herr Brenner,
ich habe in=
+ meinen JuFo-Unterlagen den angeh=C3=A4ngten Gutschein gefunden=2E
Ist e=
+s noch m=C3=B6glich, diesen einzul=C3=B6sen?
Mit freundlichen Gr=C3=
+=BC=C3=9Fen
Alice
--
=
+Diese Nachricht wurde von meinem Android-Ger=C3=A4t mit K-9 Mail gesendet=
+=2E
+------MLV7YOLJ7ED4UZKNGQYQ63O0RJGHU0--
diff --git a/test-data/message/gmx-quote-body.eml b/test-data/message/gmx-quote-body.eml
new file mode 100644
index 000000000..77e282baa
--- /dev/null
+++ b/test-data/message/gmx-quote-body.eml
@@ -0,0 +1,14 @@
+Test
+
+
+
+
+
Gesendet: Freitag, 04. Dezember 2020 um 18:46 Uhr
+Von: "Bob" <bob@gmx.de>
+An: alice@gmx.de
+Betreff: test
+
+
test
+
+
+
diff --git a/test-data/message/gmx-quote.eml b/test-data/message/gmx-quote.eml
new file mode 100644
index 000000000..594a72ef5
--- /dev/null
+++ b/test-data/message/gmx-quote.eml
@@ -0,0 +1,38 @@
+Return-Path:
+Delivered-To: bob@gmx.de
+MIME-Version: 1.0
+Message-ID:
+From: Alice
+To: bob@gmx.de
+Subject: Aw: Re: Re: Re: Message from bob@gmx.de
+Content-Type: text/html; charset=UTF-8
+Date: Tue, 8 Dec 2020 12:59:55 +0100
+Importance: normal
+Sensitivity: Normal
+In-Reply-To:
+References:
+
+
+
+YIPPEEEEEE
+
+
+
+
Multi-line
+
+
+
+
+
Gesendet: Dienstag, 08. Dezember 2020 um 12:59 Uhr
+Von: bob@gmx.de
+An: "Alice" <alice@gmx.de>
+Betreff: Re: Re: Re: Message from bob@gmx.de
+
+
+
+
+