diff --git a/src/dc_simplify.rs b/src/dc_simplify.rs
index 1e5c0f694..33f6b4721 100644
--- a/src/dc_simplify.rs
+++ b/src/dc_simplify.rs
@@ -1,175 +1,169 @@
use crate::dehtml::*;
-#[derive(Copy, Clone)]
-pub struct Simplify {
- pub is_forwarded: bool,
-}
-
-/// Return index of footer line in vector of message lines, or vector length if
-/// no footer is found.
-///
-/// Also return whether not-standard (rfc3676, §4.3) footer is found.
-fn find_message_footer(lines: &[&str]) -> (usize, bool) {
+/// Return the lines that precede the standard (RFC 3676, §4.3) footer mark, or all lines if there is none.
+fn remove_message_footer<'a>(lines: &'a [&str]) -> &'a [&'a str] {
for (ix, &line) in lines.iter().enumerate() {
// quoted-printable may encode `-- ` to `-- =20` which is converted
// back to `-- `
match line {
- "-- " | "-- " => return (ix, false),
- "--" | "---" | "----" => return (ix, true),
+ "-- " | "-- " => return &lines[..ix], // the mark itself and everything below it are dropped
_ => (),
}
}
- (lines.len(), false)
+ lines // no footer mark found: nothing is removed
}
-impl Simplify {
- pub fn new() -> Self {
- Simplify {
- is_forwarded: false,
+/// Remove nonstandard footer and return the remaining lines together with a
+/// boolean indicating whether such footer was removed.
+fn remove_nonstandard_footer<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) {
+ for (ix, &line) in lines.iter().enumerate() {
+ if line == "--" // two to four dashes have to make up the whole line
+ || line == "---"
+ || line == "----"
+ || line.starts_with("-----") // five separator characters only have to start the line
+ || line.starts_with("_____")
+ || line.starts_with("=====")
+ || line.starts_with("*****")
+ || line.starts_with("~~~~~")
+ {
+ return (&lines[..ix], true); // cut at the first matching line, dropping it and all lines below
}
}
+ (lines, false)
+}
- /// Simplify and normalise text: Remove quotes, signatures, unnecessary
- /// lineends etc.
- /// The data returned from simplify() must be free()'d when no longer used.
- pub fn simplify(&mut self, input: &str, is_html: bool, is_msgrmsg: bool) -> String {
- let mut out = if is_html {
- dehtml(input)
- } else {
- input.to_string()
- };
+fn split_lines(buf: &str) -> Vec<&str> {
+ buf.split('\n').collect() // splits on `\n` only; a trailing `\n` yields a final empty item
+}
- out.retain(|c| c != '\r');
- out = self.simplify_plain_text(&out, is_msgrmsg);
- out.retain(|c| c != '\r');
+/// Simplify message text for chat display: remove quotes, signatures, trailing empty lines etc.
+/// Returns the simplified text and whether a "forwarded message" header was skipped.
+pub fn simplify(input: &str, is_html: bool, is_chat_message: bool) -> (String, bool) {
+ let mut out = if is_html {
+ dehtml(input)
+ } else {
+ input.to_string()
+ };
- out
+ out.retain(|c| c != '\r'); // drop carriage returns so that lines can be split on `\n` alone
+ let lines = split_lines(&out);
+ let (lines, is_forwarded) = skip_forward_header(&lines); // the header is only recognised in the first three lines
+
+ let lines = remove_message_footer(lines); // a standard footer is cut without leaving a "[...]" marker
+ let (lines, has_nonstandard_footer) = remove_nonstandard_footer(lines);
+ let (lines, has_bottom_quote) = if !is_chat_message { // quotes are only stripped from non-chat messages
+ remove_bottom_quote(lines)
+ } else {
+ (lines, false)
+ };
+ let (lines, has_top_quote) = if !is_chat_message {
+ remove_top_quote(lines)
+ } else {
+ (lines, false)
+ };
+
+ // re-create buffer from the remaining lines, marking cut-off parts with "[...]"
+ let text = render_message(
+ lines,
+ has_top_quote,
+ has_nonstandard_footer || has_bottom_quote, // either one means text was cut off at the end
+ );
+ (text, is_forwarded)
+}
+
+/// Skips "forwarded message" header.
+/// Returns message body lines and a boolean indicating whether
+/// a message is forwarded or not.
+fn skip_forward_header<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) {
+ if lines.len() >= 3 // the header takes three lines: marker line, "From: ..." line, empty line
+ && lines[0] == "---------- Forwarded message ----------"
+ && lines[1].starts_with("From: ")
+ && lines[2].is_empty()
+ {
+ (&lines[3..], true) // the body starts right after the empty line
+ } else {
+ (lines, false)
}
+}
- /**
- * Simplify Plain Text
- */
- #[allow(non_snake_case, clippy::mut_range_bound, clippy::needless_range_loop)]
- fn simplify_plain_text(&mut self, buf_terminated: &str, is_msgrmsg: bool) -> String {
- /* This function ...
- ... removes all text after the line `-- ` (footer mark)
- ... removes full quotes at the beginning and at the end of the text -
- these are all lines starting with the character `>`
- ... remove a non-empty line before the removed quote (contains sth. like "On 2.9.2016, Bjoern wrote:" in different formats and lanugages) */
- /* split the given buffer into lines */
- let lines: Vec<_> = buf_terminated.split('\n').collect();
- let mut l_first: usize = 0;
- let mut is_cut_at_begin = false;
- let (mut l_last, mut is_cut_at_end) = find_message_footer(&lines);
-
- if l_last > l_first + 2 {
- let line0 = lines[l_first];
- let line1 = lines[l_first + 1];
- let line2 = lines[l_first + 2];
- if line0 == "---------- Forwarded message ----------"
- && line1.starts_with("From: ")
- && line2.is_empty()
- {
- self.is_forwarded = true;
- l_first += 3
+fn remove_bottom_quote<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) { // strips a quote block at the end; the flag tells whether one was cut
+ let mut last_quoted_line = None;
+ for (l, line) in lines.iter().enumerate().rev() { // scan from the bottom, so this ends up at the topmost line of the trailing quote run
+ if is_plain_quote(line) {
+ last_quoted_line = Some(l)
+ } else if !is_empty_line(line) { // empty lines do not interrupt the quote run
+ break;
+ }
+ }
+ if let Some(mut l_last) = last_quoted_line {
+ if l_last > 1 && is_empty_line(lines[l_last - 1]) { // also drop one empty line directly above the quote
+ l_last -= 1
+ }
+ if l_last > 1 {
+ let line = lines[l_last - 1];
+ if is_quoted_headline(line) { // presumably the line introducing the quote (e.g. "... wrote:") - TODO confirm against the helper
+ l_last -= 1
}
}
- for l in l_first..l_last {
- let line = lines[l];
- if line == "-----"
- || line == "_____"
- || line == "====="
- || line == "*****"
- || line == "~~~~~"
- {
- l_last = l;
- is_cut_at_end = true;
- /* done */
+ (&lines[..l_last], true)
+ } else {
+ (lines, false)
+ }
+}
+
+fn remove_top_quote<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) { // strips a quote block at the start; the flag tells whether one was cut
+ let mut last_quoted_line = None;
+ let mut has_quoted_headline = false;
+ for (l, line) in lines.iter().enumerate() {
+ if is_plain_quote(line) {
+ last_quoted_line = Some(l) // remember the lowest quoted line seen so far
+ } else if !is_empty_line(line) { // empty lines neither end nor extend the quote run
+ if is_quoted_headline(line) && !has_quoted_headline && last_quoted_line.is_none() { // one headline is tolerated, but only before the first quoted line
+ has_quoted_headline = true
+ } else {
/* non-quoting line found */
break;
}
}
- if !is_msgrmsg {
- let mut l_lastQuotedLine = None;
- for l in (l_first..l_last).rev() {
- let line = lines[l];
- if is_plain_quote(line) {
- l_lastQuotedLine = Some(l)
- } else if !is_empty_line(line) {
- break;
- }
- }
- if let Some(last_quoted_line) = l_lastQuotedLine {
- l_last = last_quoted_line;
- is_cut_at_end = true;
- if l_last > 1 && is_empty_line(lines[l_last - 1]) {
- l_last -= 1
- }
- if l_last > 1 {
- let line = lines[l_last - 1];
- if is_quoted_headline(line) {
- l_last -= 1
- }
- }
- }
- }
- if !is_msgrmsg {
- let mut l_lastQuotedLine_0 = None;
- let mut hasQuotedHeadline = 0;
- for l in l_first..l_last {
- let line = lines[l];
- if is_plain_quote(line) {
- l_lastQuotedLine_0 = Some(l)
- } else if !is_empty_line(line) {
- if is_quoted_headline(line)
- && 0 == hasQuotedHeadline
- && l_lastQuotedLine_0.is_none()
- {
- hasQuotedHeadline = 1i32
- } else {
- /* non-quoting line found */
- break;
- }
- }
- }
- if let Some(last_quoted_line) = l_lastQuotedLine_0 {
- l_first = last_quoted_line + 1;
- is_cut_at_begin = true
- }
- }
- /* re-create buffer from the remaining lines */
- let mut ret = String::new();
- if is_cut_at_begin {
- ret += "[...]";
- }
- /* we write empty lines only in case and non-empty line follows */
- let mut pending_linebreaks = 0;
- let mut content_lines_added = 0;
- for l in l_first..l_last {
- let line = lines[l];
- if is_empty_line(line) {
- pending_linebreaks += 1
- } else {
- if 0 != content_lines_added {
- if pending_linebreaks > 2i32 {
- pending_linebreaks = 2i32
- }
- while 0 != pending_linebreaks {
- ret += "\n";
- pending_linebreaks -= 1
- }
- }
- // the incoming message might contain invalid UTF8
- ret += line;
- content_lines_added += 1;
- pending_linebreaks = 1i32
- }
- }
- if is_cut_at_end && (!is_cut_at_begin || 0 != content_lines_added) {
- ret += " [...]";
- }
-
- ret
}
+ if let Some(last_quoted_line) = last_quoted_line {
+ (&lines[last_quoted_line + 1..], true) // keep only what follows the last quoted line
+ } else {
+ (lines, false)
+ }
+}
+
+fn render_message(lines: &[&str], is_cut_at_begin: bool, is_cut_at_end: bool) -> String { // joins the kept lines into one string, marking cut-off parts with "[...]"
+ let mut ret = String::new();
+ if is_cut_at_begin {
+ ret += "[...]"; // marks text that was cut off at the beginning
+ }
+ /* we write empty lines only in case a non-empty line follows */
+ let mut pending_linebreaks = 0;
+ let mut empty_body = true;
+ for line in lines {
+ if is_empty_line(line) {
+ pending_linebreaks += 1
+ } else {
+ if !empty_body { // nothing is written before the first content line, so leading empty lines vanish
+ if pending_linebreaks > 2 { // collapse longer runs: at most one blank line between content lines
+ pending_linebreaks = 2
+ }
+ while 0 != pending_linebreaks {
+ ret += "\n";
+ pending_linebreaks -= 1
+ }
+ }
+ // the incoming message might contain invalid UTF8 (NOTE(review): looks stale - `line` is a `&str` here; confirm and drop)
+ ret += line;
+ empty_body = false;
+ pending_linebreaks = 1
+ }
+ }
+ if is_cut_at_end && (!is_cut_at_begin || !empty_body) { // a single "[...]" is enough when both ends were cut and no text remains
+ ret += " [...]";
+ }
+ ret
}
/**
@@ -213,50 +207,59 @@ mod tests {
#[test]
// proptest does not support [[:graphical:][:space:]] regex.
fn test_simplify_plain_text_fuzzy(input in "[!-~\t \n]+") {
- let output = Simplify::new().simplify_plain_text(&input, true);
+ let (output, _is_forwarded) = simplify(&input, false, true);
assert!(output.split('\n').all(|s| s != "-- "));
}
}
#[test]
fn test_simplify_trim() {
- let mut simplify = Simplify::new();
let html = "\r\r\nline1
\r\n\r\n\r\rline2\n\r";
- let plain = simplify.simplify(html, true, false);
+ let (plain, is_forwarded) = simplify(html, true, false);
assert_eq!(plain, "line1\nline2");
+ assert!(!is_forwarded);
}
#[test]
fn test_simplify_parse_href() {
- let mut simplify = Simplify::new();
let html = "text]>text bold]]>";
- let plain = simplify.simplify(html, true, false);
+ let (plain, is_forwarded) = simplify(html, true, false);
assert_eq!(plain, "text *bold*<>");
+ assert!(!is_forwarded);
+ }
+
+ #[test]
+ fn test_simplify_forwarded_message() {
+ let text = "---------- Forwarded message ----------\r\nFrom: test@example.com\r\n\r\nForwarded message\r\n-- \r\nSignature goes here";
+ let (plain, is_forwarded) = simplify(text, false, false); // plain text input, not a chat message
+
+ assert_eq!(plain, "Forwarded message"); // forward header and signature are both stripped, no "[...]" marker is added
+ assert!(is_forwarded);
}
#[test]
fn test_simplify_html_encoded() {
- let mut simplify = Simplify::new();
let html =
"<>"'& äÄöÖüÜß fooÆçÇ ♦&noent;";
- let plain = simplify.simplify(html, true, false);
+ let (plain, is_forwarded) = simplify(html, true, false);
assert_eq!(
plain,
"<>\"\'& äÄöÖüÜß fooÆçÇ \u{2666}\u{200e}\u{200f}\u{200c}&noent;\u{200d}"
);
+ assert!(!is_forwarded);
}
#[test]
@@ -270,4 +273,19 @@ mod tests {
assert!(!is_plain_quote("Life is pain"));
assert!(!is_plain_quote(""));
}
+
+ #[test]
+ fn test_remove_top_quote() {
+ let (lines, has_top_quote) = remove_top_quote(&["> first", "> second"]); // input consisting only of a quote
+ assert!(lines.is_empty());
+ assert!(has_top_quote);
+
+ let (lines, has_top_quote) = remove_top_quote(&["> first", "> second", "not a quote"]); // quote followed by text
+ assert_eq!(lines, &["not a quote"]);
+ assert!(has_top_quote);
+
+ let (lines, has_top_quote) = remove_top_quote(&["not a quote", "> first", "> second"]); // a quote below the text is not a top quote and stays
+ assert_eq!(lines, &["not a quote", "> first", "> second"]);
+ assert!(!has_top_quote);
+ }
}
diff --git a/src/mimeparser.rs b/src/mimeparser.rs
index eab89a81c..e86af1fe2 100644
--- a/src/mimeparser.rs
+++ b/src/mimeparser.rs
@@ -576,12 +576,11 @@ impl<'a> MimeParser<'a> {
}
};
- let mut simplifier = Simplify::new();
- let simplified_txt = if decoded_data.is_empty() {
- "".into()
+ let (simplified_txt, is_forwarded) = if decoded_data.is_empty() {
+ ("".into(), false)
} else {
let is_html = mime_type == mime::TEXT_HTML;
- simplifier.simplify(&decoded_data, is_html, self.has_chat_version())
+ simplify(&decoded_data, is_html, self.has_chat_version())
};
if !simplified_txt.is_empty() {
@@ -593,7 +592,7 @@ impl<'a> MimeParser<'a> {
self.do_add_single_part(part);
}
- if simplifier.is_forwarded {
+ if is_forwarded {
self.is_forwarded = true;
}
}