Files
chatmail-core/src/simplify.rs
2019-12-20 12:55:57 +01:00

252 lines
7.6 KiB
Rust
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/// Remove standard (RFC 3676, §4.3) footer if it is found.
fn remove_message_footer<'a>(lines: &'a [&str]) -> &'a [&'a str] {
for (ix, &line) in lines.iter().enumerate() {
// quoted-printable may encode `-- ` to `-- =20` which is converted
// back to `-- `
match line {
"-- " | "-- " => return &lines[..ix],
_ => (),
}
}
lines
}
/// Remove nonstandard footer and a boolean indicating whether such
/// footer was removed.
fn remove_nonstandard_footer<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) {
for (ix, &line) in lines.iter().enumerate() {
if line == "--"
|| line == "---"
|| line == "----"
|| line.starts_with("-----")
|| line.starts_with("_____")
|| line.starts_with("=====")
|| line.starts_with("*****")
|| line.starts_with("~~~~~")
{
return (&lines[..ix], true);
}
}
(lines, false)
}
fn split_lines(buf: &str) -> Vec<&str> {
buf.split('\n').collect()
}
/// Simplify message text for chat display.
/// Remove quotes, signatures, trailing empty lines etc.
pub fn simplify(mut input: String, is_chat_message: bool) -> (String, bool) {
input.retain(|c| c != '\r');
let lines = split_lines(&input);
let (lines, is_forwarded) = skip_forward_header(&lines);
let lines = remove_message_footer(lines);
let (lines, has_nonstandard_footer) = remove_nonstandard_footer(lines);
let (lines, has_bottom_quote) = if !is_chat_message {
remove_bottom_quote(lines)
} else {
(lines, false)
};
let (lines, has_top_quote) = if !is_chat_message {
remove_top_quote(lines)
} else {
(lines, false)
};
// re-create buffer from the remaining lines
let text = render_message(
lines,
has_top_quote,
has_nonstandard_footer || has_bottom_quote,
);
(text, is_forwarded)
}
/// Skips "forwarded message" header.
/// Returns message body lines and a boolean indicating whether
/// a message is forwarded or not.
fn skip_forward_header<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) {
if lines.len() >= 3
&& lines[0] == "---------- Forwarded message ----------"
&& lines[1].starts_with("From: ")
&& lines[2].is_empty()
{
(&lines[3..], true)
} else {
(lines, false)
}
}
fn remove_bottom_quote<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) {
let mut last_quoted_line = None;
for (l, line) in lines.iter().enumerate().rev() {
if is_plain_quote(line) {
last_quoted_line = Some(l)
} else if !is_empty_line(line) {
break;
}
}
if let Some(mut l_last) = last_quoted_line {
if l_last > 1 && is_empty_line(lines[l_last - 1]) {
l_last -= 1
}
if l_last > 1 {
let line = lines[l_last - 1];
if is_quoted_headline(line) {
l_last -= 1
}
}
(&lines[..l_last], true)
} else {
(lines, false)
}
}
fn remove_top_quote<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) {
let mut last_quoted_line = None;
let mut has_quoted_headline = false;
for (l, line) in lines.iter().enumerate() {
if is_plain_quote(line) {
last_quoted_line = Some(l)
} else if !is_empty_line(line) {
if is_quoted_headline(line) && !has_quoted_headline && last_quoted_line.is_none() {
has_quoted_headline = true
} else {
/* non-quoting line found */
break;
}
}
}
if let Some(last_quoted_line) = last_quoted_line {
(&lines[last_quoted_line + 1..], true)
} else {
(lines, false)
}
}
fn render_message(lines: &[&str], is_cut_at_begin: bool, is_cut_at_end: bool) -> String {
let mut ret = String::new();
if is_cut_at_begin {
ret += "[...]";
}
/* we write empty lines only in case and non-empty line follows */
let mut pending_linebreaks = 0;
let mut empty_body = true;
for line in lines {
if is_empty_line(line) {
pending_linebreaks += 1
} else {
if !empty_body {
if pending_linebreaks > 2 {
pending_linebreaks = 2
}
while 0 != pending_linebreaks {
ret += "\n";
pending_linebreaks -= 1
}
}
// the incoming message might contain invalid UTF8
ret += line;
empty_body = false;
pending_linebreaks = 1
}
}
if is_cut_at_end && (!is_cut_at_begin || !empty_body) {
ret += " [...]";
}
ret
}
/**
* Tools
*/
fn is_empty_line(buf: &str) -> bool {
// XXX: can it be simplified to buf.chars().all(|c| c.is_whitespace())?
//
// Strictly speaking, it is not equivalent (^A is not whitespace, but less than ' '),
// but having control sequences in email body?!
//
// See discussion at: https://github.com/deltachat/deltachat-core-rust/pull/402#discussion_r317062392
for c in buf.chars() {
if c > ' ' {
return false;
}
}
true
}
fn is_quoted_headline(buf: &str) -> bool {
/* This function may be called for the line _directly_ before a quote.
The function checks if the line contains sth. like "On 01.02.2016, xy@z wrote:" in various languages.
- Currently, we simply check if the last character is a ':'.
- Checking for the existence of an email address may fail (headlines may show the user's name instead of the address) */
buf.len() <= 80 && buf.ends_with(':')
}
fn is_plain_quote(buf: &str) -> bool {
buf.starts_with('>')
}
#[cfg(test)]
mod tests {
use super::*;
use proptest::prelude::*;
proptest! {
#[test]
// proptest does not support [[:graphical:][:space:]] regex.
fn test_simplify_plain_text_fuzzy(input in "[!-~\t \n]+") {
let (output, _is_forwarded) = simplify(input, true);
assert!(output.split('\n').all(|s| s != "-- "));
}
}
#[test]
fn test_simplify_trim() {
let input = "line1\n\r\r\rline2".to_string();
let (plain, is_forwarded) = simplify(input, false);
assert_eq!(plain, "line1\nline2");
assert!(!is_forwarded);
}
#[test]
fn test_simplify_forwarded_message() {
let input = "---------- Forwarded message ----------\r\nFrom: test@example.com\r\n\r\nForwarded message\r\n-- \r\nSignature goes here".to_string();
let (plain, is_forwarded) = simplify(input, false);
assert_eq!(plain, "Forwarded message");
assert!(is_forwarded);
}
#[test]
fn test_simplify_utilities() {
assert!(is_empty_line(" \t"));
assert!(is_empty_line(""));
assert!(is_empty_line(" \r"));
assert!(!is_empty_line(" x"));
assert!(is_plain_quote("> hello world"));
assert!(is_plain_quote(">>"));
assert!(!is_plain_quote("Life is pain"));
assert!(!is_plain_quote(""));
}
#[test]
fn test_remove_top_quote() {
let (lines, has_top_quote) = remove_top_quote(&["> first", "> second"]);
assert!(lines.is_empty());
assert!(has_top_quote);
let (lines, has_top_quote) = remove_top_quote(&["> first", "> second", "not a quote"]);
assert_eq!(lines, &["not a quote"]);
assert!(has_top_quote);
let (lines, has_top_quote) = remove_top_quote(&["not a quote", "> first", "> second"]);
assert_eq!(lines, &["not a quote", "> first", "> second"]);
assert!(!has_top_quote);
}
}