mirror of
https://github.com/chatmail/core.git
synced 2026-04-06 07:32:12 +03:00
513 lines
17 KiB
Rust
513 lines
17 KiB
Rust
//! # Simplify incoming plaintext.
|
||
|
||
/// Protects lines starting with `--` against being treated as a footer.
|
||
/// for that, we insert a ZERO WIDTH SPACE (ZWSP, 0x200B);
|
||
/// this should be invisible on most systems and there is no need to unescape it again
|
||
/// (which won't be done by non-deltas anyway).
|
||
///
|
||
/// This escapes a bit more than actually needed by delta (e.g. also lines as "-- footer"),
|
||
/// but for non-delta-compatibility, that seems to be better.
|
||
/// (to be only compatible with delta, only "[\r\n|\n]-- {0,2}[\r\n|\n]" needs to be replaced)
|
||
pub fn escape_message_footer_marks(text: &str) -> String {
|
||
if let Some(text) = text.strip_prefix("--") {
|
||
"-\u{200B}-".to_string() + &text.replace("\n--", "\n-\u{200B}-")
|
||
} else {
|
||
text.replace("\n--", "\n-\u{200B}-")
|
||
}
|
||
}
|
||
|
||
/// Remove standard (RFC 3676, §4.3) footer if it is found.
|
||
/// Returns `(lines, footer_lines)` tuple;
|
||
/// `footer_lines` is set to `Some` if the footer was actually removed from `lines`
|
||
/// (which is equal to the input array otherwise).
|
||
#[allow(clippy::indexing_slicing)]
|
||
fn remove_message_footer<'a>(lines: &'a [&str]) -> (&'a [&'a str], Option<&'a [&'a str]>) {
|
||
let mut nearly_standard_footer = None;
|
||
for (ix, &line) in lines.iter().enumerate() {
|
||
match line {
|
||
// some providers encode `-- ` to `-- =20` which results in `-- `
|
||
"-- " | "-- " => return (&lines[..ix], lines.get(ix + 1..)),
|
||
// some providers encode `-- ` to `=2D-` which results in only `--`;
|
||
// use that only when no other footer is found
|
||
// and if the line before is empty and the line after is not empty
|
||
"--" => {
|
||
if (ix == 0 || lines[ix - 1].is_empty())
|
||
&& ix != lines.len() - 1
|
||
&& !lines[ix + 1].is_empty()
|
||
{
|
||
nearly_standard_footer = Some(ix);
|
||
}
|
||
}
|
||
_ => (),
|
||
}
|
||
}
|
||
if let Some(ix) = nearly_standard_footer {
|
||
return (&lines[..ix], lines.get(ix + 1..));
|
||
}
|
||
(lines, None)
|
||
}
|
||
|
||
/// Remove nonstandard footer and a boolean indicating whether such footer was removed.
|
||
/// Returns `(lines, is_footer_removed)` tuple;
|
||
/// `is_footer_removed` is set to `true` if the footer was actually removed from `lines`
|
||
/// (which is equal to the input array otherwise).
|
||
#[allow(clippy::indexing_slicing)]
|
||
fn remove_nonstandard_footer<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) {
|
||
for (ix, &line) in lines.iter().enumerate() {
|
||
if line == "--"
|
||
|| line.starts_with("---")
|
||
|| line.starts_with("_____")
|
||
|| line.starts_with("=====")
|
||
|| line.starts_with("*****")
|
||
|| line.starts_with("~~~~~")
|
||
{
|
||
return (&lines[..ix], true);
|
||
}
|
||
}
|
||
(lines, false)
|
||
}
|
||
|
||
pub(crate) fn split_lines(buf: &str) -> Vec<&str> {
|
||
buf.split('\n').collect()
|
||
}
|
||
|
||
/// Simplified text and some additional information gained from the input.
|
||
#[derive(Debug, Default)]
|
||
pub(crate) struct SimplifiedText {
|
||
/// The text itself.
|
||
pub text: String,
|
||
|
||
/// True if the message is forwarded.
|
||
pub is_forwarded: bool,
|
||
|
||
/// True if nonstandard footer was removed.
|
||
pub is_cut: bool,
|
||
|
||
/// Top quote, if any.
|
||
pub top_quote: Option<String>,
|
||
|
||
/// Footer, if any.
|
||
pub footer: Option<String>,
|
||
}
|
||
|
||
/// Simplify message text for chat display.
|
||
/// Remove quotes, signatures, trailing empty lines etc.
|
||
pub(crate) fn simplify(mut input: String, is_chat_message: bool) -> SimplifiedText {
|
||
let mut is_cut = false;
|
||
|
||
input.retain(|c| c != '\r');
|
||
let lines = split_lines(&input);
|
||
let (lines, is_forwarded) = skip_forward_header(&lines);
|
||
|
||
let (lines, mut top_quote) = remove_top_quote(lines);
|
||
let original_lines = &lines;
|
||
let (lines, footer_lines) = remove_message_footer(lines);
|
||
let footer = footer_lines.map(|footer_lines| render_message(footer_lines, false));
|
||
is_cut = is_cut || footer.is_some();
|
||
|
||
let text = if is_chat_message {
|
||
render_message(lines, false)
|
||
} else {
|
||
let (lines, has_nonstandard_footer) = remove_nonstandard_footer(lines);
|
||
let (lines, mut bottom_quote) = remove_bottom_quote(lines);
|
||
|
||
if top_quote.is_none() && bottom_quote.is_some() {
|
||
std::mem::swap(&mut top_quote, &mut bottom_quote);
|
||
}
|
||
|
||
if lines.iter().all(|it| it.trim().is_empty()) {
|
||
render_message(original_lines, false)
|
||
} else {
|
||
is_cut = is_cut || has_nonstandard_footer || bottom_quote.is_some();
|
||
render_message(lines, has_nonstandard_footer || bottom_quote.is_some())
|
||
}
|
||
};
|
||
|
||
if !is_chat_message {
|
||
top_quote = top_quote.map(|quote| {
|
||
let quote_lines = split_lines("e);
|
||
let (quote_lines, quote_footer_lines) = remove_message_footer("e_lines);
|
||
is_cut = is_cut || quote_footer_lines.is_some();
|
||
|
||
render_message(quote_lines, false)
|
||
});
|
||
}
|
||
|
||
SimplifiedText {
|
||
text,
|
||
is_forwarded,
|
||
is_cut,
|
||
top_quote,
|
||
footer,
|
||
}
|
||
}
|
||
|
||
/// Skips "forwarded message" header.
|
||
/// Returns message body lines and a boolean indicating whether
|
||
/// a message is forwarded or not.
|
||
fn skip_forward_header<'a>(lines: &'a [&str]) -> (&'a [&'a str], bool) {
|
||
match lines {
|
||
["---------- Forwarded message ----------", first_line, "", rest @ ..]
|
||
if first_line.starts_with("From: ") =>
|
||
{
|
||
(rest, true)
|
||
}
|
||
_ => (lines, false),
|
||
}
|
||
}
|
||
|
||
#[allow(clippy::indexing_slicing)]
|
||
fn remove_bottom_quote<'a>(lines: &'a [&str]) -> (&'a [&'a str], Option<String>) {
|
||
let mut first_quoted_line = lines.len();
|
||
let mut last_quoted_line = None;
|
||
for (l, line) in lines.iter().enumerate().rev() {
|
||
if is_plain_quote(line) {
|
||
if last_quoted_line.is_none() {
|
||
first_quoted_line = l + 1;
|
||
}
|
||
last_quoted_line = Some(l)
|
||
} else if !is_empty_line(line) {
|
||
break;
|
||
}
|
||
}
|
||
if let Some(mut l_last) = last_quoted_line {
|
||
let quoted_text = lines[l_last..first_quoted_line]
|
||
.iter()
|
||
.map(|s| {
|
||
s.strip_prefix('>')
|
||
.map_or(*s, |u| u.strip_prefix(' ').unwrap_or(u))
|
||
})
|
||
.collect::<Vec<&str>>()
|
||
.join("\n");
|
||
if l_last > 1 && is_empty_line(lines[l_last - 1]) {
|
||
l_last -= 1
|
||
}
|
||
if l_last > 1 {
|
||
let line = lines[l_last - 1];
|
||
if is_quoted_headline(line) {
|
||
l_last -= 1
|
||
}
|
||
}
|
||
(&lines[..l_last], Some(quoted_text))
|
||
} else {
|
||
(lines, None)
|
||
}
|
||
}
|
||
|
||
#[allow(clippy::indexing_slicing)]
|
||
fn remove_top_quote<'a>(lines: &'a [&str]) -> (&'a [&'a str], Option<String>) {
|
||
let mut first_quoted_line = 0;
|
||
let mut last_quoted_line = None;
|
||
let mut has_quoted_headline = false;
|
||
for (l, line) in lines.iter().enumerate() {
|
||
if is_plain_quote(line) {
|
||
if last_quoted_line.is_none() {
|
||
first_quoted_line = l;
|
||
}
|
||
last_quoted_line = Some(l)
|
||
} else if is_quoted_headline(line) && !has_quoted_headline && last_quoted_line.is_none() {
|
||
has_quoted_headline = true
|
||
} else {
|
||
/* non-quoting line found */
|
||
break;
|
||
}
|
||
}
|
||
if let Some(last_quoted_line) = last_quoted_line {
|
||
(
|
||
&lines[last_quoted_line + 1..],
|
||
Some(
|
||
lines[first_quoted_line..last_quoted_line + 1]
|
||
.iter()
|
||
.map(|s| {
|
||
s.strip_prefix('>')
|
||
.map_or(*s, |u| u.strip_prefix(' ').unwrap_or(u))
|
||
})
|
||
.collect::<Vec<&str>>()
|
||
.join("\n"),
|
||
),
|
||
)
|
||
} else {
|
||
(lines, None)
|
||
}
|
||
}
|
||
|
||
fn render_message(lines: &[&str], is_cut_at_end: bool) -> String {
|
||
let mut ret = String::new();
|
||
/* we write empty lines only in case and non-empty line follows */
|
||
let mut pending_linebreaks = 0;
|
||
let mut empty_body = true;
|
||
for line in lines {
|
||
if is_empty_line(line) {
|
||
pending_linebreaks += 1
|
||
} else {
|
||
if !empty_body {
|
||
if pending_linebreaks > 2 {
|
||
pending_linebreaks = 2
|
||
}
|
||
while 0 != pending_linebreaks {
|
||
ret += "\n";
|
||
pending_linebreaks -= 1
|
||
}
|
||
}
|
||
// the incoming message might contain invalid UTF8
|
||
ret += line;
|
||
empty_body = false;
|
||
pending_linebreaks = 1
|
||
}
|
||
}
|
||
if is_cut_at_end && !empty_body {
|
||
ret += " [...]";
|
||
}
|
||
// redo escaping done by escape_message_footer_marks()
|
||
ret.replace('\u{200B}', "")
|
||
}
|
||
|
||
/// Returns true if the line contains only whitespace.
|
||
fn is_empty_line(buf: &str) -> bool {
|
||
buf.chars().all(char::is_whitespace)
|
||
// for some time, this checked for `char <= ' '`,
|
||
// see discussion at: <https://github.com/deltachat/deltachat-core-rust/pull/402#discussion_r317062392>
|
||
// and <https://github.com/deltachat/deltachat-core-rust/pull/2104/files#r538973613>
|
||
}
|
||
|
||
fn is_quoted_headline(buf: &str) -> bool {
|
||
/* This function may be called for the line _directly_ before a quote.
|
||
The function checks if the line contains sth. like "On 01.02.2016, xy@z wrote:" in various languages.
|
||
- Currently, we simply check if the last character is a ':'.
|
||
- Checking for the existence of an email address may fail (headlines may show the user's name instead of the address) */
|
||
|
||
buf.len() <= 80 && buf.ends_with(':')
|
||
}
|
||
|
||
fn is_plain_quote(buf: &str) -> bool {
|
||
buf.starts_with('>')
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use proptest::prelude::*;
|
||
|
||
use super::*;
|
||
|
||
proptest! {
|
||
#[test]
|
||
// proptest does not support [[:graphical:][:space:]] regex.
|
||
fn test_simplify_plain_text_fuzzy(input in "[!-~\t \n]+") {
|
||
let SimplifiedText {
|
||
text,
|
||
..
|
||
} = simplify(input, true);
|
||
assert!(text.split('\n').all(|s| s != "-- "));
|
||
}
|
||
}
|
||
|
||
#[test]
|
||
fn test_dont_remove_whole_message() {
|
||
let input = "\n------\nFailed\n------\n\nUh-oh, this workflow did not succeed!\n\nlots of other text".to_string();
|
||
let SimplifiedText {
|
||
text,
|
||
is_forwarded,
|
||
is_cut,
|
||
..
|
||
} = simplify(input, false);
|
||
assert_eq!(
|
||
text,
|
||
"------\nFailed\n------\n\nUh-oh, this workflow did not succeed!\n\nlots of other text"
|
||
);
|
||
assert!(!is_forwarded);
|
||
assert!(!is_cut);
|
||
}
|
||
|
||
#[test]
|
||
fn test_chat_message() {
|
||
let input = "Hi! How are you?\n\n---\n\nI am good.\n-- \nSent with my Delta Chat Messenger: https://delta.chat".to_string();
|
||
let SimplifiedText {
|
||
text,
|
||
is_forwarded,
|
||
is_cut,
|
||
footer,
|
||
..
|
||
} = simplify(input, true);
|
||
assert_eq!(text, "Hi! How are you?\n\n---\n\nI am good.");
|
||
assert!(!is_forwarded);
|
||
assert!(is_cut);
|
||
assert_eq!(
|
||
footer.unwrap(),
|
||
"Sent with my Delta Chat Messenger: https://delta.chat"
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn test_simplify_trim() {
|
||
let input = "line1\n\r\r\rline2".to_string();
|
||
let SimplifiedText {
|
||
text,
|
||
is_forwarded,
|
||
is_cut,
|
||
..
|
||
} = simplify(input, false);
|
||
|
||
assert_eq!(text, "line1\nline2");
|
||
assert!(!is_forwarded);
|
||
assert!(!is_cut);
|
||
}
|
||
|
||
#[test]
|
||
fn test_simplify_forwarded_message() {
|
||
let input = "---------- Forwarded message ----------\r\nFrom: test@example.com\r\n\r\nForwarded message\r\n-- \r\nSignature goes here".to_string();
|
||
let SimplifiedText {
|
||
text,
|
||
is_forwarded,
|
||
is_cut,
|
||
footer,
|
||
..
|
||
} = simplify(input, false);
|
||
|
||
assert_eq!(text, "Forwarded message");
|
||
assert!(is_forwarded);
|
||
assert!(is_cut);
|
||
assert_eq!(footer.unwrap(), "Signature goes here");
|
||
}
|
||
|
||
#[test]
|
||
fn test_simplify_utilities() {
|
||
assert!(is_empty_line(" \t"));
|
||
assert!(is_empty_line(""));
|
||
assert!(is_empty_line(" \r"));
|
||
assert!(!is_empty_line(" x"));
|
||
assert!(is_plain_quote("> hello world"));
|
||
assert!(is_plain_quote(">>"));
|
||
assert!(!is_plain_quote("Life is pain"));
|
||
assert!(!is_plain_quote(""));
|
||
}
|
||
|
||
#[test]
|
||
fn test_remove_top_quote() {
|
||
let (lines, top_quote) = remove_top_quote(&["> first", "> second"]);
|
||
assert!(lines.is_empty());
|
||
assert_eq!(top_quote.unwrap(), "first\nsecond");
|
||
|
||
let (lines, top_quote) = remove_top_quote(&["> first", "> second", "not a quote"]);
|
||
assert_eq!(lines, &["not a quote"]);
|
||
assert_eq!(top_quote.unwrap(), "first\nsecond");
|
||
|
||
let (lines, top_quote) = remove_top_quote(&["not a quote", "> first", "> second"]);
|
||
assert_eq!(lines, &["not a quote", "> first", "> second"]);
|
||
assert!(top_quote.is_none());
|
||
}
|
||
|
||
#[test]
|
||
fn test_escape_message_footer_marks() {
|
||
let esc = escape_message_footer_marks("--\n--text --in line");
|
||
assert_eq!(esc, "-\u{200B}-\n-\u{200B}-text --in line");
|
||
|
||
let esc = escape_message_footer_marks("--\r\n--text");
|
||
assert_eq!(esc, "-\u{200B}-\r\n-\u{200B}-text");
|
||
}
|
||
|
||
#[test]
|
||
fn test_remove_message_footer() {
|
||
let input = "text\n--\nno footer".to_string();
|
||
let SimplifiedText {
|
||
text,
|
||
is_cut,
|
||
footer,
|
||
..
|
||
} = simplify(input, true);
|
||
assert_eq!(text, "text\n--\nno footer");
|
||
assert_eq!(footer, None);
|
||
assert!(!is_cut);
|
||
|
||
let input = "text\n\n--\n\nno footer".to_string();
|
||
let SimplifiedText {
|
||
text,
|
||
is_cut,
|
||
footer,
|
||
..
|
||
} = simplify(input, true);
|
||
assert_eq!(text, "text\n\n--\n\nno footer");
|
||
assert_eq!(footer, None);
|
||
assert!(!is_cut);
|
||
|
||
let input = "text\n\n-- no footer\n\n".to_string();
|
||
let SimplifiedText { text, footer, .. } = simplify(input, true);
|
||
assert_eq!(text, "text\n\n-- no footer");
|
||
assert_eq!(footer, None);
|
||
|
||
let input = "text\n\n--\nno footer\n-- \nfooter".to_string();
|
||
let SimplifiedText {
|
||
text,
|
||
is_cut,
|
||
footer,
|
||
..
|
||
} = simplify(input, true);
|
||
assert_eq!(text, "text\n\n--\nno footer");
|
||
assert!(is_cut);
|
||
assert_eq!(footer.unwrap(), "footer");
|
||
|
||
let input = "text\n\n--\ntreated as footer when unescaped".to_string();
|
||
let SimplifiedText {
|
||
text,
|
||
is_cut,
|
||
footer,
|
||
..
|
||
} = simplify(input.clone(), true);
|
||
assert_eq!(text, "text"); // see remove_message_footer() for some explanations
|
||
assert!(is_cut);
|
||
assert_eq!(footer.unwrap(), "treated as footer when unescaped");
|
||
let escaped = escape_message_footer_marks(&input);
|
||
let SimplifiedText {
|
||
text,
|
||
is_cut,
|
||
footer,
|
||
..
|
||
} = simplify(escaped, true);
|
||
assert_eq!(text, "text\n\n--\ntreated as footer when unescaped");
|
||
assert!(!is_cut);
|
||
assert_eq!(footer, None);
|
||
|
||
// Nonstandard footer sent by <https://siju.es/>
|
||
let input = "Message text here\n---Desde mi teléfono con SIJÚ\n\nQuote here".to_string();
|
||
let SimplifiedText {
|
||
text,
|
||
is_cut,
|
||
footer,
|
||
..
|
||
} = simplify(input.clone(), false);
|
||
assert_eq!(text, "Message text here [...]");
|
||
assert!(is_cut);
|
||
assert_eq!(footer, None);
|
||
let SimplifiedText {
|
||
text,
|
||
is_cut,
|
||
footer,
|
||
..
|
||
} = simplify(input.clone(), true);
|
||
assert_eq!(text, input);
|
||
assert!(!is_cut);
|
||
assert_eq!(footer, None);
|
||
|
||
let input = "--\ntreated as footer when unescaped".to_string();
|
||
let SimplifiedText {
|
||
text,
|
||
is_cut,
|
||
footer,
|
||
..
|
||
} = simplify(input.clone(), true);
|
||
assert_eq!(text, ""); // see remove_message_footer() for some explanations
|
||
assert!(is_cut);
|
||
assert_eq!(footer.unwrap(), "treated as footer when unescaped");
|
||
|
||
let escaped = escape_message_footer_marks(&input);
|
||
let SimplifiedText {
|
||
text,
|
||
is_cut,
|
||
footer,
|
||
..
|
||
} = simplify(escaped, true);
|
||
assert_eq!(text, "--\ntreated as footer when unescaped");
|
||
assert!(!is_cut);
|
||
assert_eq!(footer, None);
|
||
}
|
||
}
|