mirror of
https://github.com/chatmail/core.git
synced 2026-05-13 20:06:30 +03:00
fix: do not run simplify() on dehtml() output
simplify() is written to process incoming plaintext messages and extract footers and quotes from them. Incoming messages contain various quote styles and simplify() implements heuristics to detect them. If dehtml() output is processed by simplify(), simplify() heuristics may erroneously detect footers and quotes in produced plaintext. dehtml() should directly detect quotes instead of converting them to plaintext quotes for parsing with simplify().
This commit is contained in:
193
src/dehtml.rs
193
src/dehtml.rs
@@ -10,10 +10,11 @@ use quick_xml::{
|
|||||||
Reader,
|
Reader,
|
||||||
};
|
};
|
||||||
|
|
||||||
static LINE_RE: Lazy<regex::Regex> = Lazy::new(|| regex::Regex::new(r"(\r?\n)+").unwrap());
|
use crate::simplify::{simplify_quote, SimplifiedText};
|
||||||
|
|
||||||
struct Dehtml {
|
struct Dehtml {
|
||||||
strbuilder: String,
|
strbuilder: String,
|
||||||
|
quote: String,
|
||||||
add_text: AddText,
|
add_text: AddText,
|
||||||
last_href: Option<String>,
|
last_href: Option<String>,
|
||||||
/// GMX wraps a quote in `<div name="quote">`. After a `<div name="quote">`, this count is
|
/// GMX wraps a quote in `<div name="quote">`. After a `<div name="quote">`, this count is
|
||||||
@@ -29,17 +30,22 @@ struct Dehtml {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Dehtml {
|
impl Dehtml {
|
||||||
fn line_prefix(&self) -> &str {
|
/// Returns true if HTML parser is currently inside the quote.
|
||||||
if self.divs_since_quoted_content_div > 0 || self.blockquotes_since_blockquote > 0 {
|
fn is_quote(&self) -> bool {
|
||||||
"> "
|
self.divs_since_quoted_content_div > 0 || self.blockquotes_since_blockquote > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the buffer where the text should be written.
|
||||||
|
///
|
||||||
|
/// If the parser is inside the quote, returns the quote buffer.
|
||||||
|
fn get_buf(&mut self) -> &mut String {
|
||||||
|
if self.is_quote() {
|
||||||
|
&mut self.quote
|
||||||
} else {
|
} else {
|
||||||
""
|
&mut self.strbuilder
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn append_prefix(&self, line_end: &str) -> String {
|
|
||||||
// line_end is e.g. "\n\n". We add "> " if necessary.
|
|
||||||
line_end.to_string() + self.line_prefix()
|
|
||||||
}
|
|
||||||
fn get_add_text(&self) -> AddText {
|
fn get_add_text(&self) -> AddText {
|
||||||
if self.divs_since_quote_div > 0 && self.divs_since_quoted_content_div == 0 {
|
if self.divs_since_quote_div > 0 && self.divs_since_quoted_content_div == 0 {
|
||||||
AddText::No // Everything between `<div name="quoted">` and `<div name="quoted_content">` is metadata which we don't want
|
AddText::No // Everything between `<div name="quoted">` and `<div name="quoted_content">` is metadata which we don't want
|
||||||
@@ -61,25 +67,60 @@ enum AddText {
|
|||||||
YesPreserveLineEnds,
|
YesPreserveLineEnds,
|
||||||
}
|
}
|
||||||
|
|
||||||
// dehtml() returns way too many newlines; however, an optimisation on this issue is not needed as
|
pub(crate) fn dehtml(buf: &str) -> Option<SimplifiedText> {
|
||||||
// the newlines are typically removed in further processing by the caller
|
let (s, quote) = dehtml_quick_xml(buf);
|
||||||
pub fn dehtml(buf: &str) -> Option<String> {
|
|
||||||
let s = dehtml_quick_xml(buf);
|
|
||||||
if !s.trim().is_empty() {
|
if !s.trim().is_empty() {
|
||||||
return Some(s);
|
let text = dehtml_cleanup(s);
|
||||||
|
let top_quote = if !quote.trim().is_empty() {
|
||||||
|
Some(dehtml_cleanup(simplify_quote(&quote).0))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
return Some(SimplifiedText {
|
||||||
|
text,
|
||||||
|
top_quote,
|
||||||
|
..Default::default()
|
||||||
|
});
|
||||||
}
|
}
|
||||||
let s = dehtml_manually(buf);
|
let s = dehtml_manually(buf);
|
||||||
if !s.trim().is_empty() {
|
if !s.trim().is_empty() {
|
||||||
return Some(s);
|
let text = dehtml_cleanup(s);
|
||||||
|
return Some(SimplifiedText {
|
||||||
|
text,
|
||||||
|
..Default::default()
|
||||||
|
});
|
||||||
}
|
}
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
fn dehtml_quick_xml(buf: &str) -> String {
|
fn dehtml_cleanup(mut text: String) -> String {
|
||||||
|
text.retain(|c| c != '\r');
|
||||||
|
let lines = text.trim().split('\n');
|
||||||
|
let mut text = String::new();
|
||||||
|
let mut linebreak = false;
|
||||||
|
for line in lines {
|
||||||
|
if line.chars().all(char::is_whitespace) {
|
||||||
|
linebreak = true;
|
||||||
|
} else {
|
||||||
|
if !text.is_empty() {
|
||||||
|
text += "\n";
|
||||||
|
if linebreak {
|
||||||
|
text += "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
text += line.trim_end();
|
||||||
|
linebreak = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
text
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dehtml_quick_xml(buf: &str) -> (String, String) {
|
||||||
let buf = buf.trim().trim_start_matches("<!doctype html>");
|
let buf = buf.trim().trim_start_matches("<!doctype html>");
|
||||||
|
|
||||||
let mut dehtml = Dehtml {
|
let mut dehtml = Dehtml {
|
||||||
strbuilder: String::with_capacity(buf.len()),
|
strbuilder: String::with_capacity(buf.len()),
|
||||||
|
quote: String::new(),
|
||||||
add_text: AddText::YesRemoveLineEnds,
|
add_text: AddText::YesRemoveLineEnds,
|
||||||
last_href: None,
|
last_href: None,
|
||||||
divs_since_quote_div: 0,
|
divs_since_quote_div: 0,
|
||||||
@@ -131,22 +172,33 @@ fn dehtml_quick_xml(buf: &str) -> String {
|
|||||||
buf.clear();
|
buf.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
dehtml.strbuilder
|
(dehtml.strbuilder, dehtml.quote)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn dehtml_text_cb(event: &BytesText, dehtml: &mut Dehtml) {
|
fn dehtml_text_cb(event: &BytesText, dehtml: &mut Dehtml) {
|
||||||
|
static LINE_RE: Lazy<regex::Regex> = Lazy::new(|| regex::Regex::new(r"(\r?\n)+").unwrap());
|
||||||
|
|
||||||
if dehtml.get_add_text() == AddText::YesPreserveLineEnds
|
if dehtml.get_add_text() == AddText::YesPreserveLineEnds
|
||||||
|| dehtml.get_add_text() == AddText::YesRemoveLineEnds
|
|| dehtml.get_add_text() == AddText::YesRemoveLineEnds
|
||||||
{
|
{
|
||||||
let last_added = escaper::decode_html_buf_sloppy(event as &[_]).unwrap_or_default();
|
let last_added = escaper::decode_html_buf_sloppy(event as &[_]).unwrap_or_default();
|
||||||
|
|
||||||
if dehtml.get_add_text() == AddText::YesRemoveLineEnds {
|
if dehtml.get_add_text() == AddText::YesRemoveLineEnds {
|
||||||
dehtml.strbuilder += LINE_RE.replace_all(&last_added, "\r").as_ref();
|
// Replace all line ends with spaces.
|
||||||
} else if !dehtml.line_prefix().is_empty() {
|
// E.g. `\r\n\r\n` is replaced with one space.
|
||||||
let l = dehtml.append_prefix("\n");
|
let last_added = LINE_RE.replace_all(&last_added, " ");
|
||||||
dehtml.strbuilder += LINE_RE.replace_all(&last_added, l.as_str()).as_ref();
|
|
||||||
|
// Add a space if `last_added` starts with a space
|
||||||
|
// and there is no whitespace at the end of the buffer yet.
|
||||||
|
// Trim the rest of leading whitespace from `last_added`.
|
||||||
|
let buf = dehtml.get_buf();
|
||||||
|
if !buf.ends_with(' ') && !buf.ends_with('\n') && last_added.starts_with(' ') {
|
||||||
|
*buf += " ";
|
||||||
|
}
|
||||||
|
|
||||||
|
*buf += last_added.trim_start();
|
||||||
} else {
|
} else {
|
||||||
dehtml.strbuilder += &last_added;
|
*dehtml.get_buf() += LINE_RE.replace_all(&last_added, "\n").as_ref();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -158,35 +210,36 @@ fn dehtml_endtag_cb(event: &BytesEnd, dehtml: &mut Dehtml) {
|
|||||||
|
|
||||||
match tag.as_str() {
|
match tag.as_str() {
|
||||||
"style" | "script" | "title" | "pre" => {
|
"style" | "script" | "title" | "pre" => {
|
||||||
dehtml.strbuilder += &dehtml.append_prefix("\n\n");
|
*dehtml.get_buf() += "\n\n";
|
||||||
dehtml.add_text = AddText::YesRemoveLineEnds;
|
dehtml.add_text = AddText::YesRemoveLineEnds;
|
||||||
}
|
}
|
||||||
"div" => {
|
"div" => {
|
||||||
pop_tag(&mut dehtml.divs_since_quote_div);
|
pop_tag(&mut dehtml.divs_since_quote_div);
|
||||||
pop_tag(&mut dehtml.divs_since_quoted_content_div);
|
pop_tag(&mut dehtml.divs_since_quoted_content_div);
|
||||||
|
|
||||||
dehtml.strbuilder += &dehtml.append_prefix("\n\n");
|
*dehtml.get_buf() += "\n\n";
|
||||||
dehtml.add_text = AddText::YesRemoveLineEnds;
|
dehtml.add_text = AddText::YesRemoveLineEnds;
|
||||||
}
|
}
|
||||||
"a" => {
|
"a" => {
|
||||||
if let Some(ref last_href) = dehtml.last_href.take() {
|
if let Some(ref last_href) = dehtml.last_href.take() {
|
||||||
if dehtml.strbuilder.ends_with('[') {
|
let buf = dehtml.get_buf();
|
||||||
dehtml.strbuilder.truncate(dehtml.strbuilder.len() - 1);
|
if buf.ends_with('[') {
|
||||||
|
buf.truncate(buf.len() - 1);
|
||||||
} else {
|
} else {
|
||||||
dehtml.strbuilder += "](";
|
*buf += "](";
|
||||||
dehtml.strbuilder += last_href;
|
*buf += last_href;
|
||||||
dehtml.strbuilder += ")";
|
*buf += ")";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"b" | "strong" => {
|
"b" | "strong" => {
|
||||||
if dehtml.get_add_text() != AddText::No {
|
if dehtml.get_add_text() != AddText::No {
|
||||||
dehtml.strbuilder += "*";
|
*dehtml.get_buf() += "*";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"i" | "em" => {
|
"i" | "em" => {
|
||||||
if dehtml.get_add_text() != AddText::No {
|
if dehtml.get_add_text() != AddText::No {
|
||||||
dehtml.strbuilder += "_";
|
*dehtml.get_buf() += "_";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"blockquote" => pop_tag(&mut dehtml.blockquotes_since_blockquote),
|
"blockquote" => pop_tag(&mut dehtml.blockquotes_since_blockquote),
|
||||||
@@ -206,7 +259,7 @@ fn dehtml_starttag_cb<B: std::io::BufRead>(
|
|||||||
match tag.as_str() {
|
match tag.as_str() {
|
||||||
"p" | "table" | "td" => {
|
"p" | "table" | "td" => {
|
||||||
if !dehtml.strbuilder.is_empty() {
|
if !dehtml.strbuilder.is_empty() {
|
||||||
dehtml.strbuilder += &dehtml.append_prefix("\n\n");
|
*dehtml.get_buf() += "\n\n";
|
||||||
}
|
}
|
||||||
dehtml.add_text = AddText::YesRemoveLineEnds;
|
dehtml.add_text = AddText::YesRemoveLineEnds;
|
||||||
}
|
}
|
||||||
@@ -215,18 +268,18 @@ fn dehtml_starttag_cb<B: std::io::BufRead>(
|
|||||||
maybe_push_tag(event, reader, "quote", &mut dehtml.divs_since_quote_div);
|
maybe_push_tag(event, reader, "quote", &mut dehtml.divs_since_quote_div);
|
||||||
maybe_push_tag(event, reader, "quoted-content", &mut dehtml.divs_since_quoted_content_div);
|
maybe_push_tag(event, reader, "quoted-content", &mut dehtml.divs_since_quoted_content_div);
|
||||||
|
|
||||||
dehtml.strbuilder += &dehtml.append_prefix("\n\n");
|
*dehtml.get_buf() += "\n\n";
|
||||||
dehtml.add_text = AddText::YesRemoveLineEnds;
|
dehtml.add_text = AddText::YesRemoveLineEnds;
|
||||||
}
|
}
|
||||||
"br" => {
|
"br" => {
|
||||||
dehtml.strbuilder += &dehtml.append_prefix("\n");
|
*dehtml.get_buf() += "\n";
|
||||||
dehtml.add_text = AddText::YesRemoveLineEnds;
|
dehtml.add_text = AddText::YesRemoveLineEnds;
|
||||||
}
|
}
|
||||||
"style" | "script" | "title" => {
|
"style" | "script" | "title" => {
|
||||||
dehtml.add_text = AddText::No;
|
dehtml.add_text = AddText::No;
|
||||||
}
|
}
|
||||||
"pre" => {
|
"pre" => {
|
||||||
dehtml.strbuilder += &dehtml.append_prefix("\n\n");
|
*dehtml.get_buf() += "\n\n";
|
||||||
dehtml.add_text = AddText::YesPreserveLineEnds;
|
dehtml.add_text = AddText::YesPreserveLineEnds;
|
||||||
}
|
}
|
||||||
"a" => {
|
"a" => {
|
||||||
@@ -247,18 +300,18 @@ fn dehtml_starttag_cb<B: std::io::BufRead>(
|
|||||||
|
|
||||||
if !href.is_empty() {
|
if !href.is_empty() {
|
||||||
dehtml.last_href = Some(href);
|
dehtml.last_href = Some(href);
|
||||||
dehtml.strbuilder += "[";
|
*dehtml.get_buf() += "[";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"b" | "strong" => {
|
"b" | "strong" => {
|
||||||
if dehtml.get_add_text() != AddText::No {
|
if dehtml.get_add_text() != AddText::No {
|
||||||
dehtml.strbuilder += "*";
|
*dehtml.get_buf() += "*";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"i" | "em" => {
|
"i" | "em" => {
|
||||||
if dehtml.get_add_text() != AddText::No {
|
if dehtml.get_add_text() != AddText::No {
|
||||||
dehtml.strbuilder += "_";
|
*dehtml.get_buf() += "_";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"blockquote" => dehtml.blockquotes_since_blockquote += 1,
|
"blockquote" => dehtml.blockquotes_since_blockquote += 1,
|
||||||
@@ -319,7 +372,6 @@ pub fn dehtml_manually(buf: &str) -> String {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::simplify::{simplify, SimplifiedText};
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_dehtml() {
|
fn test_dehtml() {
|
||||||
@@ -333,18 +385,18 @@ mod tests {
|
|||||||
("<b> bar <i> foo", "* bar _ foo"),
|
("<b> bar <i> foo", "* bar _ foo"),
|
||||||
("& bar", "& bar"),
|
("& bar", "& bar"),
|
||||||
// Despite missing ', this should be shown:
|
// Despite missing ', this should be shown:
|
||||||
("<a href='/foo.png>Hi</a> ", "Hi "),
|
("<a href='/foo.png>Hi</a> ", "Hi"),
|
||||||
("No link: <a href='https://get.delta.chat/'/>", "No link: "),
|
("No link: <a href='https://get.delta.chat/'/>", "No link:"),
|
||||||
(
|
(
|
||||||
"No link: <a href='https://get.delta.chat/'></a>",
|
"No link: <a href='https://get.delta.chat/'></a>",
|
||||||
"No link: ",
|
"No link:",
|
||||||
),
|
),
|
||||||
("<!doctype html>\n<b>fat text</b>", "*fat text*"),
|
("<!doctype html>\n<b>fat text</b>", "*fat text*"),
|
||||||
// Invalid html (at least DC should show the text if the html is invalid):
|
// Invalid html (at least DC should show the text if the html is invalid):
|
||||||
("<!some invalid html code>\n<b>some text</b>", "some text"),
|
("<!some invalid html code>\n<b>some text</b>", "some text"),
|
||||||
];
|
];
|
||||||
for (input, output) in cases {
|
for (input, output) in cases {
|
||||||
assert_eq!(simplify(dehtml(input).unwrap(), true).text, output);
|
assert_eq!(dehtml(input).unwrap().text, output);
|
||||||
}
|
}
|
||||||
let none_cases = vec!["<html> </html>", ""];
|
let none_cases = vec!["<html> </html>", ""];
|
||||||
for input in none_cases {
|
for input in none_cases {
|
||||||
@@ -354,31 +406,54 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_dehtml_parse_br() {
|
fn test_dehtml_parse_br() {
|
||||||
let html = "\r\r\nline1<br>\r\n\r\n\r\rline2<br/>line3\n\r";
|
let html = "line1<br>line2";
|
||||||
let plain = dehtml(html).unwrap();
|
let plain = dehtml(html).unwrap().text;
|
||||||
|
assert_eq!(plain, "line1\nline2");
|
||||||
|
|
||||||
assert_eq!(plain, "line1\n\r\r\rline2\nline3");
|
let html = "line1<br> line2";
|
||||||
|
let plain = dehtml(html).unwrap().text;
|
||||||
|
assert_eq!(plain, "line1\nline2");
|
||||||
|
|
||||||
|
let html = "line1 <br><br> line2";
|
||||||
|
let plain = dehtml(html).unwrap().text;
|
||||||
|
assert_eq!(plain, "line1\n\nline2");
|
||||||
|
|
||||||
|
let html = "\r\r\nline1<br>\r\n\r\n\r\rline2<br/>line3\n\r";
|
||||||
|
let plain = dehtml(html).unwrap().text;
|
||||||
|
assert_eq!(plain, "line1\nline2\nline3");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_dehtml_parse_span() {
|
||||||
|
assert_eq!(dehtml("<span>Foo</span>bar").unwrap().text, "Foobar");
|
||||||
|
assert_eq!(dehtml("<span>Foo</span> bar").unwrap().text, "Foo bar");
|
||||||
|
assert_eq!(dehtml("<span>Foo </span>bar").unwrap().text, "Foo bar");
|
||||||
|
assert_eq!(dehtml("<span>Foo</span>\nbar").unwrap().text, "Foo bar");
|
||||||
|
assert_eq!(dehtml("\n<span>Foo</span> bar").unwrap().text, "Foo bar");
|
||||||
|
assert_eq!(dehtml("<span>Foo</span>\n\nbar").unwrap().text, "Foo bar");
|
||||||
|
assert_eq!(dehtml("Foo\n<span>bar</span>").unwrap().text, "Foo bar");
|
||||||
|
assert_eq!(dehtml("Foo<span>\nbar</span>").unwrap().text, "Foo bar");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_dehtml_parse_p() {
|
fn test_dehtml_parse_p() {
|
||||||
let html = "<p>Foo</p><p>Bar</p>";
|
let html = "<p>Foo</p><p>Bar</p>";
|
||||||
let plain = dehtml(html).unwrap();
|
let plain = dehtml(html).unwrap().text;
|
||||||
assert_eq!(plain, "Foo\n\nBar");
|
assert_eq!(plain, "Foo\n\nBar");
|
||||||
|
|
||||||
let html = "<p>Foo<p>Bar";
|
let html = "<p>Foo<p>Bar";
|
||||||
let plain = dehtml(html).unwrap();
|
let plain = dehtml(html).unwrap().text;
|
||||||
assert_eq!(plain, "Foo\n\nBar");
|
assert_eq!(plain, "Foo\n\nBar");
|
||||||
|
|
||||||
let html = "<p>Foo</p><p>Bar<p>Baz";
|
let html = "<p>Foo</p><p>Bar<p>Baz";
|
||||||
let plain = dehtml(html).unwrap();
|
let plain = dehtml(html).unwrap().text;
|
||||||
assert_eq!(plain, "Foo\n\nBar\n\nBaz");
|
assert_eq!(plain, "Foo\n\nBar\n\nBaz");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_dehtml_parse_href() {
|
fn test_dehtml_parse_href() {
|
||||||
let html = "<a href=url>text</a";
|
let html = "<a href=url>text</a";
|
||||||
let plain = dehtml(html).unwrap();
|
let plain = dehtml(html).unwrap().text;
|
||||||
|
|
||||||
assert_eq!(plain, "[text](url)");
|
assert_eq!(plain, "[text](url)");
|
||||||
}
|
}
|
||||||
@@ -386,7 +461,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_dehtml_bold_text() {
|
fn test_dehtml_bold_text() {
|
||||||
let html = "<!DOCTYPE name [<!DOCTYPE ...>]><!-- comment -->text <b><?php echo ... ?>bold</b><![CDATA[<>]]>";
|
let html = "<!DOCTYPE name [<!DOCTYPE ...>]><!-- comment -->text <b><?php echo ... ?>bold</b><![CDATA[<>]]>";
|
||||||
let plain = dehtml(html).unwrap();
|
let plain = dehtml(html).unwrap().text;
|
||||||
|
|
||||||
assert_eq!(plain, "text *bold*<>");
|
assert_eq!(plain, "text *bold*<>");
|
||||||
}
|
}
|
||||||
@@ -396,7 +471,7 @@ mod tests {
|
|||||||
let html =
|
let html =
|
||||||
"<>"'& äÄöÖüÜß fooÆçÇ ♦‎‏‌&noent;‍";
|
"<>"'& äÄöÖüÜß fooÆçÇ ♦‎‏‌&noent;‍";
|
||||||
|
|
||||||
let plain = dehtml(html).unwrap();
|
let plain = dehtml(html).unwrap().text;
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
plain,
|
plain,
|
||||||
@@ -420,32 +495,38 @@ mod tests {
|
|||||||
</html>
|
</html>
|
||||||
"##;
|
"##;
|
||||||
let txt = dehtml(input).unwrap();
|
let txt = dehtml(input).unwrap();
|
||||||
assert_eq!(txt.trim(), "lots of text");
|
assert_eq!(txt.text.trim(), "lots of text");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_pre_tag() {
|
fn test_pre_tag() {
|
||||||
let input = "<html><pre>\ntwo\nlines\n</pre></html>";
|
let input = "<html><pre>\ntwo\nlines\n</pre></html>";
|
||||||
let txt = dehtml(input).unwrap();
|
let txt = dehtml(input).unwrap();
|
||||||
assert_eq!(txt.trim(), "two\nlines");
|
assert_eq!(txt.text.trim(), "two\nlines");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
async fn test_quote_div() {
|
async fn test_quote_div() {
|
||||||
let input = include_str!("../test-data/message/gmx-quote-body.eml");
|
let input = include_str!("../test-data/message/gmx-quote-body.eml");
|
||||||
let dehtml = dehtml(input).unwrap();
|
let dehtml = dehtml(input).unwrap();
|
||||||
println!("{dehtml}");
|
|
||||||
let SimplifiedText {
|
let SimplifiedText {
|
||||||
text,
|
text,
|
||||||
is_forwarded,
|
is_forwarded,
|
||||||
is_cut,
|
is_cut,
|
||||||
top_quote,
|
top_quote,
|
||||||
footer,
|
footer,
|
||||||
} = simplify(dehtml, false);
|
} = dehtml;
|
||||||
assert_eq!(text, "Test");
|
assert_eq!(text, "Test");
|
||||||
assert_eq!(is_forwarded, false);
|
assert_eq!(is_forwarded, false);
|
||||||
assert_eq!(is_cut, false);
|
assert_eq!(is_cut, false);
|
||||||
assert_eq!(top_quote.as_deref(), Some("test"));
|
assert_eq!(top_quote.as_deref(), Some("test"));
|
||||||
assert_eq!(footer, None);
|
assert_eq!(footer, None);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_spaces() {
|
||||||
|
let input = include_str!("../test-data/spaces.html");
|
||||||
|
let txt = dehtml(input).unwrap();
|
||||||
|
assert_eq!(txt.text, "Welcome back to Strolling!\n\nHey there,\n\nWelcome back! Use this link to securely sign in to your Strolling account:\n\nSign in to Strolling\n\nFor your security, the link will expire in 24 hours time.\n\nSee you soon!\n\nYou can also copy\n\nhttps://strolling.rosano.ca/members/?token=XXX\n\nIf you did not make this request, you can safely ignore this email.\n\nThis message was sent from [strolling.rosano.ca](https://strolling.rosano.ca/) to [alice@example.org](mailto:alice@example.org)");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1076,16 +1076,20 @@ impl MimeMessage {
|
|||||||
Default::default()
|
Default::default()
|
||||||
} else {
|
} else {
|
||||||
let is_html = mime_type == mime::TEXT_HTML;
|
let is_html = mime_type == mime::TEXT_HTML;
|
||||||
let out = if is_html {
|
if is_html {
|
||||||
self.is_mime_modified = true;
|
self.is_mime_modified = true;
|
||||||
dehtml(&decoded_data).unwrap_or_else(|| {
|
if let Some(text) = dehtml(&decoded_data) {
|
||||||
|
text
|
||||||
|
} else {
|
||||||
dehtml_failed = true;
|
dehtml_failed = true;
|
||||||
decoded_data.clone()
|
SimplifiedText {
|
||||||
})
|
text: decoded_data.clone(),
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
decoded_data.clone()
|
simplify(decoded_data.clone(), self.has_chat_version())
|
||||||
};
|
}
|
||||||
simplify(out, self.has_chat_version())
|
|
||||||
};
|
};
|
||||||
|
|
||||||
self.is_mime_modified = self.is_mime_modified
|
self.is_mime_modified = self.is_mime_modified
|
||||||
|
|||||||
@@ -734,7 +734,7 @@ async fn load_imf_email(context: &Context, imf_raw: &[u8]) -> Message {
|
|||||||
async fn test_html_only_mail() {
|
async fn test_html_only_mail() {
|
||||||
let t = TestContext::new_alice().await;
|
let t = TestContext::new_alice().await;
|
||||||
let msg = load_imf_email(&t, include_bytes!("../../test-data/message/wrong-html.eml")).await;
|
let msg = load_imf_email(&t, include_bytes!("../../test-data/message/wrong-html.eml")).await;
|
||||||
assert_eq!(msg.text.unwrap(), " Guten Abend, \n\n Lots of text \n\n text with Umlaut ä... \n\n MfG [...]");
|
assert_eq!(msg.text.unwrap(), "Guten Abend,\n\nLots of text\n\ntext with Umlaut ä...\n\nMfG\n\n--------------------------------------\n\n[Camping ](https://example.com/)\n\nsomeaddress\n\nsometown");
|
||||||
}
|
}
|
||||||
|
|
||||||
static GH_MAILINGLIST: &[u8] =
|
static GH_MAILINGLIST: &[u8] =
|
||||||
|
|||||||
@@ -72,7 +72,7 @@ pub(crate) fn split_lines(buf: &str) -> Vec<&str> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Simplified text and some additional information gained from the input.
|
/// Simplified text and some additional information gained from the input.
|
||||||
#[derive(Debug, Default)]
|
#[derive(Debug, Default, PartialEq, Eq)]
|
||||||
pub(crate) struct SimplifiedText {
|
pub(crate) struct SimplifiedText {
|
||||||
/// The text itself.
|
/// The text itself.
|
||||||
pub text: String,
|
pub text: String,
|
||||||
@@ -91,6 +91,14 @@ pub(crate) struct SimplifiedText {
|
|||||||
pub footer: Option<String>,
|
pub footer: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn simplify_quote(quote: &str) -> (String, bool) {
|
||||||
|
let quote_lines = split_lines(quote);
|
||||||
|
let (quote_lines, quote_footer_lines) = remove_message_footer(&quote_lines);
|
||||||
|
let is_cut = quote_footer_lines.is_some();
|
||||||
|
|
||||||
|
(render_message(quote_lines, false), is_cut)
|
||||||
|
}
|
||||||
|
|
||||||
/// Simplify message text for chat display.
|
/// Simplify message text for chat display.
|
||||||
/// Remove quotes, signatures, trailing empty lines etc.
|
/// Remove quotes, signatures, trailing empty lines etc.
|
||||||
pub(crate) fn simplify(mut input: String, is_chat_message: bool) -> SimplifiedText {
|
pub(crate) fn simplify(mut input: String, is_chat_message: bool) -> SimplifiedText {
|
||||||
@@ -125,11 +133,9 @@ pub(crate) fn simplify(mut input: String, is_chat_message: bool) -> SimplifiedTe
|
|||||||
|
|
||||||
if !is_chat_message {
|
if !is_chat_message {
|
||||||
top_quote = top_quote.map(|quote| {
|
top_quote = top_quote.map(|quote| {
|
||||||
let quote_lines = split_lines(&quote);
|
let (quote, quote_cut) = simplify_quote(&quote);
|
||||||
let (quote_lines, quote_footer_lines) = remove_message_footer(&quote_lines);
|
is_cut |= quote_cut;
|
||||||
is_cut = is_cut || quote_footer_lines.is_some();
|
quote
|
||||||
|
|
||||||
render_message(quote_lines, false)
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
172
test-data/spaces.html
Normal file
172
test-data/spaces.html
Normal file
@@ -0,0 +1,172 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta name="viewport" content="width=device-width">
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||||
|
<title>🔑 Secure sign in link for Strolling</title>
|
||||||
|
<style>
|
||||||
|
/* -------------------------------------
|
||||||
|
RESPONSIVE AND MOBILE FRIENDLY STYLES
|
||||||
|
------------------------------------- */
|
||||||
|
@media only screen and (max-width: 620px) {
|
||||||
|
table[class=body] h1 {
|
||||||
|
font-size: 28px !important;
|
||||||
|
margin-bottom: 10px !important;
|
||||||
|
}
|
||||||
|
table[class=body] p,
|
||||||
|
table[class=body] ul,
|
||||||
|
table[class=body] ol,
|
||||||
|
table[class=body] td,
|
||||||
|
table[class=body] span,
|
||||||
|
table[class=body] a {
|
||||||
|
font-size: 16px !important;
|
||||||
|
}
|
||||||
|
table[class=body] .wrapper,
|
||||||
|
table[class=body] .article {
|
||||||
|
padding: 10px !important;
|
||||||
|
}
|
||||||
|
table[class=body] .content {
|
||||||
|
padding: 0 !important;
|
||||||
|
}
|
||||||
|
table[class=body] .container {
|
||||||
|
padding: 0 !important;
|
||||||
|
width: 100% !important;
|
||||||
|
}
|
||||||
|
table[class=body] .main {
|
||||||
|
border-left-width: 0 !important;
|
||||||
|
border-radius: 0 !important;
|
||||||
|
border-right-width: 0 !important;
|
||||||
|
}
|
||||||
|
table[class=body] .btn table {
|
||||||
|
width: 100% !important;
|
||||||
|
}
|
||||||
|
table[class=body] .btn a {
|
||||||
|
width: 100% !important;
|
||||||
|
}
|
||||||
|
table[class=body] .img-responsive {
|
||||||
|
height: auto !important;
|
||||||
|
max-width: 100% !important;
|
||||||
|
width: auto !important;
|
||||||
|
}
|
||||||
|
table[class=body] p[class=small],
|
||||||
|
table[class=body] a[class=small] {
|
||||||
|
font-size: 11px !important;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* -------------------------------------
|
||||||
|
PRESERVE THESE STYLES IN THE HEAD
|
||||||
|
------------------------------------- */
|
||||||
|
@media all {
|
||||||
|
.ExternalClass {
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
.ExternalClass,
|
||||||
|
.ExternalClass p,
|
||||||
|
.ExternalClass span,
|
||||||
|
.ExternalClass font,
|
||||||
|
.ExternalClass td,
|
||||||
|
.ExternalClass div {
|
||||||
|
line-height: 100%;
|
||||||
|
}
|
||||||
|
.recipient-link a {
|
||||||
|
color: inherit !important;
|
||||||
|
font-family: inherit !important;
|
||||||
|
font-size: inherit !important;
|
||||||
|
font-weight: inherit !important;
|
||||||
|
line-height: inherit !important;
|
||||||
|
text-decoration: none !important;
|
||||||
|
}
|
||||||
|
#MessageViewBody a {
|
||||||
|
color: inherit;
|
||||||
|
text-decoration: none;
|
||||||
|
font-size: inherit;
|
||||||
|
font-family: inherit;
|
||||||
|
font-weight: inherit;
|
||||||
|
line-height: inherit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
hr {
|
||||||
|
border-width: 0;
|
||||||
|
height: 0;
|
||||||
|
margin-top: 34px;
|
||||||
|
margin-bottom: 34px;
|
||||||
|
border-bottom-width: 1px;
|
||||||
|
border-bottom-color: #EEF5F8;
|
||||||
|
}
|
||||||
|
a {
|
||||||
|
color: #3A464C;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body style="background-color: #ffffff; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; -webkit-font-smoothing: antialiased; font-size: 14px; line-height: 1.5em; margin: 0; padding: 0; -ms-text-size-adjust: 100%; -webkit-text-size-adjust: 100%;">
|
||||||
|
<table border="0" cellpadding="0" cellspacing="0" class="body" style="border-collapse: separate; mso-table-lspace: 0pt; mso-table-rspace: 0pt; width: 100%;">
|
||||||
|
<tr>
|
||||||
|
<td style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 14px; vertical-align: top;"> </td>
|
||||||
|
<td class="container" style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 14px; vertical-align: top; display: block; Margin: 0 auto; max-width: 540px; padding: 10px; width: 540px;">
|
||||||
|
<div class="content" style="box-sizing: border-box; display: block; Margin: 0 auto; max-width: 600px; padding: 30px 20px;">
|
||||||
|
|
||||||
|
<!-- START CENTERED CONTAINER -->
|
||||||
|
<span class="preheader" style="color: transparent; display: none; height: 0; max-height: 0; max-width: 0; opacity: 0; overflow: hidden; mso-hide: all; visibility: hidden; width: 0;">Welcome back to Strolling!</span>
|
||||||
|
<table class="main" style="border-collapse: separate; mso-table-lspace: 0pt; mso-table-rspace: 0pt; width: 100%; background: #ffffff; border-radius: 8px;">
|
||||||
|
|
||||||
|
<!-- START MAIN CONTENT AREA -->
|
||||||
|
<tr>
|
||||||
|
<td class="wrapper" style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 14px; vertical-align: top; box-sizing: border-box;">
|
||||||
|
<table border="0" cellpadding="0" cellspacing="0" style="border-collapse: separate; mso-table-lspace: 0pt; mso-table-rspace: 0pt; width: 100%;">
|
||||||
|
<tr>
|
||||||
|
<td style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 14px; vertical-align: top;">
|
||||||
|
<p style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 20px; color: #15212A; font-weight: bold; line-height: 24px; margin: 0; margin-bottom: 15px;">Hey there,</p>
|
||||||
|
<p style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 16px; color: #3A464C; font-weight: normal; margin: 0; line-height: 24px; margin-bottom: 32px;">Welcome back! Use this link to securely sign in to your Strolling account:</p>
|
||||||
|
<table border="0" cellpadding="0" cellspacing="0" class="btn btn-primary" style="border-collapse: separate; mso-table-lspace: 0pt; mso-table-rspace: 0pt; width: 100%; box-sizing: border-box;">
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td align="left" style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 16px; vertical-align: top; padding-bottom: 35px;">
|
||||||
|
<table border="0" cellpadding="0" cellspacing="0" style="border-collapse: separate; mso-table-lspace: 0pt; mso-table-rspace: 0pt; width: auto;">
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 16px; vertical-align: top; background-color: #e5689b; border-radius: 5px; text-align: center;"> <a href="https://strolling.rosano.ca/members/?token=XXX&action=signin&r=https%3A%2F%2Fstrolling.rosano.ca%2F" target="_blank" style="display: inline-block; color: #ffffff; background-color: #e5689b; border: solid 1px #e5689b; border-radius: 5px; box-sizing: border-box; cursor: pointer; text-decoration: none; font-size: 16px; font-weight: normal; margin: 0; padding: 9px 22px 10px; border-color: #e5689b;">Sign in to Strolling</a> </td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
<p style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 16px; color: #3A464C; font-weight: normal; line-height: 24px; margin: 0; margin-bottom: 11px;">For your security, the link will expire in 24 hours time.</p>
|
||||||
|
<p style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 16px; color: #3A464C; font-weight: normal; line-height: 24px; margin: 0; margin-bottom: 30px;">See you soon!</p>
|
||||||
|
<hr/>
|
||||||
|
<p style="word-break: break-all; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 15px; color: #3A464C; font-weight: normal; margin: 0; line-height: 24px;">You can also copy & paste this URL into your browser:</p>
|
||||||
|
<p style="word-break: break-all; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 15px; line-height: 22px; margin-top:0; color: #3A464C;">https://strolling.rosano.ca/members/?token=XXX&action=signin&r=https%3A%2F%2Fstrolling.rosano.ca%2F</p>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<!-- START FOOTER -->
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 14px; vertical-align: top; padding-top: 80px;">
|
||||||
|
<p class="small" style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; line-height: 16px; font-size: 11px; color: #738A94; font-weight: normal; margin: 0;">If you did not make this request, you can safely ignore this email.</p>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 14px; vertical-align: top; padding-top: 2px;">
|
||||||
|
<p class="small" style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; line-height: 16px; font-size: 11px; color: #738A94; font-weight: normal; margin: 0;">This message was sent from <a class="small" href="https://strolling.rosano.ca/" style="text-decoration: underline; color: #738A94; font-size: 11px;">strolling.rosano.ca</a> to <a class="small" href="mailto:alice@example.org" style="text-decoration: underline; color: #738A94; font-size: 11px;">alice@example.org</a></p>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<!-- END FOOTER -->
|
||||||
|
</table>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<!-- END MAIN CONTENT AREA -->
|
||||||
|
</table>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- END CENTERED CONTAINER -->
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
<td style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 14px; vertical-align: top;"> </td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
Reference in New Issue
Block a user