mirror of
https://github.com/chatmail/core.git
synced 2026-04-19 22:46:29 +03:00
* draft API to deal with uncut message texts * add column mime_modified * add mime_modified flag to MimeParser and save it in the database * save mime_headers also when mime_modified is set * cargo fmt * set mime_modified on parsed html-texts and when there are multiple alternative-parts; add test for that * prototype functions, add to repl and ffi * use correct mime_modified flag * basically parse Mime-Structure to HTML * add basic tests for HTML-parsing * convert text/plain to html for getting original * respect charset for plain texts * make test more specific * fix handling non-utf-8 charsets for plain messages * add test for plain_to_html() * add failing test for plaintext linkify * linkify urls in plain text * fix regex * plain text linkify: add failing test for encapsulated links as <https://domain.com> * plain text linkify: make encapsulated links as <https://domain.com> work * plain text linkify: require word boundary at beginning of link, add tests for that * plain text linkify: linkify emails * plain text: support format=flowed * plain text: support quotes * make clippy happy * set mime-modified also when simplify() cuts non-html messages, add tests for that * streamline mime recursion * repl tool: write original html to file for further processing * convert cid:- to data:-protocol * add a test for cid: to data: conversion * make clippy happy * fix html-tests to work with windows-lineends * clarify what the returned html-code may contain * add some more detailed doc comments * add mime_modified column only if not exist this additional check is needed as the column may added with another dbversion in some shipped beta-versions. 
* incorporate documentation suggestions from review * rename get_original_mime_html() to more simple get_html() * rename api is_mime_modified() to more simple has_html(); internally, mime_modified-flag stays as-is, however * rename MimeS to MimeMultipartType * do not set mime-modified flag for encrypted messages that need extra-handling for saved mime-structure * fix typo * move get_msg_html() to MsgId.get_html() * incorporate more documentation suggestions from review * remove unused return value from collect_texts_recursive() * avoid mime_modified being mutable in write-parts-loop * move 'use futures::future::FutureExt' atop of html.rs * move attributes defining plain-text to a dedicated structure * more PlainText to separate file * escape cid when building regex * let dc_get_msg_html() return NULL when calling with bad param
253 lines
8.0 KiB
Rust
253 lines
8.0 KiB
Rust
//! Handle plain text together with some attributes.
|
|
use crate::simplify::split_lines;
|
|
use once_cell::sync::Lazy;
|
|
|
|
/// A plain-text message body together with the attributes that control
/// how its line breaks have to be interpreted.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PlainText {
    /// The raw text.
    pub text: String,

    /// Whether the text is "flowed" as defined in
    /// [RFC 3676](https://tools.ietf.org/html/rfc3676) (which obsoletes RFC 2646).
    ///
    /// At a glance, that means: if a line ends with a space, it is merged with the next one,
    /// and a single leading space is ignored
    /// (to allow lines starting with `>`, which normally indicates a quote).
    pub flowed: bool,

    /// If set together with `flowed`,
    /// the trailing space that indicates merging two lines is removed.
    pub delsp: bool,
}
|
|
|
|
impl PlainText {
|
|
/// Convert plain text to HTML.
|
|
/// The function handles quotes, links, fixed and floating text paragraphs.
|
|
pub async fn to_html(&self) -> String {
|
|
static LINKIFY_MAIL_RE: Lazy<regex::Regex> =
|
|
Lazy::new(|| regex::Regex::new(r#"\b([\w.\-+]+@[\w.\-]+)\b"#).unwrap());
|
|
|
|
static LINKIFY_URL_RE: Lazy<regex::Regex> = Lazy::new(|| {
|
|
regex::Regex::new(r#"\b((http|https|ftp|ftps):[\w.,:;$/@!?&%\-~=#+]+)"#).unwrap()
|
|
});
|
|
|
|
let lines = split_lines(&self.text);
|
|
|
|
let mut ret =
|
|
"<!DOCTYPE html>\n<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>\n".to_string();
|
|
|
|
for line in lines {
|
|
let is_quote = line.starts_with('>');
|
|
|
|
// we need to do html-entity-encoding after linkify, as otherwise encapsulated links
|
|
// as <http://example.org> cannot be handled correctly
|
|
// (they would become <http://example.org> where the trailing > would become a valid url part).
|
|
// to avoid double encoding, we escape our html-entities by \r that must not be used in the string elsewhere.
|
|
let line = line.to_string().replace("\r", "");
|
|
|
|
let mut line = LINKIFY_MAIL_RE
|
|
.replace_all(&*line, "\rLTa href=\rQUOTmailto:$1\rQUOT\rGT$1\rLT/a\rGT")
|
|
.as_ref()
|
|
.to_string();
|
|
|
|
line = LINKIFY_URL_RE
|
|
.replace_all(&*line, "\rLTa href=\rQUOT$1\rQUOT\rGT$1\rLT/a\rGT")
|
|
.as_ref()
|
|
.to_string();
|
|
|
|
// encode html-entities after linkify the raw string
|
|
line = escaper::encode_minimal(&line);
|
|
|
|
// make our escaped html-entities real after encoding all others
|
|
line = line.replace("\rLT", "<");
|
|
line = line.replace("\rGT", ">");
|
|
line = line.replace("\rQUOT", "\"");
|
|
|
|
if self.flowed {
|
|
// flowed text as of RFC 3676 -
|
|
// a leading space shall be removed
|
|
// and is only there to allow > at the beginning of a line that is no quote.
|
|
line = line.strip_prefix(" ").unwrap_or(&line).to_string();
|
|
if is_quote {
|
|
line = "<em>".to_owned() + &line + "</em>";
|
|
}
|
|
|
|
// a trailing space indicates that the line can be merged with the next one;
|
|
// for sake of simplicity, we skip merging for quotes (quotes may be combined with
|
|
// delsp, so `> >` is different from `>>` etc. see RFC 3676 for details)
|
|
if line.ends_with(' ') && !is_quote {
|
|
if self.delsp {
|
|
line.pop();
|
|
}
|
|
} else {
|
|
line += "<br/>\n";
|
|
}
|
|
} else {
|
|
// normal, fixed text
|
|
if is_quote {
|
|
line = "<em>".to_owned() + &line + "</em>";
|
|
}
|
|
line += "<br/>\n";
|
|
}
|
|
|
|
ret += &*line;
|
|
}
|
|
ret += "</body></html>\n";
|
|
ret
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Links in the middle, at the end and at the start of a line are linkified;
    /// the trailing newline of the input yields a final empty line.
    #[async_std::test]
    async fn test_plain_to_html() {
        let html = PlainText {
            text: r##"line 1
line 2
line with https://link-mid-of-line.org and http://link-end-of-line.com/file?foo=bar%20
http://link-at-start-of-line.org
"##
            .to_string(),
            flowed: false,
            delsp: false,
        }
        .to_html()
        .await;
        assert_eq!(
            html,
            r##"<!DOCTYPE html>
<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>
line 1<br/>
line 2<br/>
line with <a href="https://link-mid-of-line.org">https://link-mid-of-line.org</a> and <a href="http://link-end-of-line.com/file?foo=bar%20">http://link-end-of-line.com/file?foo=bar%20</a><br/>
<a href="http://link-at-start-of-line.org">http://link-at-start-of-line.org</a><br/>
<br/>
</body></html>
"##
        );
    }

    /// The angle brackets around an encapsulated link are entity-encoded
    /// and do not become part of the link.
    #[async_std::test]
    async fn test_plain_to_html_encapsulated() {
        let html = PlainText {
            text: r#"line with <http://encapsulated.link/?foo=_bar> here!"#.to_string(),
            flowed: false,
            delsp: false,
        }
        .to_html()
        .await;
        assert_eq!(
            html,
            r#"<!DOCTYPE html>
<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>
line with &lt;<a href="http://encapsulated.link/?foo=_bar">http://encapsulated.link/?foo=_bar</a>&gt; here!<br/>
</body></html>
"#
        );
    }

    /// A scheme that does not start at a word boundary is not linkified.
    #[async_std::test]
    async fn test_plain_to_html_nolink() {
        let html = PlainText {
            text: r#"line with nohttp://no.link here"#.to_string(),
            flowed: false,
            delsp: false,
        }
        .to_html()
        .await;
        assert_eq!(
            html,
            r#"<!DOCTYPE html>
<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>
line with nohttp://no.link here<br/>
</body></html>
"#
        );
    }

    /// E-mail addresses become `mailto:` links.
    #[async_std::test]
    async fn test_plain_to_html_mailto() {
        let html = PlainText {
            text: r#"just an address: foo@bar.org another@one.de"#.to_string(),
            flowed: false,
            delsp: false,
        }
        .to_html()
        .await;
        assert_eq!(
            html,
            r#"<!DOCTYPE html>
<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>
just an address: <a href="mailto:foo@bar.org">foo@bar.org</a> <a href="mailto:another@one.de">another@one.de</a><br/>
</body></html>
"#
        );
    }

    /// Flowed text: soft breaks are merged, quotes are never merged,
    /// a leading stuffing space is removed.
    #[async_std::test]
    async fn test_plain_to_html_flowed() {
        let html = PlainText {
            text: "line \nstill line\n>quote \n>still quote\n >no quote".to_string(),
            flowed: true,
            delsp: false,
        }
        .to_html()
        .await;
        assert_eq!(
            html,
            r#"<!DOCTYPE html>
<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>
line still line<br/>
<em>&gt;quote </em><br/>
<em>&gt;still quote</em><br/>
&gt;no quote<br/>
</body></html>
"#
        );
    }

    /// Flowed text with delsp: the space marking the soft break is dropped on merge.
    #[async_std::test]
    async fn test_plain_to_html_flowed_delsp() {
        let html = PlainText {
            text: "line \nstill line\n>quote \n>still quote\n >no quote".to_string(),
            flowed: true,
            delsp: true,
        }
        .to_html()
        .await;
        assert_eq!(
            html,
            r#"<!DOCTYPE html>
<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>
linestill line<br/>
<em>&gt;quote </em><br/>
<em>&gt;still quote</em><br/>
&gt;no quote<br/>
</body></html>
"#
        );
    }

    /// Fixed text: every line gets its own break and leading spaces are kept.
    #[async_std::test]
    async fn test_plain_to_html_fixed() {
        let html = PlainText {
            text: "line \nstill line\n>quote \n>still quote\n >no quote".to_string(),
            flowed: false,
            delsp: false,
        }
        .to_html()
        .await;
        assert_eq!(
            html,
            r#"<!DOCTYPE html>
<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>
line <br/>
still line<br/>
<em>&gt;quote </em><br/>
<em>&gt;still quote</em><br/>
 &gt;no quote<br/>
</body></html>
"#
        );
    }
}
|