mirror of
https://github.com/chatmail/core.git
synced 2026-05-08 17:36:29 +03:00
Parse <blockquote> tags for better quote detection (#2313)
This commit is contained in:
@@ -23,11 +23,14 @@ struct Dehtml {
|
|||||||
/// Everything between <div name="quote"> and <div name="quoted-content"> is usually metadata
|
/// Everything between <div name="quote"> and <div name="quoted-content"> is usually metadata
|
||||||
/// If this is > `0`, then we are inside a `<div name="quoted-content">`.
|
/// If this is > `0`, then we are inside a `<div name="quoted-content">`.
|
||||||
divs_since_quoted_content_div: u32,
|
divs_since_quoted_content_div: u32,
|
||||||
|
/// All-Inkl just puts the quote into `<blockquote> </blockquote>`. This count is
|
||||||
|
/// increased at each `<blockquote>` and decreased at each `</blockquote>`.
|
||||||
|
blockquotes_since_blockquote: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Dehtml {
|
impl Dehtml {
|
||||||
fn line_prefix(&self) -> &str {
|
fn line_prefix(&self) -> &str {
|
||||||
if self.divs_since_quoted_content_div > 0 {
|
if self.divs_since_quoted_content_div > 0 || self.blockquotes_since_blockquote > 0 {
|
||||||
"> "
|
"> "
|
||||||
} else {
|
} else {
|
||||||
""
|
""
|
||||||
@@ -67,7 +70,7 @@ pub fn dehtml(buf: &str) -> Option<String> {
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn dehtml_quick_xml(buf: &str) -> String {
|
fn dehtml_quick_xml(buf: &str) -> String {
|
||||||
let buf = buf.trim().trim_start_matches("<!doctype html>");
|
let buf = buf.trim().trim_start_matches("<!doctype html>");
|
||||||
|
|
||||||
let mut dehtml = Dehtml {
|
let mut dehtml = Dehtml {
|
||||||
@@ -76,6 +79,7 @@ pub fn dehtml_quick_xml(buf: &str) -> String {
|
|||||||
last_href: None,
|
last_href: None,
|
||||||
divs_since_quote_div: 0,
|
divs_since_quote_div: 0,
|
||||||
divs_since_quoted_content_div: 0,
|
divs_since_quoted_content_div: 0,
|
||||||
|
blockquotes_since_blockquote: 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut reader = quick_xml::Reader::from_str(buf);
|
let mut reader = quick_xml::Reader::from_str(buf);
|
||||||
@@ -179,6 +183,7 @@ fn dehtml_endtag_cb(event: &BytesEnd, dehtml: &mut Dehtml) {
|
|||||||
dehtml.strbuilder += "_";
|
dehtml.strbuilder += "_";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
"blockquote" => pop_tag(&mut dehtml.blockquotes_since_blockquote),
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -241,6 +246,7 @@ fn dehtml_starttag_cb<B: std::io::BufRead>(
|
|||||||
dehtml.strbuilder += "_";
|
dehtml.strbuilder += "_";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
"blockquote" => dehtml.blockquotes_since_blockquote += 1,
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2744,6 +2744,19 @@ On 2020-10-25, Bob wrote:
|
|||||||
assert_eq!(mimeparser.parts[0].param.get(Param::Quote).unwrap(), "Now?");
|
assert_eq!(mimeparser.parts[0].param.get(Param::Quote).unwrap(), "Now?");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[async_std::test]
|
||||||
|
async fn test_allinkl_blockquote() {
|
||||||
|
// all-inkl.com puts quotes into `<blockquote> </blockquote>`.
|
||||||
|
let t = TestContext::new().await;
|
||||||
|
let raw = include_bytes!("../test-data/message/allinkl-quote.eml");
|
||||||
|
let mimeparser = MimeMessage::from_bytes(&t, raw).await.unwrap();
|
||||||
|
assert!(mimeparser.parts[0].msg.starts_with("It's 1.0."));
|
||||||
|
assert_eq!(
|
||||||
|
mimeparser.parts[0].param.get(Param::Quote).unwrap(),
|
||||||
|
"What's the version?"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[async_std::test]
|
#[async_std::test]
|
||||||
async fn test_add_subj_to_multimedia_msg() {
|
async fn test_add_subj_to_multimedia_msg() {
|
||||||
let t = TestContext::new_alice().await;
|
let t = TestContext::new_alice().await;
|
||||||
|
|||||||
46
test-data/message/allinkl-quote.eml
Normal file
46
test-data/message/allinkl-quote.eml
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
Return-Path: <alice@example.org>
|
||||||
|
Delivered-To: bob@example.org
|
||||||
|
Received: from hq5.merlinux.eu
|
||||||
|
by hq5.merlinux.eu with LMTP
|
||||||
|
id eHU/Co4EUmBAQQAAPzvFDg
|
||||||
|
(envelope-from <alice@example.org>)
|
||||||
|
for <bob@example.org>; Wed, 17 Mar 2021 14:30:54 +0100
|
||||||
|
Received: from dd37930.kasserver.com (dd37930.kasserver.com [85.13.154.127])
|
||||||
|
by hq5.merlinux.eu (Postfix) with ESMTPS id CB5D927A0071
|
||||||
|
for <bob@example.org>; Wed, 17 Mar 2021 14:30:53 +0100 (CET)
|
||||||
|
Received: from dd37930.kasserver.com (dd0805.kasserver.com [85.13.161.253])
|
||||||
|
by dd37930.kasserver.com (Postfix) with ESMTPSA id 724E853C0979
|
||||||
|
for <bob@example.org>; Wed, 17 Mar 2021 14:30:53 +0100 (CET)
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/html; charset=ISO-8859-1
|
||||||
|
Content-Transfer-Encoding: quoted-printable
|
||||||
|
X-SenderIP: 217.80.3.233
|
||||||
|
User-Agent: ALL-INKL Webmail 2.11
|
||||||
|
In-Reply-To: <Mr.nru4puZrBpw.JfbybhIh75A@testrun.org>
|
||||||
|
References: <Mr.nru4puZrBpw.JfbybhIh75A@testrun.org><Mr.nru4puZrBpw.JfbybhIh75A@testrun.org>
|
||||||
|
Subject: Re: Message from Hocuri
|
||||||
|
From: alice@example.org
|
||||||
|
To: bob@example.org
|
||||||
|
Message-Id: <20210317133053.724E853C0979@dd37930.kasserver.com>
|
||||||
|
Date: Wed, 17 Mar 2021 14:30:53 +0100 (CET)
|
||||||
|
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www=
|
||||||
|
=2Ew3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html lang=3D"de" xml:lang=
|
||||||
|
=3D"en" xmlns=3D"http://www.w3.org/1999/xhtml"><head><meta http-equiv=3D"Co=
|
||||||
|
ntent-Type" content=3D"text/html; charset=3DISO-8859-1" /><title></title><s=
|
||||||
|
tyle type=3D"text/css">html,body{background-color:#fff;color:#333;line-heig=
|
||||||
|
ht:1.4;font-family:sans-serif,Arial,Verdana,Trebuchet MS;}</style></head><b=
|
||||||
|
ody><p>It's 1.0.</p>
|
||||||
|
<div ></div>
|
||||||
|
<p>Hocuri schrieb am 17.03.2021 14:25 (GMT +01:00):</p>
|
||||||
|
<blockquote cite=3D"mid:Mr.nru4puZrBpw.JfbybhIh75A@testrun.org">
|
||||||
|
<pre>What's the version?
|
||||||
|
|
||||||
|
--=20
|
||||||
|
Sent with my Delta Chat Messenger: <a href=3D"https://delta.chat" target=3D=
|
||||||
|
"_blank" rel=3D"nofollow noopener" title=3D"https://delta.chat">https://del=
|
||||||
|
ta.chat</a>
|
||||||
|
|
||||||
|
|
||||||
|
</pre>
|
||||||
|
</blockquote></body></html>
|
||||||
Reference in New Issue
Block a user