Parse <blockquote> tags for better quote detection (#2313)

This commit is contained in:
Hocuri
2021-04-07 18:45:00 +02:00
committed by GitHub
parent 720135a915
commit 179a2a50e6
3 changed files with 67 additions and 2 deletions

View File

@@ -23,11 +23,14 @@ struct Dehtml {
/// Everything between <div name="quote"> and <div name="quoted-content"> is usually metadata.
/// If this is > `0`, then we are inside a `<div name="quoted-content">`.
divs_since_quoted_content_div: u32,
/// All-Inkl just puts the quote into `<blockquote> </blockquote>`. This count is
/// increased at each `<blockquote>` and decreased at each `</blockquote>`.
blockquotes_since_blockquote: u32,
}
impl Dehtml {
fn line_prefix(&self) -> &str {
if self.divs_since_quoted_content_div > 0 {
if self.divs_since_quoted_content_div > 0 || self.blockquotes_since_blockquote > 0 {
"> "
} else {
""
@@ -67,7 +70,7 @@ pub fn dehtml(buf: &str) -> Option<String> {
None
}
pub fn dehtml_quick_xml(buf: &str) -> String {
fn dehtml_quick_xml(buf: &str) -> String {
let buf = buf.trim().trim_start_matches("<!doctype html>");
let mut dehtml = Dehtml {
@@ -76,6 +79,7 @@ pub fn dehtml_quick_xml(buf: &str) -> String {
last_href: None,
divs_since_quote_div: 0,
divs_since_quoted_content_div: 0,
blockquotes_since_blockquote: 0,
};
let mut reader = quick_xml::Reader::from_str(buf);
@@ -179,6 +183,7 @@ fn dehtml_endtag_cb(event: &BytesEnd, dehtml: &mut Dehtml) {
dehtml.strbuilder += "_";
}
}
"blockquote" => pop_tag(&mut dehtml.blockquotes_since_blockquote),
_ => {}
}
}
@@ -241,6 +246,7 @@ fn dehtml_starttag_cb<B: std::io::BufRead>(
dehtml.strbuilder += "_";
}
}
"blockquote" => dehtml.blockquotes_since_blockquote += 1,
_ => {}
}
}

View File

@@ -2744,6 +2744,19 @@ On 2020-10-25, Bob wrote:
assert_eq!(mimeparser.parts[0].param.get(Param::Quote).unwrap(), "Now?");
}
#[async_std::test]
async fn test_allinkl_blockquote() {
    // Regression test for mails sent via all-inkl.com, which wrap the quoted
    // text in a plain `<blockquote> </blockquote>` pair.
    let t = TestContext::new().await;
    let raw = include_bytes!("../test-data/message/allinkl-quote.eml");
    let mimeparser = MimeMessage::from_bytes(&t, raw).await.unwrap();

    // Both checks look at the first parsed part.
    let part = &mimeparser.parts[0];

    // The message text must begin with the reply itself ...
    assert!(part.msg.starts_with("It's 1.0."));

    // ... while the quoted question is stored in the `Quote` param.
    assert_eq!(part.param.get(Param::Quote).unwrap(), "What's the version?");
}
#[async_std::test]
async fn test_add_subj_to_multimedia_msg() {
let t = TestContext::new_alice().await;