diff --git a/src/dehtml.rs b/src/dehtml.rs
index dba2c14c0..03b40b3f5 100644
--- a/src/dehtml.rs
+++ b/src/dehtml.rs
@@ -152,7 +152,7 @@ fn dehtml_endtag_cb(event: &BytesEnd, dehtml: &mut Dehtml) {
         .to_lowercase();
 
     match tag.as_str() {
-        "p" | "table" | "td" | "style" | "script" | "title" | "pre" => {
+        "style" | "script" | "title" | "pre" => {
             dehtml.strbuilder += &dehtml.append_prefix("\n\n");
             dehtml.add_text = AddText::YesRemoveLineEnds;
         }
@@ -200,7 +200,9 @@ fn dehtml_starttag_cb(
 
     match tag.as_str() {
         "p" | "table" | "td" => {
-            dehtml.strbuilder += &dehtml.append_prefix("\n\n");
+            if !dehtml.strbuilder.is_empty() {
+                dehtml.strbuilder += &dehtml.append_prefix("\n\n");
+            }
             dehtml.add_text = AddText::YesRemoveLineEnds;
         }
         #[rustfmt::skip]
@@ -353,6 +355,21 @@ mod tests {
         assert_eq!(plain, "line1\n\r\r\rline2\nline3");
     }
 
+    #[test]
+    fn test_dehtml_parse_p() {
+        let html = "<p>Foo</p><p>Bar</p>";
+        let plain = dehtml(html).unwrap();
+        assert_eq!(plain, "Foo\n\nBar");
+
+        let html = "<p>Foo<p>Bar";
+        let plain = dehtml(html).unwrap();
+        assert_eq!(plain, "Foo\n\nBar");
+
+        let html = "<p>Foo</p><p>Bar<p>Baz";
+        let plain = dehtml(html).unwrap();
+        assert_eq!(plain, "Foo\n\nBar\n\nBaz");
+    }
+
     #[test]
     fn test_dehtml_parse_href() {
         let html = "text