From 134b09dba57178242e4f3a7dd8d1e8b523763410 Mon Sep 17 00:00:00 2001
From: Hocuri
Date: Mon, 6 Apr 2020 14:02:56 +0200
Subject: [PATCH] Fix #1373, ignore incorrect html close tags

---
 src/dehtml.rs | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/src/dehtml.rs b/src/dehtml.rs
index 0b2f5435f..5639e3af4 100644
--- a/src/dehtml.rs
+++ b/src/dehtml.rs
@@ -35,6 +35,7 @@ pub fn dehtml(buf: &str) -> String {
     };
 
     let mut reader = quick_xml::Reader::from_str(buf);
+    reader.check_end_names(false);
 
     let mut buf = Vec::new();
 
@@ -225,4 +226,23 @@ mod tests {
             "<>\"\'& äÄöÖüÜß fooÆçÇ \u{2666}\u{200e}\u{200f}\u{200c}&noent;\u{200d}"
         );
     }
+
+    #[test]
+    fn test_unclosed_tags() {
+        let input = r##"
+        <!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'
+        'http://www.w3.org/TR/html4/loose.dtd'>
+        <html>
+        <head>
+        <title>Hi</title>
+        <meta http-equiv='Content-Type' content='text/html; charset=iso-8859-1'>
+        </head>
+        <body>
+        lots of text
+        </body>
+        </html>
+        "##;
+        let txt = dehtml(input);
+        assert_eq!(txt.trim(), "lots of text");
+    }
 }