Fix #1373: ignore mismatched HTML closing tags

This commit is contained in:
Hocuri
2020-04-06 14:02:56 +02:00
committed by holger krekel
parent 76b93274e8
commit 134b09dba5

View File

@@ -35,6 +35,7 @@ pub fn dehtml(buf: &str) -> String {
};
let mut reader = quick_xml::Reader::from_str(buf);
reader.check_end_names(false);
let mut buf = Vec::new();
@@ -225,4 +226,23 @@ mod tests {
"<>\"\'& äÄöÖüÜß fooÆçÇ \u{2666}\u{200e}\u{200f}\u{200c}&noent;\u{200d}"
);
}
/// Regression test for #1373.
///
/// The fixture's `<meta>` element is never closed, so the `</head>` that
/// follows does not match the most recently opened tag. `dehtml` is expected
/// to tolerate that and yield only the body text once surrounding whitespace
/// is trimmed.
#[test]
fn test_unclosed_tags() {
    // The raw string is kept verbatim: its content is the HTML under test.
    let html = r##"
<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'
'http://www.w3.org/TR/html4/loose.dtd'>
<html>
<head>
<title>Hi</title>
<meta http-equiv='Content-Type' content='text/html; charset=iso-8859-1'>
</head>
<body>
lots of text
</body>
</html>
"##;

    assert_eq!(dehtml(html).trim(), "lots of text");
}
}