fix: preserve upper-/lowercase of links parsed by dehtml() (#5362)

This PR fixes a bug that lowercased all links handled by `dehtml()`,
which is wrong.

closes #5361
This commit is contained in:
bjoern
2024-03-19 16:38:23 +01:00
committed by GitHub
parent 1ebbe26ebb
commit 810be4f6c7

View File

@@ -301,7 +301,7 @@ fn dehtml_starttag_cb<B: std::io::BufRead>(
let href = href
.decode_and_unescape_value(reader)
.unwrap_or_default()
.to_lowercase();
.to_string();
if !href.is_empty() {
dehtml.last_href = Some(href);
@@ -463,6 +463,13 @@ mod tests {
assert_eq!(plain, "[text](url)");
}
#[test]
fn test_dehtml_case_sensitive_link() {
    // The tag and attribute names are deliberately mixed-case (`<A HrEf=…>`),
    // and the URL path contains an uppercase letter (`/Data`) that must
    // appear unchanged in the rendered Markdown-style link.
    let input = "<html><A HrEf=\"https://foo.bar/Data\">case in URLs matter</A></html>";
    let rendered = dehtml(input).unwrap().text;
    assert_eq!(rendered, "[case in URLs matter](https://foo.bar/Data)");
}
#[test]
fn test_dehtml_bold_text() {
let html = "<!DOCTYPE name [<!DOCTYPE ...>]><!-- comment -->text <b><?php echo ... ?>bold</b><![CDATA[<>]]>";