From 810be4f6c7f8c00fa20cabcf32686d87276436a2 Mon Sep 17 00:00:00 2001 From: bjoern Date: Tue, 19 Mar 2024 16:38:23 +0100 Subject: [PATCH] fix: preserve upper-/lowercase of links parsed by `dehtml()` (#5362) this PR fixes a bug that lowercases all links handled by `dehtml()`, which is wrong. closes #5361 --- src/dehtml.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/dehtml.rs b/src/dehtml.rs index cda0e83d3..b001bb50f 100644 --- a/src/dehtml.rs +++ b/src/dehtml.rs @@ -301,7 +301,7 @@ fn dehtml_starttag_cb( let href = href .decode_and_unescape_value(reader) .unwrap_or_default() - .to_lowercase(); + .to_string(); if !href.is_empty() { dehtml.last_href = Some(href); @@ -463,6 +463,13 @@ mod tests { assert_eq!(plain, "[text](url)"); } + #[test] + fn test_dehtml_case_sensitive_link() { + let html = "case in URLs matter"; + let plain = dehtml(html).unwrap().text; + assert_eq!(plain, "[case in URLs matter](https://foo.bar/Data)"); + } + #[test] fn test_dehtml_bold_text() { let html = "]>text bold]]>";