fix: do not run simplify() on dehtml() output

simplify() is written to process incoming plaintext messages
and extract footers and quotes from them.
Incoming messages contain various quote styles
and simplify() implements heuristics to detect them.

If dehtml() output is processed by simplify(),
simplify() heuristics may erroneously detect
footers and quotes in produced plaintext.

dehtml() should directly detect quotes
instead of converting them to plaintext quotes
for parsing with simplify().
This commit is contained in:
link2xt
2023-06-26 20:21:01 +00:00
parent 5fe94e8bce
commit 2d30afd212
5 changed files with 333 additions and 70 deletions

View File

@@ -72,7 +72,7 @@ pub(crate) fn split_lines(buf: &str) -> Vec<&str> {
}
/// Simplified text and some additional information gained from the input.
#[derive(Debug, Default)]
#[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct SimplifiedText {
/// The text itself.
pub text: String,
@@ -91,6 +91,14 @@ pub(crate) struct SimplifiedText {
pub footer: Option<String>,
}
/// Simplifies the text of a quote.
///
/// Splits `quote` into lines, separates a message footer from them via
/// `remove_message_footer`, and renders the remaining lines.
///
/// Returns the rendered quote together with a flag that is `true`
/// when a footer was detected and therefore cut off.
pub(crate) fn simplify_quote(quote: &str) -> (String, bool) {
    let lines = split_lines(quote);
    let (body, footer) = remove_message_footer(&lines);
    let rendered = render_message(body, false);
    (rendered, footer.is_some())
}
/// Simplify message text for chat display.
/// Remove quotes, signatures, trailing empty lines etc.
pub(crate) fn simplify(mut input: String, is_chat_message: bool) -> SimplifiedText {
@@ -125,11 +133,9 @@ pub(crate) fn simplify(mut input: String, is_chat_message: bool) -> SimplifiedTe
if !is_chat_message {
top_quote = top_quote.map(|quote| {
let quote_lines = split_lines(&quote);
let (quote_lines, quote_footer_lines) = remove_message_footer(&quote_lines);
is_cut = is_cut || quote_footer_lines.is_some();
render_message(quote_lines, false)
let (quote, quote_cut) = simplify_quote(&quote);
is_cut |= quote_cut;
quote
});
}