truncate incoming messages by lines (#3480)

* truncate incoming messages by lines,
because many line breaks seem to cause a delay when opening a chat on deltachat-ios

* run cargo fmt

* remove DC_DESIRED_TEXT_LINES_THRESHOLD
and use Strings instead of Cow<str>

* remove usage of clippy::indexing_slicing in truncate_by_lines (#3596)

* adjust comments

* Fix truncate_by_lines tests

* Reword indexing/slicing error

* Remove unnecessary conditional

* Fix a typo in the comment

Co-authored-by: link2xt <link2xt@testrun.org>
This commit is contained in:
Simon Laux
2022-09-17 16:15:33 +02:00
committed by GitHub
parent 569628a202
commit 62afd3d4c3
4 changed files with 151 additions and 12 deletions

View File

@@ -61,6 +61,7 @@
this affects `dc_get_chat_contacts()`, `dc_get_contacts()` and `dc_get_blocked_contacts()` #3562
- add `internet_access` flag to `dc_msg_get_webxdc_info()` #3516
- `DC_EVENT_WEBXDC_INSTANCE_DELETED` is emitted when a message containing a webxdc gets deleted #3105
- truncate incoming messages by lines instead of just length #3480
### Fixes
- do not emit notifications for blocked chats #3557

View File

@@ -167,6 +167,11 @@ pub const DC_MSG_ID_LAST_SPECIAL: u32 = 9;
/// String that indicates that something is left out or truncated.
pub const DC_ELLIPSIS: &str = "[...]";
/// Line limit used when truncating the text of incoming messages.
///
/// This is how many lines desktop can display in fullscreen (at zoom level 1x),
/// taken from "subjective" testing of what looks ok.
pub const DC_DESIRED_TEXT_LINES: usize = 38;
/// Number of characters after which a line is counted as wrapped when
/// truncating the text of incoming messages.
///
/// This is how many chars desktop can display per line (from "subjective" testing).
pub const DC_DESIRED_TEXT_LINE_LEN: usize = 100;
/// Message length limit.
///
@@ -176,7 +181,7 @@ pub const DC_ELLIPSIS: &str = "[...]";
///
/// Note that for simplicity maximum length is defined as the number of Unicode Scalar Values (Rust
/// `char`s), not Unicode Grapheme Clusters.
pub const DC_DESIRED_TEXT_LEN: usize = 5000;
pub const DC_DESIRED_TEXT_LEN: usize = DC_DESIRED_TEXT_LINE_LEN * DC_DESIRED_TEXT_LINES;
// Flags for empty server job

View File

@@ -12,7 +12,7 @@ use once_cell::sync::Lazy;
use crate::aheader::Aheader;
use crate::blob::BlobObject;
use crate::constants::{DC_DESIRED_TEXT_LEN, DC_ELLIPSIS};
use crate::constants::{DC_DESIRED_TEXT_LINES, DC_DESIRED_TEXT_LINE_LEN};
use crate::contact::{addr_cmp, addr_normalize, ContactId};
use crate::context::Context;
use crate::decrypt::{create_decryption_info, try_decrypt};
@@ -28,7 +28,7 @@ use crate::peerstate::Peerstate;
use crate::simplify::{simplify, SimplifiedText};
use crate::stock_str;
use crate::sync::SyncItems;
use crate::tools::{get_filemeta, parse_receive_headers, truncate};
use crate::tools::{get_filemeta, parse_receive_headers, truncate_by_lines};
/// A parsed MIME message.
///
@@ -1012,14 +1012,15 @@ impl MimeMessage {
(simplified_txt, top_quote)
};
let simplified_txt = if simplified_txt.chars().count()
> DC_DESIRED_TEXT_LEN + DC_ELLIPSIS.len()
{
self.is_mime_modified = true;
truncate(&*simplified_txt, DC_DESIRED_TEXT_LEN).to_string()
} else {
simplified_txt
};
// Truncate text if it has too many lines
let (simplified_txt, was_truncated) = truncate_by_lines(
simplified_txt,
DC_DESIRED_TEXT_LINES,
DC_DESIRED_TEXT_LINE_LEN,
);
if was_truncated {
self.is_mime_modified = was_truncated;
}
if !simplified_txt.is_empty() || simplified_quote.is_some() {
let mut part = Part {
@@ -1817,7 +1818,7 @@ mod tests {
use crate::{
chatlist::Chatlist,
config::Config,
constants::Blocked,
constants::{Blocked, DC_DESIRED_TEXT_LEN, DC_ELLIPSIS},
message::{Message, MessageState, MessengerMessage},
receive_imf::receive_imf,
test_utils::TestContext,

View File

@@ -49,6 +49,65 @@ pub(crate) fn truncate(buf: &str, approx_chars: usize) -> Cow<str> {
}
}
/// Shortens a string to a specified line count and adds "[...]" to the
/// end of the shortened string.
///
/// Lines are counted as follows: every `\n` ends a line, and a line that is
/// longer than `max_line_len` characters (Rust `char`s) is counted as wrapped,
/// i.e. every further `max_line_len` characters count as one more line.
///
/// As soon as `max_lines` lines are reached, the text is cut there. To avoid
/// cutting in the middle of a word, the cut is moved back to directly after
/// the last space or line break in front of it, if there is one.
///
/// Returns a tuple with the resulting string and a boolean telling whether it
/// was truncated. If no truncation is needed, `buf` is returned unchanged.
pub(crate) fn truncate_by_lines(
    buf: String,
    max_lines: usize,
    max_line_len: usize,
) -> (String, bool) {
    let mut lines = 0;
    let mut line_chars = 0;
    let mut break_point: Option<usize> = None;

    for (index, char) in buf.char_indices() {
        if char == '\n' {
            line_chars = 0;
            lines += 1;
        } else {
            line_chars += 1;
            if line_chars > max_line_len {
                // The line is full, this character starts the next one.
                line_chars = 1;
                lines += 1;
            }
        }
        // `>=` instead of `==`: with `max_lines == 0` the counter may already
        // be 1 after the very first character (leading line break or
        // `max_line_len == 0`), and an equality check would then never match.
        if lines >= max_lines {
            break_point = Some(index);
            break;
        }
    }

    let end_pos = match break_point {
        Some(end_pos) => end_pos,
        // Text is unchanged.
        None => return (buf, false),
    };

    // Text has too many lines and needs to be truncated.
    // `end_pos` comes from `char_indices()` and the matched separators are
    // single-byte characters, so both slices are expected to be valid.
    let text = buf.get(..end_pos).and_then(|head| {
        match head.rfind(|c: char| matches!(c, ' ' | '\n')) {
            // Keep the separator itself, the ellipsis follows directly after it.
            Some(index) => head.get(..=index),
            None => Some(head),
        }
    });

    match text {
        Some(truncated_text) => (format!("{}{}", truncated_text, DC_ELLIPSIS), true),
        None => {
            // In case of indexing/slicing error, we return an error
            // message as a preview and add HTML version. This should
            // never happen.
            let error_text = "[Truncation of the message failed, this is a bug in the Delta Chat core. Please report it.\nYou can still open the full text to view the original message.]";
            (error_text.to_string(), true)
        }
    }
}
/* ******************************************************************************
* date/time tools
******************************************************************************/
@@ -744,6 +803,79 @@ Hop: From: hq5.example.org; By: hq5.example.org; Date: Mon, 27 Dec 2021 11:21:22
);
}
mod truncate_by_lines {
use super::*;
#[test]
fn test_just_text() {
let s = "this is a little test string".to_string();
assert_eq!(
truncate_by_lines(s, 4, 6),
("this is a little test [...]".to_string(), true)
);
}
#[test]
fn test_with_linebreaks() {
let s = "this\n is\n a little test string".to_string();
assert_eq!(
truncate_by_lines(s, 4, 6),
("this\n is\n a little [...]".to_string(), true)
);
}
#[test]
fn test_only_linebreaks() {
let s = "\n\n\n\n\n\n\n".to_string();
assert_eq!(
truncate_by_lines(s, 4, 5),
("\n\n\n[...]".to_string(), true)
);
}
#[test]
fn limit_hits_end() {
let s = "hello\n world !".to_string();
assert_eq!(
truncate_by_lines(s, 2, 8),
("hello\n world !".to_string(), false)
);
}
#[test]
fn test_edge() {
assert_eq!(
truncate_by_lines("".to_string(), 2, 4),
("".to_string(), false)
);
assert_eq!(
truncate_by_lines("\n hello \n world".to_string(), 2, 4),
("\n [...]".to_string(), true)
);
assert_eq!(
truncate_by_lines("𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ".to_string(), 1, 2),
("𐠈0[...]".to_string(), true)
);
assert_eq!(
truncate_by_lines("𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ".to_string(), 1, 0),
("[...]".to_string(), true)
);
// 9 characters, so no truncation
assert_eq!(
truncate_by_lines("𑒀ὐ¢🜀\u{1e01b}A a🟠".to_string(), 1, 12),
("𑒀ὐ¢🜀\u{1e01b}A a🟠".to_string(), false),
);
// 12 characters, truncation
assert_eq!(
truncate_by_lines("𑒀ὐ¢🜀\u{1e01b}A a🟠bcd".to_string(), 1, 7),
("𑒀ὐ¢🜀\u{1e01b}A [...]".to_string(), true),
);
}
}
#[test]
fn test_create_id() {
let buf = create_id();