From 3440daca1ab0e95d7c4617b5e6a84d204f11cbc1 Mon Sep 17 00:00:00 2001 From: link2xt Date: Sat, 21 Aug 2021 22:02:14 +0300 Subject: [PATCH] Reduce message length limit to 5000 chars (#2615) - Use the same limit for info: full text can be read in HTML anyway. - Remove DC_MAX_GET_{TEXT,INFO}_LEN constants from deltachat.h - Fix a typo: s/DC_ELLIPSE/DC_ELLIPSIS/ - Do not truncate the text when loading from the database. - Update the documentation: limit is in Rust chars, not bytes --- deltachat-ffi/deltachat.h | 4 ---- src/constants.rs | 36 +++++++++--------------------------- src/dc_tools.rs | 13 +++++-------- src/message.rs | 8 +++----- src/mimeparser.rs | 27 +++++++++++++-------------- 5 files changed, 30 insertions(+), 58 deletions(-) diff --git a/deltachat-ffi/deltachat.h b/deltachat-ffi/deltachat.h index 8bc5724a6..8255d8b5c 100644 --- a/deltachat-ffi/deltachat.h +++ b/deltachat-ffi/deltachat.h @@ -3220,10 +3220,6 @@ int64_t dc_chat_get_remaining_mute_duration (const dc_chat_t* chat); #define DC_STATE_OUT_MDN_RCVD 28 -#define DC_MAX_GET_TEXT_LEN 30000 // approx. max. length returned by dc_msg_get_text() -#define DC_MAX_GET_INFO_LEN 100000 // approx. max. length returned by dc_get_msg_info() - - /** * Create new message object. Message objects are needed e.g. for sending messages using * dc_send_msg(). Moreover, they are returned e.g. from dc_get_msg(), diff --git a/src/constants.rs b/src/constants.rs index 6f32d899a..821a1796a 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -165,36 +165,18 @@ pub const DC_MSG_ID_MARKER1: u32 = 1; pub const DC_MSG_ID_DAYMARKER: u32 = 9; pub const DC_MSG_ID_LAST_SPECIAL: u32 = 9; -/// string that indicates sth. is left out or truncated -pub const DC_ELLIPSE: &str = "[...]"; +/// String that indicates that something is left out or truncated. +pub const DC_ELLIPSIS: &str = "[...]"; -/// to keep bubbles and chat flow usable, -/// and to avoid problems with controls using very long texts, -/// we limit the text length to DC_DESIRED_TEXT_LEN. -/// if the text is longer, the full text can be retrieved using has_html()/get_html(). +/// Message length limit. /// -/// we are using a bit less than DC_MAX_GET_TEXT_LEN to avoid cutting twice -/// (a bit less as truncation may not be exact and ellipses may be added). +/// To keep bubbles and chat flow usable and to avoid problems with controls using very long texts, +/// we limit the text length to `DC_DESIRED_TEXT_LEN`. If the text is longer, the full text can be +/// retrieved using has_html()/get_html(). /// -/// note, that DC_DESIRED_TEXT_LEN and DC_MAX_GET_TEXT_LEN -/// define max. number of bytes, _not_ unicode graphemes. -/// in general, that seems to be okay for such an upper limit, -/// esp. as calculating the number of graphemes is not simple -/// (one graphemes may be a sequence of code points which is a sequence of bytes). -/// also even if we have the exact number of graphemes, -/// that would not always help on getting an idea about the screen space used -/// (to keep bubbles and chat flow usable). -/// -/// therefore, the number of bytes is only a very rough estimation, -/// however, the ~30K seems to work okayish for a while, -/// if it turns out, it is too few for some alphabet, we can still increase. -pub const DC_DESIRED_TEXT_LEN: usize = 29_000; - -/// approx. max. length (number of bytes) returned by dc_msg_get_text() -pub const DC_MAX_GET_TEXT_LEN: usize = 30_000; - -/// approx. max. length returned by dc_get_msg_info() -pub const DC_MAX_GET_INFO_LEN: usize = 100_000; +/// Note that for simplicity maximum length is defined as the number of Unicode Scalar Values (Rust +/// `char`s), not Unicode Grapheme Clusters. +pub const DC_DESIRED_TEXT_LEN: usize = 5000; pub const DC_CONTACT_ID_UNDEFINED: u32 = 0; pub const DC_CONTACT_ID_SELF: u32 = 1; diff --git a/src/dc_tools.rs b/src/dc_tools.rs index 6f3356326..b480973cc 100644 --- a/src/dc_tools.rs +++ b/src/dc_tools.rs @@ -17,7 +17,7 @@ use chrono::{Local, TimeZone}; use rand::{thread_rng, Rng}; use crate::chat::{add_device_msg, add_device_msg_with_importance}; -use crate::constants::{Viewtype, DC_ELLIPSE, DC_OUTDATED_WARNING_DAYS}; +use crate::constants::{Viewtype, DC_ELLIPSIS, DC_OUTDATED_WARNING_DAYS}; use crate::context::Context; use crate::events::EventType; use crate::message::Message; @@ -29,7 +29,7 @@ use crate::stock_str; #[allow(clippy::indexing_slicing)] pub(crate) fn dc_truncate(buf: &str, approx_chars: usize) -> Cow { let count = buf.chars().count(); - if approx_chars > 0 && count > approx_chars + DC_ELLIPSE.len() { + if count > approx_chars + DC_ELLIPSIS.len() { let end_pos = buf .char_indices() .nth(approx_chars) @@ -37,9 +37,9 @@ pub(crate) fn dc_truncate(buf: &str, approx_chars: usize) -> Cow { .unwrap_or_default(); if let Some(index) = buf[..end_pos].rfind(|c| c == ' ' || c == '\n') { - Cow::Owned(format!("{}{}", &buf[..=index], DC_ELLIPSE)) + Cow::Owned(format!("{}{}", &buf[..=index], DC_ELLIPSIS)) } else { - Cow::Owned(format!("{}{}", &buf[..end_pos], DC_ELLIPSE)) + Cow::Owned(format!("{}{}", &buf[..end_pos], DC_ELLIPSIS)) } } else { Cow::Borrowed(buf) @@ -711,10 +711,7 @@ mod tests { assert_eq!(dc_truncate("\n hello \n world", 4), "\n [...]"); assert_eq!(dc_truncate("𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ", 1), "𐠈[...]"); - assert_eq!( - dc_truncate("𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ", 0), - "𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ" - ); + assert_eq!(dc_truncate("𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ", 0), "[...]"); // 9 characters, so no truncation assert_eq!(dc_truncate("𑒀ὐ¢🜀\u{1e01b}A a🟠", 6), "𑒀ὐ¢🜀\u{1e01b}A a🟠",); diff --git a/src/message.rs b/src/message.rs index 9f293bb03..8f5905624 100644 --- a/src/message.rs +++ b/src/message.rs @@ -14,7 +14,7 @@ use crate::chat::{self, Chat, ChatId}; use crate::config::Config; use crate::constants::{ Blocked, Chattype, VideochatType, Viewtype, DC_CHAT_ID_TRASH, DC_CONTACT_ID_INFO, - DC_CONTACT_ID_SELF, DC_MAX_GET_INFO_LEN, DC_MAX_GET_TEXT_LEN, DC_MSG_ID_LAST_SPECIAL, + DC_CONTACT_ID_SELF, DC_DESIRED_TEXT_LEN, DC_MSG_ID_LAST_SPECIAL, }; use crate::contact::{Contact, Origin}; use crate::context::Context; @@ -541,9 +541,7 @@ impl Message { } pub fn get_text(&self) -> Option { - self.text - .as_ref() - .map(|text| dc_truncate(text, DC_MAX_GET_TEXT_LEN).to_string()) + self.text.as_ref().map(|s| s.to_string()) } pub fn get_subject(&self) -> &str { @@ -1142,7 +1140,7 @@ pub async fn get_msg_info(context: &Context, msg_id: MsgId) -> Result { return Ok(ret); } let rawtxt = rawtxt.unwrap_or_default(); - let rawtxt = dc_truncate(rawtxt.trim(), DC_MAX_GET_INFO_LEN); + let rawtxt = dc_truncate(rawtxt.trim(), DC_DESIRED_TEXT_LEN); let fts = dc_timestamp_to_str(msg.get_timestamp()); ret += &format!("Sent: {}", fts); diff --git a/src/mimeparser.rs b/src/mimeparser.rs index 57173882e..9978047a8 100644 --- a/src/mimeparser.rs +++ b/src/mimeparser.rs @@ -10,7 +10,7 @@ use once_cell::sync::Lazy; use crate::aheader::Aheader; use crate::blob::BlobObject; -use crate::constants::{Viewtype, DC_DESIRED_TEXT_LEN, DC_ELLIPSE}; +use crate::constants::{Viewtype, DC_DESIRED_TEXT_LEN, DC_ELLIPSIS}; use crate::contact::addr_normalize; use crate::context::Context; use crate::dc_tools::{dc_get_filemeta, dc_truncate}; @@ -903,13 +903,14 @@ impl MimeMessage { (simplified_txt, top_quote) }; - let simplified_txt = - if simplified_txt.len() > DC_DESIRED_TEXT_LEN + DC_ELLIPSE.len() { - self.is_mime_modified = true; - dc_truncate(&*simplified_txt, DC_DESIRED_TEXT_LEN).to_string() - } else { - simplified_txt - }; + let simplified_txt = if simplified_txt.chars().count() + > DC_DESIRED_TEXT_LEN + DC_ELLIPSIS.len() + { + self.is_mime_modified = true; + dc_truncate(&*simplified_txt, DC_DESIRED_TEXT_LEN).to_string() + } else { + simplified_txt + }; if !simplified_txt.is_empty() || simplified_quote.is_some() { let mut part = Part { @@ -1604,7 +1605,6 @@ mod tests { #![allow(clippy::indexing_slicing)] use super::*; - use crate::constants::DC_MAX_GET_TEXT_LEN; use crate::{ chatlist::Chatlist, config::Config, @@ -2880,21 +2880,20 @@ On 2020-10-25, Bob wrote: let t = TestContext::new().await; static REPEAT_TXT: &str = "this text with 42 chars is just repeated.\n"; - static REPEAT_CNT: usize = 2000; // results in a text of 84k, should be more than DC_MAX_GET_TEXT_LEN + static REPEAT_CNT: usize = 2000; // results in a text of 84k, should be more than DC_DESIRED_TEXT_LEN let long_txt = format!("From: alice@c.de\n\n{}", REPEAT_TXT.repeat(REPEAT_CNT)); - assert!(DC_DESIRED_TEXT_LEN + DC_ELLIPSE.len() < DC_MAX_GET_TEXT_LEN); let mimemsg = MimeMessage::from_bytes(&t, long_txt.as_ref()) .await .unwrap(); assert_eq!(long_txt.matches("just repeated").count(), REPEAT_CNT); - assert!(long_txt.len() > DC_MAX_GET_TEXT_LEN); + assert!(long_txt.len() > DC_DESIRED_TEXT_LEN); assert!(mimemsg.is_mime_modified); assert!( mimemsg.parts[0].msg.matches("just repeated").count() - < DC_MAX_GET_TEXT_LEN / REPEAT_TXT.len() + <= DC_DESIRED_TEXT_LEN / REPEAT_TXT.len() ); - assert!(mimemsg.parts[0].msg.len() <= DC_MAX_GET_TEXT_LEN); + assert!(mimemsg.parts[0].msg.len() <= DC_DESIRED_TEXT_LEN + DC_ELLIPSIS.len()); } #[async_std::test]