Reduce message length limit to 5000 chars (#2615)

- Use the same limit for info: full text can be read in HTML anyway.
- Remove DC_MAX_GET_{TEXT,INFO}_LEN constants from deltachat.h
- Fix a typo: s/DC_ELLIPSE/DC_ELLIPSIS/
- Do not truncate the text when loading from the database.
- Update the documentation: limit is in Rust chars, not bytes
This commit is contained in:
link2xt
2021-08-21 22:02:14 +03:00
committed by GitHub
parent d0bfb555dd
commit 3440daca1a
5 changed files with 30 additions and 58 deletions

View File

@@ -3220,10 +3220,6 @@ int64_t dc_chat_get_remaining_mute_duration (const dc_chat_t* chat);
#define DC_STATE_OUT_MDN_RCVD 28 #define DC_STATE_OUT_MDN_RCVD 28
#define DC_MAX_GET_TEXT_LEN 30000 // approx. max. length returned by dc_msg_get_text()
#define DC_MAX_GET_INFO_LEN 100000 // approx. max. length returned by dc_get_msg_info()
/** /**
* Create new message object. Message objects are needed e.g. for sending messages using * Create new message object. Message objects are needed e.g. for sending messages using
* dc_send_msg(). Moreover, they are returned e.g. from dc_get_msg(), * dc_send_msg(). Moreover, they are returned e.g. from dc_get_msg(),

View File

@@ -165,36 +165,18 @@ pub const DC_MSG_ID_MARKER1: u32 = 1;
pub const DC_MSG_ID_DAYMARKER: u32 = 9; pub const DC_MSG_ID_DAYMARKER: u32 = 9;
pub const DC_MSG_ID_LAST_SPECIAL: u32 = 9; pub const DC_MSG_ID_LAST_SPECIAL: u32 = 9;
/// string that indicates sth. is left out or truncated /// String that indicates that something is left out or truncated.
pub const DC_ELLIPSE: &str = "[...]"; pub const DC_ELLIPSIS: &str = "[...]";
/// to keep bubbles and chat flow usable, /// Message length limit.
/// and to avoid problems with controls using very long texts,
/// we limit the text length to DC_DESIRED_TEXT_LEN.
/// if the text is longer, the full text can be retrieved using has_html()/get_html().
/// ///
/// we are using a bit less than DC_MAX_GET_TEXT_LEN to avoid cutting twice /// To keep bubbles and chat flow usable and to avoid problems with controls using very long texts,
/// (a bit less as truncation may not be exact and ellipses may be added). /// we limit the text length to `DC_DESIRED_TEXT_LEN`. If the text is longer, the full text can be
/// retrieved using has_html()/get_html().
/// ///
/// note, that DC_DESIRED_TEXT_LEN and DC_MAX_GET_TEXT_LEN /// Note that for simplicity maximum length is defined as the number of Unicode Scalar Values (Rust
/// define max. number of bytes, _not_ unicode graphemes. /// `char`s), not Unicode Grapheme Clusters.
/// in general, that seems to be okay for such an upper limit, pub const DC_DESIRED_TEXT_LEN: usize = 5000;
/// esp. as calculating the number of graphemes is not simple
/// (one graphemes may be a sequence of code points which is a sequence of bytes).
/// also even if we have the exact number of graphemes,
/// that would not always help on getting an idea about the screen space used
/// (to keep bubbles and chat flow usable).
///
/// therefore, the number of bytes is only a very rough estimation,
/// however, the ~30K seems to work okayish for a while,
/// if it turns out, it is too few for some alphabet, we can still increase.
pub const DC_DESIRED_TEXT_LEN: usize = 29_000;
/// approx. max. length (number of bytes) returned by dc_msg_get_text()
pub const DC_MAX_GET_TEXT_LEN: usize = 30_000;
/// approx. max. length returned by dc_get_msg_info()
pub const DC_MAX_GET_INFO_LEN: usize = 100_000;
pub const DC_CONTACT_ID_UNDEFINED: u32 = 0; pub const DC_CONTACT_ID_UNDEFINED: u32 = 0;
pub const DC_CONTACT_ID_SELF: u32 = 1; pub const DC_CONTACT_ID_SELF: u32 = 1;

View File

@@ -17,7 +17,7 @@ use chrono::{Local, TimeZone};
use rand::{thread_rng, Rng}; use rand::{thread_rng, Rng};
use crate::chat::{add_device_msg, add_device_msg_with_importance}; use crate::chat::{add_device_msg, add_device_msg_with_importance};
use crate::constants::{Viewtype, DC_ELLIPSE, DC_OUTDATED_WARNING_DAYS}; use crate::constants::{Viewtype, DC_ELLIPSIS, DC_OUTDATED_WARNING_DAYS};
use crate::context::Context; use crate::context::Context;
use crate::events::EventType; use crate::events::EventType;
use crate::message::Message; use crate::message::Message;
@@ -29,7 +29,7 @@ use crate::stock_str;
#[allow(clippy::indexing_slicing)] #[allow(clippy::indexing_slicing)]
pub(crate) fn dc_truncate(buf: &str, approx_chars: usize) -> Cow<str> { pub(crate) fn dc_truncate(buf: &str, approx_chars: usize) -> Cow<str> {
let count = buf.chars().count(); let count = buf.chars().count();
if approx_chars > 0 && count > approx_chars + DC_ELLIPSE.len() { if count > approx_chars + DC_ELLIPSIS.len() {
let end_pos = buf let end_pos = buf
.char_indices() .char_indices()
.nth(approx_chars) .nth(approx_chars)
@@ -37,9 +37,9 @@ pub(crate) fn dc_truncate(buf: &str, approx_chars: usize) -> Cow<str> {
.unwrap_or_default(); .unwrap_or_default();
if let Some(index) = buf[..end_pos].rfind(|c| c == ' ' || c == '\n') { if let Some(index) = buf[..end_pos].rfind(|c| c == ' ' || c == '\n') {
Cow::Owned(format!("{}{}", &buf[..=index], DC_ELLIPSE)) Cow::Owned(format!("{}{}", &buf[..=index], DC_ELLIPSIS))
} else { } else {
Cow::Owned(format!("{}{}", &buf[..end_pos], DC_ELLIPSE)) Cow::Owned(format!("{}{}", &buf[..end_pos], DC_ELLIPSIS))
} }
} else { } else {
Cow::Borrowed(buf) Cow::Borrowed(buf)
@@ -711,10 +711,7 @@ mod tests {
assert_eq!(dc_truncate("\n hello \n world", 4), "\n [...]"); assert_eq!(dc_truncate("\n hello \n world", 4), "\n [...]");
assert_eq!(dc_truncate("𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ", 1), "𐠈[...]"); assert_eq!(dc_truncate("𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ", 1), "𐠈[...]");
assert_eq!( assert_eq!(dc_truncate("𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ", 0), "[...]");
dc_truncate("𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ", 0),
"𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ"
);
// 9 characters, so no truncation // 9 characters, so no truncation
assert_eq!(dc_truncate("𑒀ὐ¢🜀\u{1e01b}A a🟠", 6), "𑒀ὐ¢🜀\u{1e01b}A a🟠",); assert_eq!(dc_truncate("𑒀ὐ¢🜀\u{1e01b}A a🟠", 6), "𑒀ὐ¢🜀\u{1e01b}A a🟠",);

View File

@@ -14,7 +14,7 @@ use crate::chat::{self, Chat, ChatId};
use crate::config::Config; use crate::config::Config;
use crate::constants::{ use crate::constants::{
Blocked, Chattype, VideochatType, Viewtype, DC_CHAT_ID_TRASH, DC_CONTACT_ID_INFO, Blocked, Chattype, VideochatType, Viewtype, DC_CHAT_ID_TRASH, DC_CONTACT_ID_INFO,
DC_CONTACT_ID_SELF, DC_MAX_GET_INFO_LEN, DC_MAX_GET_TEXT_LEN, DC_MSG_ID_LAST_SPECIAL, DC_CONTACT_ID_SELF, DC_DESIRED_TEXT_LEN, DC_MSG_ID_LAST_SPECIAL,
}; };
use crate::contact::{Contact, Origin}; use crate::contact::{Contact, Origin};
use crate::context::Context; use crate::context::Context;
@@ -541,9 +541,7 @@ impl Message {
} }
pub fn get_text(&self) -> Option<String> { pub fn get_text(&self) -> Option<String> {
self.text self.text.as_ref().map(|s| s.to_string())
.as_ref()
.map(|text| dc_truncate(text, DC_MAX_GET_TEXT_LEN).to_string())
} }
pub fn get_subject(&self) -> &str { pub fn get_subject(&self) -> &str {
@@ -1142,7 +1140,7 @@ pub async fn get_msg_info(context: &Context, msg_id: MsgId) -> Result<String> {
return Ok(ret); return Ok(ret);
} }
let rawtxt = rawtxt.unwrap_or_default(); let rawtxt = rawtxt.unwrap_or_default();
let rawtxt = dc_truncate(rawtxt.trim(), DC_MAX_GET_INFO_LEN); let rawtxt = dc_truncate(rawtxt.trim(), DC_DESIRED_TEXT_LEN);
let fts = dc_timestamp_to_str(msg.get_timestamp()); let fts = dc_timestamp_to_str(msg.get_timestamp());
ret += &format!("Sent: {}", fts); ret += &format!("Sent: {}", fts);

View File

@@ -10,7 +10,7 @@ use once_cell::sync::Lazy;
use crate::aheader::Aheader; use crate::aheader::Aheader;
use crate::blob::BlobObject; use crate::blob::BlobObject;
use crate::constants::{Viewtype, DC_DESIRED_TEXT_LEN, DC_ELLIPSE}; use crate::constants::{Viewtype, DC_DESIRED_TEXT_LEN, DC_ELLIPSIS};
use crate::contact::addr_normalize; use crate::contact::addr_normalize;
use crate::context::Context; use crate::context::Context;
use crate::dc_tools::{dc_get_filemeta, dc_truncate}; use crate::dc_tools::{dc_get_filemeta, dc_truncate};
@@ -903,13 +903,14 @@ impl MimeMessage {
(simplified_txt, top_quote) (simplified_txt, top_quote)
}; };
let simplified_txt = let simplified_txt = if simplified_txt.chars().count()
if simplified_txt.len() > DC_DESIRED_TEXT_LEN + DC_ELLIPSE.len() { > DC_DESIRED_TEXT_LEN + DC_ELLIPSIS.len()
self.is_mime_modified = true; {
dc_truncate(&*simplified_txt, DC_DESIRED_TEXT_LEN).to_string() self.is_mime_modified = true;
} else { dc_truncate(&*simplified_txt, DC_DESIRED_TEXT_LEN).to_string()
simplified_txt } else {
}; simplified_txt
};
if !simplified_txt.is_empty() || simplified_quote.is_some() { if !simplified_txt.is_empty() || simplified_quote.is_some() {
let mut part = Part { let mut part = Part {
@@ -1604,7 +1605,6 @@ mod tests {
#![allow(clippy::indexing_slicing)] #![allow(clippy::indexing_slicing)]
use super::*; use super::*;
use crate::constants::DC_MAX_GET_TEXT_LEN;
use crate::{ use crate::{
chatlist::Chatlist, chatlist::Chatlist,
config::Config, config::Config,
@@ -2880,21 +2880,20 @@ On 2020-10-25, Bob wrote:
let t = TestContext::new().await; let t = TestContext::new().await;
static REPEAT_TXT: &str = "this text with 42 chars is just repeated.\n"; static REPEAT_TXT: &str = "this text with 42 chars is just repeated.\n";
static REPEAT_CNT: usize = 2000; // results in a text of 84k, should be more than DC_MAX_GET_TEXT_LEN static REPEAT_CNT: usize = 2000; // results in a text of 84k, should be more than DC_DESIRED_TEXT_LEN
let long_txt = format!("From: alice@c.de\n\n{}", REPEAT_TXT.repeat(REPEAT_CNT)); let long_txt = format!("From: alice@c.de\n\n{}", REPEAT_TXT.repeat(REPEAT_CNT));
assert!(DC_DESIRED_TEXT_LEN + DC_ELLIPSE.len() < DC_MAX_GET_TEXT_LEN);
let mimemsg = MimeMessage::from_bytes(&t, long_txt.as_ref()) let mimemsg = MimeMessage::from_bytes(&t, long_txt.as_ref())
.await .await
.unwrap(); .unwrap();
assert_eq!(long_txt.matches("just repeated").count(), REPEAT_CNT); assert_eq!(long_txt.matches("just repeated").count(), REPEAT_CNT);
assert!(long_txt.len() > DC_MAX_GET_TEXT_LEN); assert!(long_txt.len() > DC_DESIRED_TEXT_LEN);
assert!(mimemsg.is_mime_modified); assert!(mimemsg.is_mime_modified);
assert!( assert!(
mimemsg.parts[0].msg.matches("just repeated").count() mimemsg.parts[0].msg.matches("just repeated").count()
< DC_MAX_GET_TEXT_LEN / REPEAT_TXT.len() <= DC_DESIRED_TEXT_LEN / REPEAT_TXT.len()
); );
assert!(mimemsg.parts[0].msg.len() <= DC_MAX_GET_TEXT_LEN); assert!(mimemsg.parts[0].msg.len() <= DC_DESIRED_TEXT_LEN + DC_ELLIPSIS.len());
} }
#[async_std::test] #[async_std::test]