Reduce message length limit to 5000 chars (#2615)

- Use the same limit for info: full text can be read in HTML anyway.
- Remove DC_MAX_GET_{TEXT,INFO}_LEN constants from deltachat.h
- Fix a typo: s/DC_ELLIPSE/DC_ELLIPSIS/
- Do not truncate the text when loading from the database.
- Update the documentation: limit is in Rust chars, not bytes
This commit is contained in:
link2xt
2021-08-21 22:02:14 +03:00
committed by GitHub
parent d0bfb555dd
commit 3440daca1a
5 changed files with 30 additions and 58 deletions

View File

@@ -3220,10 +3220,6 @@ int64_t dc_chat_get_remaining_mute_duration (const dc_chat_t* chat);
#define DC_STATE_OUT_MDN_RCVD 28
#define DC_MAX_GET_TEXT_LEN 30000 // approx. max. length returned by dc_msg_get_text()
#define DC_MAX_GET_INFO_LEN 100000 // approx. max. length returned by dc_get_msg_info()
/**
* Create new message object. Message objects are needed e.g. for sending messages using
* dc_send_msg(). Moreover, they are returned e.g. from dc_get_msg(),

View File

@@ -165,36 +165,18 @@ pub const DC_MSG_ID_MARKER1: u32 = 1;
pub const DC_MSG_ID_DAYMARKER: u32 = 9;
pub const DC_MSG_ID_LAST_SPECIAL: u32 = 9;
/// string that indicates sth. is left out or truncated
pub const DC_ELLIPSE: &str = "[...]";
/// String that indicates that something is left out or truncated.
pub const DC_ELLIPSIS: &str = "[...]";
/// to keep bubbles and chat flow usable,
/// and to avoid problems with controls using very long texts,
/// we limit the text length to DC_DESIRED_TEXT_LEN.
/// if the text is longer, the full text can be retrieved using has_html()/get_html().
/// Message length limit.
///
/// we are using a bit less than DC_MAX_GET_TEXT_LEN to avoid cutting twice
/// (a bit less as truncation may not be exact and ellipses may be added).
/// To keep bubbles and chat flow usable and to avoid problems with controls using very long texts,
/// we limit the text length to `DC_DESIRED_TEXT_LEN`. If the text is longer, the full text can be
/// retrieved using has_html()/get_html().
///
/// note, that DC_DESIRED_TEXT_LEN and DC_MAX_GET_TEXT_LEN
/// define max. number of bytes, _not_ unicode graphemes.
/// in general, that seems to be okay for such an upper limit,
/// esp. as calculating the number of graphemes is not simple
/// (one graphemes may be a sequence of code points which is a sequence of bytes).
/// also even if we have the exact number of graphemes,
/// that would not always help on getting an idea about the screen space used
/// (to keep bubbles and chat flow usable).
///
/// therefore, the number of bytes is only a very rough estimation,
/// however, the ~30K seems to work okayish for a while,
/// if it turns out, it is too few for some alphabet, we can still increase.
pub const DC_DESIRED_TEXT_LEN: usize = 29_000;
/// approx. max. length (number of bytes) returned by dc_msg_get_text()
pub const DC_MAX_GET_TEXT_LEN: usize = 30_000;
/// approx. max. length returned by dc_get_msg_info()
pub const DC_MAX_GET_INFO_LEN: usize = 100_000;
/// Note that for simplicity maximum length is defined as the number of Unicode Scalar Values (Rust
/// `char`s), not Unicode Grapheme Clusters.
pub const DC_DESIRED_TEXT_LEN: usize = 5000;
pub const DC_CONTACT_ID_UNDEFINED: u32 = 0;
pub const DC_CONTACT_ID_SELF: u32 = 1;

View File

@@ -17,7 +17,7 @@ use chrono::{Local, TimeZone};
use rand::{thread_rng, Rng};
use crate::chat::{add_device_msg, add_device_msg_with_importance};
use crate::constants::{Viewtype, DC_ELLIPSE, DC_OUTDATED_WARNING_DAYS};
use crate::constants::{Viewtype, DC_ELLIPSIS, DC_OUTDATED_WARNING_DAYS};
use crate::context::Context;
use crate::events::EventType;
use crate::message::Message;
@@ -29,7 +29,7 @@ use crate::stock_str;
#[allow(clippy::indexing_slicing)]
pub(crate) fn dc_truncate(buf: &str, approx_chars: usize) -> Cow<str> {
let count = buf.chars().count();
if approx_chars > 0 && count > approx_chars + DC_ELLIPSE.len() {
if count > approx_chars + DC_ELLIPSIS.len() {
let end_pos = buf
.char_indices()
.nth(approx_chars)
@@ -37,9 +37,9 @@ pub(crate) fn dc_truncate(buf: &str, approx_chars: usize) -> Cow<str> {
.unwrap_or_default();
if let Some(index) = buf[..end_pos].rfind(|c| c == ' ' || c == '\n') {
Cow::Owned(format!("{}{}", &buf[..=index], DC_ELLIPSE))
Cow::Owned(format!("{}{}", &buf[..=index], DC_ELLIPSIS))
} else {
Cow::Owned(format!("{}{}", &buf[..end_pos], DC_ELLIPSE))
Cow::Owned(format!("{}{}", &buf[..end_pos], DC_ELLIPSIS))
}
} else {
Cow::Borrowed(buf)
@@ -711,10 +711,7 @@ mod tests {
assert_eq!(dc_truncate("\n hello \n world", 4), "\n [...]");
assert_eq!(dc_truncate("𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ", 1), "𐠈[...]");
assert_eq!(
dc_truncate("𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ", 0),
"𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ"
);
assert_eq!(dc_truncate("𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ", 0), "[...]");
// 9 characters, so no truncation
assert_eq!(dc_truncate("𑒀ὐ¢🜀\u{1e01b}A a🟠", 6), "𑒀ὐ¢🜀\u{1e01b}A a🟠",);

View File

@@ -14,7 +14,7 @@ use crate::chat::{self, Chat, ChatId};
use crate::config::Config;
use crate::constants::{
Blocked, Chattype, VideochatType, Viewtype, DC_CHAT_ID_TRASH, DC_CONTACT_ID_INFO,
DC_CONTACT_ID_SELF, DC_MAX_GET_INFO_LEN, DC_MAX_GET_TEXT_LEN, DC_MSG_ID_LAST_SPECIAL,
DC_CONTACT_ID_SELF, DC_DESIRED_TEXT_LEN, DC_MSG_ID_LAST_SPECIAL,
};
use crate::contact::{Contact, Origin};
use crate::context::Context;
@@ -541,9 +541,7 @@ impl Message {
}
pub fn get_text(&self) -> Option<String> {
self.text
.as_ref()
.map(|text| dc_truncate(text, DC_MAX_GET_TEXT_LEN).to_string())
self.text.as_ref().map(|s| s.to_string())
}
pub fn get_subject(&self) -> &str {
@@ -1142,7 +1140,7 @@ pub async fn get_msg_info(context: &Context, msg_id: MsgId) -> Result<String> {
return Ok(ret);
}
let rawtxt = rawtxt.unwrap_or_default();
let rawtxt = dc_truncate(rawtxt.trim(), DC_MAX_GET_INFO_LEN);
let rawtxt = dc_truncate(rawtxt.trim(), DC_DESIRED_TEXT_LEN);
let fts = dc_timestamp_to_str(msg.get_timestamp());
ret += &format!("Sent: {}", fts);

View File

@@ -10,7 +10,7 @@ use once_cell::sync::Lazy;
use crate::aheader::Aheader;
use crate::blob::BlobObject;
use crate::constants::{Viewtype, DC_DESIRED_TEXT_LEN, DC_ELLIPSE};
use crate::constants::{Viewtype, DC_DESIRED_TEXT_LEN, DC_ELLIPSIS};
use crate::contact::addr_normalize;
use crate::context::Context;
use crate::dc_tools::{dc_get_filemeta, dc_truncate};
@@ -903,13 +903,14 @@ impl MimeMessage {
(simplified_txt, top_quote)
};
let simplified_txt =
if simplified_txt.len() > DC_DESIRED_TEXT_LEN + DC_ELLIPSE.len() {
self.is_mime_modified = true;
dc_truncate(&*simplified_txt, DC_DESIRED_TEXT_LEN).to_string()
} else {
simplified_txt
};
let simplified_txt = if simplified_txt.chars().count()
> DC_DESIRED_TEXT_LEN + DC_ELLIPSIS.len()
{
self.is_mime_modified = true;
dc_truncate(&*simplified_txt, DC_DESIRED_TEXT_LEN).to_string()
} else {
simplified_txt
};
if !simplified_txt.is_empty() || simplified_quote.is_some() {
let mut part = Part {
@@ -1604,7 +1605,6 @@ mod tests {
#![allow(clippy::indexing_slicing)]
use super::*;
use crate::constants::DC_MAX_GET_TEXT_LEN;
use crate::{
chatlist::Chatlist,
config::Config,
@@ -2880,21 +2880,20 @@ On 2020-10-25, Bob wrote:
let t = TestContext::new().await;
static REPEAT_TXT: &str = "this text with 42 chars is just repeated.\n";
static REPEAT_CNT: usize = 2000; // results in a text of 84k, should be more than DC_MAX_GET_TEXT_LEN
static REPEAT_CNT: usize = 2000; // results in a text of 84k, should be more than DC_DESIRED_TEXT_LEN
let long_txt = format!("From: alice@c.de\n\n{}", REPEAT_TXT.repeat(REPEAT_CNT));
assert!(DC_DESIRED_TEXT_LEN + DC_ELLIPSE.len() < DC_MAX_GET_TEXT_LEN);
let mimemsg = MimeMessage::from_bytes(&t, long_txt.as_ref())
.await
.unwrap();
assert_eq!(long_txt.matches("just repeated").count(), REPEAT_CNT);
assert!(long_txt.len() > DC_MAX_GET_TEXT_LEN);
assert!(long_txt.len() > DC_DESIRED_TEXT_LEN);
assert!(mimemsg.is_mime_modified);
assert!(
mimemsg.parts[0].msg.matches("just repeated").count()
< DC_MAX_GET_TEXT_LEN / REPEAT_TXT.len()
<= DC_DESIRED_TEXT_LEN / REPEAT_TXT.len()
);
assert!(mimemsg.parts[0].msg.len() <= DC_MAX_GET_TEXT_LEN);
assert!(mimemsg.parts[0].msg.len() <= DC_DESIRED_TEXT_LEN + DC_ELLIPSIS.len());
}
#[async_std::test]