mirror of
https://github.com/chatmail/core.git
synced 2026-05-04 05:46:29 +03:00
Reduce message length limit to 5000 chars (#2615)
- Use the same limit for info: full text can be read in HTML anyway.
- Remove DC_MAX_GET_{TEXT,INFO}_LEN constants from deltachat.h
- Fix a typo: s/DC_ELLIPSE/DC_ELLIPSIS/
- Do not truncate the text when loading from the database.
- Update the documentation: limit is in Rust chars, not bytes
This commit is contained in:
@@ -3220,10 +3220,6 @@ int64_t dc_chat_get_remaining_mute_duration (const dc_chat_t* chat);
|
||||
#define DC_STATE_OUT_MDN_RCVD 28
|
||||
|
||||
|
||||
#define DC_MAX_GET_TEXT_LEN 30000 // approx. max. length returned by dc_msg_get_text()
|
||||
#define DC_MAX_GET_INFO_LEN 100000 // approx. max. length returned by dc_get_msg_info()
|
||||
|
||||
|
||||
/**
|
||||
* Create new message object. Message objects are needed e.g. for sending messages using
|
||||
* dc_send_msg(). Moreover, they are returned e.g. from dc_get_msg(),
|
||||
|
||||
@@ -165,36 +165,18 @@ pub const DC_MSG_ID_MARKER1: u32 = 1;
|
||||
pub const DC_MSG_ID_DAYMARKER: u32 = 9;
|
||||
pub const DC_MSG_ID_LAST_SPECIAL: u32 = 9;
|
||||
|
||||
/// string that indicates sth. is left out or truncated
|
||||
pub const DC_ELLIPSE: &str = "[...]";
|
||||
/// String that indicates that something is left out or truncated.
|
||||
pub const DC_ELLIPSIS: &str = "[...]";
|
||||
|
||||
/// to keep bubbles and chat flow usable,
|
||||
/// and to avoid problems with controls using very long texts,
|
||||
/// we limit the text length to DC_DESIRED_TEXT_LEN.
|
||||
/// if the text is longer, the full text can be retrieved using has_html()/get_html().
|
||||
/// Message length limit.
|
||||
///
|
||||
/// we are using a bit less than DC_MAX_GET_TEXT_LEN to avoid cutting twice
|
||||
/// (a bit less as truncation may not be exact and ellipses may be added).
|
||||
/// To keep bubbles and chat flow usable and to avoid problems with controls using very long texts,
|
||||
/// we limit the text length to `DC_DESIRED_TEXT_LEN`. If the text is longer, the full text can be
|
||||
/// retrieved using has_html()/get_html().
|
||||
///
|
||||
/// note, that DC_DESIRED_TEXT_LEN and DC_MAX_GET_TEXT_LEN
|
||||
/// define max. number of bytes, _not_ unicode graphemes.
|
||||
/// in general, that seems to be okay for such an upper limit,
|
||||
/// esp. as calculating the number of graphemes is not simple
|
||||
/// (one graphemes may be a sequence of code points which is a sequence of bytes).
|
||||
/// also even if we have the exact number of graphemes,
|
||||
/// that would not always help on getting an idea about the screen space used
|
||||
/// (to keep bubbles and chat flow usable).
|
||||
///
|
||||
/// therefore, the number of bytes is only a very rough estimation,
|
||||
/// however, the ~30K seems to work okayish for a while,
|
||||
/// if it turns out, it is too few for some alphabet, we can still increase.
|
||||
pub const DC_DESIRED_TEXT_LEN: usize = 29_000;
|
||||
|
||||
/// approx. max. length (number of bytes) returned by dc_msg_get_text()
|
||||
pub const DC_MAX_GET_TEXT_LEN: usize = 30_000;
|
||||
|
||||
/// approx. max. length returned by dc_get_msg_info()
|
||||
pub const DC_MAX_GET_INFO_LEN: usize = 100_000;
|
||||
/// Note that for simplicity maximum length is defined as the number of Unicode Scalar Values (Rust
|
||||
/// `char`s), not Unicode Grapheme Clusters.
|
||||
pub const DC_DESIRED_TEXT_LEN: usize = 5000;
|
||||
|
||||
pub const DC_CONTACT_ID_UNDEFINED: u32 = 0;
|
||||
pub const DC_CONTACT_ID_SELF: u32 = 1;
|
||||
|
||||
@@ -17,7 +17,7 @@ use chrono::{Local, TimeZone};
|
||||
use rand::{thread_rng, Rng};
|
||||
|
||||
use crate::chat::{add_device_msg, add_device_msg_with_importance};
|
||||
use crate::constants::{Viewtype, DC_ELLIPSE, DC_OUTDATED_WARNING_DAYS};
|
||||
use crate::constants::{Viewtype, DC_ELLIPSIS, DC_OUTDATED_WARNING_DAYS};
|
||||
use crate::context::Context;
|
||||
use crate::events::EventType;
|
||||
use crate::message::Message;
|
||||
@@ -29,7 +29,7 @@ use crate::stock_str;
|
||||
#[allow(clippy::indexing_slicing)]
|
||||
pub(crate) fn dc_truncate(buf: &str, approx_chars: usize) -> Cow<str> {
|
||||
let count = buf.chars().count();
|
||||
if approx_chars > 0 && count > approx_chars + DC_ELLIPSE.len() {
|
||||
if count > approx_chars + DC_ELLIPSIS.len() {
|
||||
let end_pos = buf
|
||||
.char_indices()
|
||||
.nth(approx_chars)
|
||||
@@ -37,9 +37,9 @@ pub(crate) fn dc_truncate(buf: &str, approx_chars: usize) -> Cow<str> {
|
||||
.unwrap_or_default();
|
||||
|
||||
if let Some(index) = buf[..end_pos].rfind(|c| c == ' ' || c == '\n') {
|
||||
Cow::Owned(format!("{}{}", &buf[..=index], DC_ELLIPSE))
|
||||
Cow::Owned(format!("{}{}", &buf[..=index], DC_ELLIPSIS))
|
||||
} else {
|
||||
Cow::Owned(format!("{}{}", &buf[..end_pos], DC_ELLIPSE))
|
||||
Cow::Owned(format!("{}{}", &buf[..end_pos], DC_ELLIPSIS))
|
||||
}
|
||||
} else {
|
||||
Cow::Borrowed(buf)
|
||||
@@ -711,10 +711,7 @@ mod tests {
|
||||
assert_eq!(dc_truncate("\n hello \n world", 4), "\n [...]");
|
||||
|
||||
assert_eq!(dc_truncate("𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ", 1), "𐠈[...]");
|
||||
assert_eq!(
|
||||
dc_truncate("𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ", 0),
|
||||
"𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ"
|
||||
);
|
||||
assert_eq!(dc_truncate("𐠈0Aᝮa𫝀®!ꫛa¡0A𐢧00𐹠®A 丽ⷐએ", 0), "[...]");
|
||||
|
||||
// 9 characters, so no truncation
|
||||
assert_eq!(dc_truncate("𑒀ὐ¢🜀\u{1e01b}A a🟠", 6), "𑒀ὐ¢🜀\u{1e01b}A a🟠",);
|
||||
|
||||
@@ -14,7 +14,7 @@ use crate::chat::{self, Chat, ChatId};
|
||||
use crate::config::Config;
|
||||
use crate::constants::{
|
||||
Blocked, Chattype, VideochatType, Viewtype, DC_CHAT_ID_TRASH, DC_CONTACT_ID_INFO,
|
||||
DC_CONTACT_ID_SELF, DC_MAX_GET_INFO_LEN, DC_MAX_GET_TEXT_LEN, DC_MSG_ID_LAST_SPECIAL,
|
||||
DC_CONTACT_ID_SELF, DC_DESIRED_TEXT_LEN, DC_MSG_ID_LAST_SPECIAL,
|
||||
};
|
||||
use crate::contact::{Contact, Origin};
|
||||
use crate::context::Context;
|
||||
@@ -541,9 +541,7 @@ impl Message {
|
||||
}
|
||||
|
||||
pub fn get_text(&self) -> Option<String> {
|
||||
self.text
|
||||
.as_ref()
|
||||
.map(|text| dc_truncate(text, DC_MAX_GET_TEXT_LEN).to_string())
|
||||
self.text.as_ref().map(|s| s.to_string())
|
||||
}
|
||||
|
||||
pub fn get_subject(&self) -> &str {
|
||||
@@ -1142,7 +1140,7 @@ pub async fn get_msg_info(context: &Context, msg_id: MsgId) -> Result<String> {
|
||||
return Ok(ret);
|
||||
}
|
||||
let rawtxt = rawtxt.unwrap_or_default();
|
||||
let rawtxt = dc_truncate(rawtxt.trim(), DC_MAX_GET_INFO_LEN);
|
||||
let rawtxt = dc_truncate(rawtxt.trim(), DC_DESIRED_TEXT_LEN);
|
||||
|
||||
let fts = dc_timestamp_to_str(msg.get_timestamp());
|
||||
ret += &format!("Sent: {}", fts);
|
||||
|
||||
@@ -10,7 +10,7 @@ use once_cell::sync::Lazy;
|
||||
|
||||
use crate::aheader::Aheader;
|
||||
use crate::blob::BlobObject;
|
||||
use crate::constants::{Viewtype, DC_DESIRED_TEXT_LEN, DC_ELLIPSE};
|
||||
use crate::constants::{Viewtype, DC_DESIRED_TEXT_LEN, DC_ELLIPSIS};
|
||||
use crate::contact::addr_normalize;
|
||||
use crate::context::Context;
|
||||
use crate::dc_tools::{dc_get_filemeta, dc_truncate};
|
||||
@@ -903,13 +903,14 @@ impl MimeMessage {
|
||||
(simplified_txt, top_quote)
|
||||
};
|
||||
|
||||
let simplified_txt =
|
||||
if simplified_txt.len() > DC_DESIRED_TEXT_LEN + DC_ELLIPSE.len() {
|
||||
self.is_mime_modified = true;
|
||||
dc_truncate(&*simplified_txt, DC_DESIRED_TEXT_LEN).to_string()
|
||||
} else {
|
||||
simplified_txt
|
||||
};
|
||||
let simplified_txt = if simplified_txt.chars().count()
|
||||
> DC_DESIRED_TEXT_LEN + DC_ELLIPSIS.len()
|
||||
{
|
||||
self.is_mime_modified = true;
|
||||
dc_truncate(&*simplified_txt, DC_DESIRED_TEXT_LEN).to_string()
|
||||
} else {
|
||||
simplified_txt
|
||||
};
|
||||
|
||||
if !simplified_txt.is_empty() || simplified_quote.is_some() {
|
||||
let mut part = Part {
|
||||
@@ -1604,7 +1605,6 @@ mod tests {
|
||||
#![allow(clippy::indexing_slicing)]
|
||||
|
||||
use super::*;
|
||||
use crate::constants::DC_MAX_GET_TEXT_LEN;
|
||||
use crate::{
|
||||
chatlist::Chatlist,
|
||||
config::Config,
|
||||
@@ -2880,21 +2880,20 @@ On 2020-10-25, Bob wrote:
|
||||
let t = TestContext::new().await;
|
||||
|
||||
static REPEAT_TXT: &str = "this text with 42 chars is just repeated.\n";
|
||||
static REPEAT_CNT: usize = 2000; // results in a text of 84k, should be more than DC_MAX_GET_TEXT_LEN
|
||||
static REPEAT_CNT: usize = 2000; // results in a text of 84k, should be more than DC_DESIRED_TEXT_LEN
|
||||
let long_txt = format!("From: alice@c.de\n\n{}", REPEAT_TXT.repeat(REPEAT_CNT));
|
||||
assert!(DC_DESIRED_TEXT_LEN + DC_ELLIPSE.len() < DC_MAX_GET_TEXT_LEN);
|
||||
|
||||
let mimemsg = MimeMessage::from_bytes(&t, long_txt.as_ref())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(long_txt.matches("just repeated").count(), REPEAT_CNT);
|
||||
assert!(long_txt.len() > DC_MAX_GET_TEXT_LEN);
|
||||
assert!(long_txt.len() > DC_DESIRED_TEXT_LEN);
|
||||
assert!(mimemsg.is_mime_modified);
|
||||
assert!(
|
||||
mimemsg.parts[0].msg.matches("just repeated").count()
|
||||
< DC_MAX_GET_TEXT_LEN / REPEAT_TXT.len()
|
||||
<= DC_DESIRED_TEXT_LEN / REPEAT_TXT.len()
|
||||
);
|
||||
assert!(mimemsg.parts[0].msg.len() <= DC_MAX_GET_TEXT_LEN);
|
||||
assert!(mimemsg.parts[0].msg.len() <= DC_DESIRED_TEXT_LEN + DC_ELLIPSIS.len());
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
|
||||
Reference in New Issue
Block a user