feat: pre-messages / next version of download on demand (#7371)

Closes <https://github.com/chatmail/core/issues/7367>

Co-authored-by: iequidoo <dgreshilov@gmail.com>
Co-authored-by: Hocuri <hocuri@gmx.de>
This commit is contained in:
Simon Laux
2026-01-08 22:14:32 +00:00
committed by GitHub
parent 46bbe5f077
commit 2631745a57
43 changed files with 2843 additions and 1393 deletions

View File

@@ -1,27 +1,19 @@
//! # Download large messages manually.
use std::cmp::max;
use std::collections::BTreeMap;
use anyhow::{Result, anyhow, bail, ensure};
use deltachat_derive::{FromSql, ToSql};
use serde::{Deserialize, Serialize};
use crate::config::Config;
use crate::context::Context;
use crate::imap::session::Session;
use crate::message::{Message, MsgId, Viewtype};
use crate::mimeparser::{MimeMessage, Part};
use crate::tools::time;
use crate::{EventType, chatlist_events, stock_str};
use crate::log::warn;
use crate::message::{self, Message, MsgId, rfc724_mid_exists};
use crate::{EventType, chatlist_events};
/// Download limits should not be used below `MIN_DOWNLOAD_LIMIT`.
///
/// For better UX, some messages as add-member, non-delivery-reports (NDN) or read-receipts (MDN)
/// should always be downloaded completely to handle them correctly,
/// also in larger groups and if group and contact avatar are attached.
/// Most of these cases are caught by `MIN_DOWNLOAD_LIMIT`.
pub(crate) const MIN_DOWNLOAD_LIMIT: u32 = 163840;
pub(crate) mod post_msg_metadata;
pub(crate) use post_msg_metadata::PostMsgMetadata;
/// If a message is downloaded only partially
/// and `delete_server_after` is set to small timeouts (eg. "at once"),
@@ -29,6 +21,16 @@ pub(crate) const MIN_DOWNLOAD_LIMIT: u32 = 163840;
/// `MIN_DELETE_SERVER_AFTER` increases the timeout in this case.
pub(crate) const MIN_DELETE_SERVER_AFTER: i64 = 48 * 60 * 60;
/// From this point onward outgoing messages are considered large
/// and get a Pre-Message, which announces the Post-Message.
/// This is only about sending so we can modify it any time.
/// Current value is a bit less than the minimum auto-download setting from the UIs (which is 160
/// KiB).
pub(crate) const PRE_MSG_ATTACHMENT_SIZE_THRESHOLD: u64 = 140_000;
/// Max size for pre messages. A warning is emitted when this is exceeded.
pub(crate) const PRE_MSG_SIZE_WARNING_THRESHOLD: usize = 150_000;
/// Download state of the message.
#[derive(
Debug,
@@ -64,20 +66,8 @@ pub enum DownloadState {
InProgress = 1000,
}
impl Context {
// Returns validated download limit or `None` for "no limit".
pub(crate) async fn download_limit(&self) -> Result<Option<u32>> {
let download_limit = self.get_config_int(Config::DownloadLimit).await?;
if download_limit <= 0 {
Ok(None)
} else {
Ok(Some(max(MIN_DOWNLOAD_LIMIT, download_limit as u32)))
}
}
}
impl MsgId {
/// Schedules full message download for partially downloaded message.
/// Schedules Post-Message download for partially downloaded message.
pub async fn download_full(self, context: &Context) -> Result<()> {
let msg = Message::load_from_db(context, self).await?;
match msg.download_state() {
@@ -86,11 +76,22 @@ impl MsgId {
}
DownloadState::InProgress => return Err(anyhow!("Download already in progress.")),
DownloadState::Available | DownloadState::Failure => {
if msg.rfc724_mid().is_empty() {
return Err(anyhow!("Download not possible, message has no rfc724_mid"));
}
self.update_download_state(context, DownloadState::InProgress)
.await?;
info!(
context,
"Requesting full download of {:?}.",
msg.rfc724_mid()
);
context
.sql
.execute("INSERT INTO download (msg_id) VALUES (?)", (self,))
.execute(
"INSERT INTO download (rfc724_mid, msg_id) VALUES (?,?)",
(msg.rfc724_mid(), msg.id),
)
.await?;
context.scheduler.interrupt_inbox().await;
}
@@ -139,20 +140,9 @@ impl Message {
/// Most messages are downloaded automatically on fetch instead.
pub(crate) async fn download_msg(
context: &Context,
msg_id: MsgId,
rfc724_mid: String,
session: &mut Session,
) -> Result<()> {
let Some(msg) = Message::load_from_db_optional(context, msg_id).await? else {
// If partially downloaded message was already deleted
// we do not know its Message-ID anymore
// so cannot download it.
//
// Probably the message expired due to `delete_device_after`
// setting or was otherwise removed from the device,
// so we don't want it to reappear anyway.
return Ok(());
};
let transport_id = session.transport_id();
let row = context
.sql
@@ -161,7 +151,7 @@ pub(crate) async fn download_msg(
WHERE rfc724_mid=?
AND transport_id=?
AND target!=''",
(&msg.rfc724_mid, transport_id),
(&rfc724_mid, transport_id),
|row| {
let server_uid: u32 = row.get(0)?;
let server_folder: String = row.get(1)?;
@@ -172,11 +162,13 @@ pub(crate) async fn download_msg(
let Some((server_uid, server_folder)) = row else {
// No IMAP record found, we don't know the UID and folder.
return Err(anyhow!("Call download_full() again to try over."));
return Err(anyhow!(
"IMAP location for {rfc724_mid:?} post-message is unknown"
));
};
session
.fetch_single_msg(context, &server_folder, server_uid, msg.rfc724_mid.clone())
.fetch_single_msg(context, &server_folder, server_uid, rfc724_mid)
.await?;
Ok(())
}
@@ -209,7 +201,7 @@ impl Session {
let mut uid_message_ids: BTreeMap<u32, String> = BTreeMap::new();
uid_message_ids.insert(uid, rfc724_mid);
let (sender, receiver) = async_channel::unbounded();
self.fetch_many_msgs(context, folder, vec![uid], &uid_message_ids, false, sender)
self.fetch_many_msgs(context, folder, vec![uid], &uid_message_ids, sender)
.await?;
if receiver.recv().await.is_err() {
bail!("Failed to fetch UID {uid}");
@@ -218,41 +210,139 @@ impl Session {
}
}
impl MimeMessage {
/// Creates a placeholder part and add that to `parts`.
///
/// To create the placeholder, only the outermost header can be used,
/// the mime-structure itself is not available.
///
/// The placeholder part currently contains a text with size and availability of the message.
pub(crate) async fn create_stub_from_partial_download(
&mut self,
context: &Context,
org_bytes: u32,
) -> Result<()> {
let mut text = format!(
"[{}]",
stock_str::partial_download_msg_body(context, org_bytes).await
);
if let Some(delete_server_after) = context.get_config_delete_server_after().await? {
let until = stock_str::download_availability(
context,
time() + max(delete_server_after, MIN_DELETE_SERVER_AFTER),
)
.await;
text += format!(" [{until}]").as_str();
};
info!(context, "Partial download: {}", text);
self.do_add_single_part(Part {
typ: Viewtype::Text,
msg: text,
..Default::default()
});
Ok(())
async fn set_state_to_failure(context: &Context, rfc724_mid: &str) -> Result<()> {
if let Some(msg_id) = rfc724_mid_exists(context, rfc724_mid).await? {
// Update download state to failure
// so it can be retried.
//
// On success update_download_state() is not needed
// as receive_imf() already
// set the state and emitted the event.
msg_id
.update_download_state(context, DownloadState::Failure)
.await?;
}
Ok(())
}
async fn available_post_msgs_contains_rfc724_mid(
context: &Context,
rfc724_mid: &str,
) -> Result<bool> {
Ok(context
.sql
.query_get_value::<String>(
"SELECT rfc724_mid FROM available_post_msgs WHERE rfc724_mid=?",
(&rfc724_mid,),
)
.await?
.is_some())
}
async fn delete_from_available_post_msgs(context: &Context, rfc724_mid: &str) -> Result<()> {
context
.sql
.execute(
"DELETE FROM available_post_msgs WHERE rfc724_mid=?",
(&rfc724_mid,),
)
.await?;
Ok(())
}
async fn delete_from_downloads(context: &Context, rfc724_mid: &str) -> Result<()> {
context
.sql
.execute("DELETE FROM download WHERE rfc724_mid=?", (&rfc724_mid,))
.await?;
Ok(())
}
pub(crate) async fn msg_is_downloaded_for(context: &Context, rfc724_mid: &str) -> Result<bool> {
Ok(message::rfc724_mid_exists(context, rfc724_mid)
.await?
.is_some())
}
pub(crate) async fn download_msgs(context: &Context, session: &mut Session) -> Result<()> {
let rfc724_mids = context
.sql
.query_map_vec("SELECT rfc724_mid FROM download", (), |row| {
let rfc724_mid: String = row.get(0)?;
Ok(rfc724_mid)
})
.await?;
for rfc724_mid in &rfc724_mids {
let res = download_msg(context, rfc724_mid.clone(), session).await;
if res.is_ok() {
delete_from_downloads(context, rfc724_mid).await?;
delete_from_available_post_msgs(context, rfc724_mid).await?;
}
if let Err(err) = res {
warn!(
context,
"Failed to download message rfc724_mid={rfc724_mid}: {:#}.", err
);
if !msg_is_downloaded_for(context, rfc724_mid).await? {
// This is probably a classical email that vanished before we could download it
warn!(
context,
"{rfc724_mid} download failed and there is no downloaded pre-message."
);
delete_from_downloads(context, rfc724_mid).await?;
} else if available_post_msgs_contains_rfc724_mid(context, rfc724_mid).await? {
warn!(
context,
"{rfc724_mid} is in available_post_msgs table but we failed to fetch it,
so set the message to DownloadState::Failure - probably it was deleted on the server in the meantime"
);
set_state_to_failure(context, rfc724_mid).await?;
delete_from_downloads(context, rfc724_mid).await?;
delete_from_available_post_msgs(context, rfc724_mid).await?;
} else {
// leave the message in DownloadState::InProgress;
// it will be downloaded once it arrives.
}
}
}
Ok(())
}
/// Downloads known post-messages without pre-messages
/// in order to guard against lost pre-messages.
pub(crate) async fn download_known_post_messages_without_pre_message(
context: &Context,
session: &mut Session,
) -> Result<()> {
let rfc724_mids = context
.sql
.query_map_vec("SELECT rfc724_mid FROM available_post_msgs", (), |row| {
let rfc724_mid: String = row.get(0)?;
Ok(rfc724_mid)
})
.await?;
for rfc724_mid in &rfc724_mids {
if !msg_is_downloaded_for(context, rfc724_mid).await? {
// Download the Post-Message unconditionally,
// because the Pre-Message got lost.
// The message may be in the wrong order,
// but at least we have it at all.
let res = download_msg(context, rfc724_mid.clone(), session).await;
if res.is_ok() {
delete_from_available_post_msgs(context, rfc724_mid).await?;
}
if let Err(err) = res {
warn!(
context,
"download_known_post_messages_without_pre_message: Failed to download message rfc724_mid={rfc724_mid}: {:#}.",
err
);
}
}
}
Ok(())
}
#[cfg(test)]
@@ -260,11 +350,8 @@ mod tests {
use num_traits::FromPrimitive;
use super::*;
use crate::chat::{get_chat_msgs, send_msg};
use crate::ephemeral::Timer;
use crate::message::delete_msgs;
use crate::receive_imf::receive_imf_from_inbox;
use crate::test_utils::{E2EE_INFO_MSGS, TestContext, TestContextManager};
use crate::chat::send_msg;
use crate::test_utils::TestContext;
#[test]
fn test_downloadstate_values() {
@@ -282,29 +369,6 @@ mod tests {
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_download_limit() -> Result<()> {
let t = TestContext::new_alice().await;
assert_eq!(t.download_limit().await?, None);
t.set_config(Config::DownloadLimit, Some("200000")).await?;
assert_eq!(t.download_limit().await?, Some(200000));
t.set_config(Config::DownloadLimit, Some("20000")).await?;
assert_eq!(t.download_limit().await?, Some(MIN_DOWNLOAD_LIMIT));
t.set_config(Config::DownloadLimit, None).await?;
assert_eq!(t.download_limit().await?, None);
for val in &["0", "-1", "-100", "", "foo"] {
t.set_config(Config::DownloadLimit, Some(val)).await?;
assert_eq!(t.download_limit().await?, None);
}
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_update_download_state() -> Result<()> {
let t = TestContext::new_alice().await;
@@ -336,230 +400,4 @@ mod tests {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_partial_receive_imf() -> Result<()> {
let t = TestContext::new_alice().await;
let header = "Received: (Postfix, from userid 1000); Mon, 4 Dec 2006 14:51:39 +0100 (CET)\n\
From: bob@example.com\n\
To: alice@example.org\n\
Subject: foo\n\
Message-ID: <Mr.12345678901@example.com>\n\
Chat-Version: 1.0\n\
Date: Sun, 22 Mar 2020 22:37:57 +0000\
Content-Type: text/plain";
receive_imf_from_inbox(
&t,
"Mr.12345678901@example.com",
header.as_bytes(),
false,
Some(100000),
)
.await?;
let msg = t.get_last_msg().await;
assert_eq!(msg.download_state(), DownloadState::Available);
assert_eq!(msg.get_subject(), "foo");
assert!(
msg.get_text()
.contains(&stock_str::partial_download_msg_body(&t, 100000).await)
);
receive_imf_from_inbox(
&t,
"Mr.12345678901@example.com",
format!("{header}\n\n100k text...").as_bytes(),
false,
None,
)
.await?;
let msg = t.get_last_msg().await;
assert_eq!(msg.download_state(), DownloadState::Done);
assert_eq!(msg.get_subject(), "foo");
assert_eq!(msg.get_text(), "100k text...");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_partial_download_and_ephemeral() -> Result<()> {
let t = TestContext::new_alice().await;
let chat_id = t
.create_chat_with_contact("bob", "bob@example.org")
.await
.id;
chat_id
.set_ephemeral_timer(&t, Timer::Enabled { duration: 60 })
.await?;
// download message from bob partially, this must not change the ephemeral timer
receive_imf_from_inbox(
&t,
"first@example.org",
b"From: Bob <bob@example.org>\n\
To: Alice <alice@example.org>\n\
Chat-Version: 1.0\n\
Subject: subject\n\
Message-ID: <first@example.org>\n\
Date: Sun, 14 Nov 2021 00:10:00 +0000\
Content-Type: text/plain",
false,
Some(100000),
)
.await?;
assert_eq!(
chat_id.get_ephemeral_timer(&t).await?,
Timer::Enabled { duration: 60 }
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_status_update_expands_to_nothing() -> Result<()> {
let alice = TestContext::new_alice().await;
let bob = TestContext::new_bob().await;
let chat_id = alice.create_chat(&bob).await.id;
let file = alice.get_blobdir().join("minimal.xdc");
tokio::fs::write(&file, include_bytes!("../test-data/webxdc/minimal.xdc")).await?;
let mut instance = Message::new(Viewtype::File);
instance.set_file_and_deduplicate(&alice, &file, None, None)?;
let _sent1 = alice.send_msg(chat_id, &mut instance).await;
alice
.send_webxdc_status_update(instance.id, r#"{"payload":7}"#)
.await?;
alice.flush_status_updates().await?;
let sent2 = alice.pop_sent_msg().await;
let sent2_rfc724_mid = sent2.load_from_db().await.rfc724_mid;
// not downloading the status update results in an placeholder
receive_imf_from_inbox(
&bob,
&sent2_rfc724_mid,
sent2.payload().as_bytes(),
false,
Some(sent2.payload().len() as u32),
)
.await?;
let msg = bob.get_last_msg().await;
let chat_id = msg.chat_id;
assert_eq!(
get_chat_msgs(&bob, chat_id).await?.len(),
E2EE_INFO_MSGS + 1
);
assert_eq!(msg.download_state(), DownloadState::Available);
// downloading the status update afterwards expands to nothing and moves the placeholder to trash-chat
// (usually status updates are too small for not being downloaded directly)
receive_imf_from_inbox(
&bob,
&sent2_rfc724_mid,
sent2.payload().as_bytes(),
false,
None,
)
.await?;
assert_eq!(get_chat_msgs(&bob, chat_id).await?.len(), E2EE_INFO_MSGS);
assert!(
Message::load_from_db_optional(&bob, msg.id)
.await?
.is_none()
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_mdn_expands_to_nothing() -> Result<()> {
let bob = TestContext::new_bob().await;
let raw = b"Subject: Message opened\n\
Date: Mon, 10 Jan 2020 00:00:00 +0000\n\
Chat-Version: 1.0\n\
Message-ID: <bar@example.org>\n\
To: Alice <alice@example.org>\n\
From: Bob <bob@example.org>\n\
Content-Type: multipart/report; report-type=disposition-notification;\n\t\
boundary=\"kJBbU58X1xeWNHgBtTbMk80M5qnV4N\"\n\
\n\
\n\
--kJBbU58X1xeWNHgBtTbMk80M5qnV4N\n\
Content-Type: text/plain; charset=utf-8\n\
\n\
bla\n\
\n\
\n\
--kJBbU58X1xeWNHgBtTbMk80M5qnV4N\n\
Content-Type: message/disposition-notification\n\
\n\
Reporting-UA: Delta Chat 1.88.0\n\
Original-Recipient: rfc822;bob@example.org\n\
Final-Recipient: rfc822;bob@example.org\n\
Original-Message-ID: <foo@example.org>\n\
Disposition: manual-action/MDN-sent-automatically; displayed\n\
\n\
\n\
--kJBbU58X1xeWNHgBtTbMk80M5qnV4N--\n\
";
// not downloading the mdn results in an placeholder
receive_imf_from_inbox(&bob, "bar@example.org", raw, false, Some(raw.len() as u32)).await?;
let msg = bob.get_last_msg().await;
let chat_id = msg.chat_id;
assert_eq!(get_chat_msgs(&bob, chat_id).await?.len(), 1);
assert_eq!(msg.download_state(), DownloadState::Available);
// downloading the mdn afterwards expands to nothing and deletes the placeholder directly
// (usually mdn are too small for not being downloaded directly)
receive_imf_from_inbox(&bob, "bar@example.org", raw, false, None).await?;
assert_eq!(get_chat_msgs(&bob, chat_id).await?.len(), 0);
assert!(
Message::load_from_db_optional(&bob, msg.id)
.await?
.is_none()
);
Ok(())
}
/// Tests that fully downloading the message
/// works even if the Message-ID already exists
/// in the database assigned to the trash chat.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_partial_download_trashed() -> Result<()> {
let mut tcm = TestContextManager::new();
let alice = &tcm.alice().await;
let imf_raw = b"From: Bob <bob@example.org>\n\
To: Alice <alice@example.org>\n\
Chat-Version: 1.0\n\
Subject: subject\n\
Message-ID: <first@example.org>\n\
Date: Sun, 14 Nov 2021 00:10:00 +0000\
Content-Type: text/plain";
// Download message from Bob partially.
let partial_received_msg =
receive_imf_from_inbox(alice, "first@example.org", imf_raw, false, Some(100000))
.await?
.unwrap();
assert_eq!(partial_received_msg.msg_ids.len(), 1);
// Delete the received message.
// Not it is still in the database,
// but in the trash chat.
delete_msgs(alice, &[partial_received_msg.msg_ids[0]]).await?;
// Fully download message after deletion.
let full_received_msg =
receive_imf_from_inbox(alice, "first@example.org", imf_raw, false, None).await?;
// The message does not reappear.
// However, `receive_imf` should not fail.
assert!(full_received_msg.is_none());
Ok(())
}
}