From f28fcec81dd7259a2a17bd620fa5aa8793589340 Mon Sep 17 00:00:00 2001 From: link2xt Date: Sun, 27 Feb 2022 12:15:35 +0000 Subject: [PATCH] imap: do not treat messages without Message-ID as duplicates Message-IDs are now retrieved only during fetching and saved into imap table. dc_receive_imf_inner does not attempt to extract the Message-ID anymore. For messages without Message-ID the ID is now generated in imap::fetch_new_messages rather than dc_receive_imf_inner, so the same ID is used in the imap table (maintained by the imap module) and msgs table (maintained by dc_receive_imf module). Message-ID generation based on the Date, From and To field hashing has been replaced with a simple dc_create_id() to avoid retrieving Date, From, and To fields in the imap module, as it's hard to test that it stays compatible between Delta Chat versions in this module. This breaks jump-to-quote for quoted messages without Message-ID, which is not critical. Also prefetch X-Microsoft-Original-Message-ID, so retrieval of duplicate messages with X-Microsoft-Original-Message-ID can be skipped like it is done for messages with Message-ID header. --- CHANGELOG.md | 3 ++ src/dc_receive_imf.rs | 93 ++++++++++++++++--------------------------- src/download.rs | 21 ++++++++-- src/imap.rs | 34 ++++++++++++---- src/mimeparser.rs | 2 +- 5 files changed, 83 insertions(+), 70 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ed7fcc3a..5f1a1bd9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ - change semantics of `dc_get_webxdc_status_updates()` second parameter and remove update-id from `DC_EVENT_WEBXDC_STATUS_UPDATE` #3081 +### Fixes +- do not delete messages without Message-IDs as duplicates #3095 + ### Changes - add more SMTP logging #3093 - place common headers like `From:` before the large `Autocrypt:` header #3079 diff --git a/src/dc_receive_imf.rs b/src/dc_receive_imf.rs index b7482b0e2..44ad9ec54 100644 --- a/src/dc_receive_imf.rs +++ b/src/dc_receive_imf.rs @@ -5,7 +5,7 @@ use std::collections::BTreeSet; use std::convert::TryFrom; use anyhow::{bail, ensure, Context as _, Result}; -use mailparse::SingleInfo; +use mailparse::{parse_mail, SingleInfo}; use num_traits::FromPrimitive; use once_cell::sync::Lazy; use regex::Regex; @@ -21,7 +21,7 @@ use crate::contact::{ addr_cmp, may_be_valid_addr, normalize_name, Contact, Origin, VerifiedStatus, }; use crate::context::Context; -use crate::dc_tools::{dc_extract_grpid_from_rfc724_mid, dc_smeared_time}; +use crate::dc_tools::{dc_create_id, dc_extract_grpid_from_rfc724_mid, dc_smeared_time}; use crate::download::DownloadState; use crate::ephemeral::{stock_ephemeral_timer_changed, Timer as EphemeralTimer}; use crate::events::EventType; @@ -30,7 +30,7 @@ use crate::job::{self, Action}; use crate::log::LogExt; use crate::message::{self, rfc724_mid_exists, Message, MessageState, MessengerMessage, MsgId}; use crate::mimeparser::{ - parse_message_ids, AvatarAction, MailinglistType, MimeMessage, SystemMessage, + parse_message_id, parse_message_ids, AvatarAction, MailinglistType, MimeMessage, SystemMessage, }; use crate::param::{Param, Params}; use crate::peerstate::{Peerstate, PeerstateKeyType, PeerstateVerifiedStatus}; @@ -56,6 +56,34 @@ pub struct ReceivedMsg { // Feel free to add more fields here } +/// Emulates reception of a message from the network. +/// +/// This method returns errors on a failure to parse the mail or extract Message-ID. It's only used +/// for tests and REPL tool, not actual message reception pipeline. +pub async fn dc_receive_imf( + context: &Context, + imf_raw: &[u8], + server_folder: &str, + seen: bool, +) -> Result> { + let mail = parse_mail(imf_raw).context("can't parse mail")?; + let rfc724_mid = mail + .headers + .get_header_value(HeaderDef::MessageId) + .and_then(|msgid| parse_message_id(&msgid).ok()) + .unwrap_or_else(dc_create_id); + dc_receive_imf_inner( + context, + &rfc724_mid, + imf_raw, + server_folder, + seen, + None, + false, + ) + .await +} + /// Receive a message and add it to the database. /// /// Returns an error on recoverable errors, e.g. database errors. In this case, @@ -67,19 +95,12 @@ pub struct ReceivedMsg { /// downloaded again, sets `chat_id=DC_CHAT_ID_TRASH` and returns `Ok(Some(…))` /// - If the message is so wrong that we didn't even create a database entry, /// returns `Ok(None)` -pub async fn dc_receive_imf( - context: &Context, - imf_raw: &[u8], - server_folder: &str, - seen: bool, -) -> Result> { - dc_receive_imf_inner(context, imf_raw, server_folder, seen, None, false).await -} - +/// /// If `is_partial_download` is set, it contains the full message size in bytes. /// Do not confuse that with `replace_partial_download` that will be set when the full message is loaded later. pub(crate) async fn dc_receive_imf_inner( context: &Context, + rfc724_mid: &str, imf_raw: &[u8], server_folder: &str, seen: bool, @@ -111,17 +132,12 @@ pub(crate) async fn dc_receive_imf_inner( return Ok(None); } - let rfc724_mid = mime_parser.get_rfc724_mid().unwrap_or_else(|| - // missing Message-IDs may come if the mail was set from this account with another - // client that relies in the SMTP server to generate one. - // true eg. for the Webmailer used in all-inkl-KAS - dc_create_incoming_rfc724_mid(&mime_parser)); info!(context, "received message has Message-Id: {}", rfc724_mid); // check, if the mail is already in our database. // make sure, this check is done eg. before securejoin-processing. let replace_partial_download = - if let Some(old_msg_id) = message::rfc724_mid_exists(context, &rfc724_mid).await? { + if let Some(old_msg_id) = message::rfc724_mid_exists(context, rfc724_mid).await? { let msg = Message::load_from_db(context, old_msg_id).await?; if msg.download_state() != DownloadState::Done && is_partial_download.is_none() { // the mesage was partially downloaded before and is fully downloaded now. @@ -195,7 +211,7 @@ pub(crate) async fn dc_receive_imf_inner( incoming, server_folder, &to_ids, - &rfc724_mid, + rfc724_mid, sent_timestamp, rcvd_timestamp, from_id, @@ -2323,28 +2339,6 @@ async fn add_or_lookup_contact_by_addr( Ok(row_id) } -/// Creates fake Message-ID to identify the message in the database for -/// messages which does not have one. -/// -/// Concatenates Date:, From: and To: fields, then hashes them. -fn dc_create_incoming_rfc724_mid(mime: &MimeMessage) -> String { - format!( - "{}@stub", - hex_hash(&format!( - "{}-{}-{}", - mime.get_header(HeaderDef::Date) - .map(|s| s.to_string()) - .unwrap_or_default(), - mime.get_header(HeaderDef::From_) - .map(|s| s.to_string()) - .unwrap_or_default(), - mime.get_header(HeaderDef::To) - .map(|s| s.to_string()) - .unwrap_or_default() - )) - ) -} - #[cfg(test)] mod tests { @@ -2401,23 +2395,6 @@ mod tests { assert_eq!(extract_grpid(&mimeparser, HeaderDef::References), grpid); } - #[async_std::test] - async fn test_dc_create_incoming_rfc724_mid() { - let context = TestContext::new().await; - let raw = b"From: Alice \n\ - To: Bob \n\ - Subject: Some subject\n\ - hello\n"; - let mimeparser = MimeMessage::from_bytes(&context.ctx, &raw[..]) - .await - .unwrap(); - - assert_eq!( - dc_create_incoming_rfc724_mid(&mimeparser), - "ca971a2eefd651f6@stub" - ); - } - static MSGRMSG: &[u8] = b"Received: (Postfix, from userid 1000); Mon, 4 Dec 2006 14:51:39 +0100 (CET)\n\ From: Bob \n\ diff --git a/src/download.rs b/src/download.rs index c0874780f..bf7ba55f5 100644 --- a/src/download.rs +++ b/src/download.rs @@ -3,6 +3,7 @@ use anyhow::{anyhow, Result}; use deltachat_derive::{FromSql, ToSql}; use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; use crate::config::Config; use crate::constants::Viewtype; @@ -146,7 +147,7 @@ impl Job { if let Some((server_uid, server_folder)) = row { match imap - .fetch_single_msg(context, &server_folder, server_uid) + .fetch_single_msg(context, &server_folder, server_uid, msg.rfc724_mid.clone()) .await { ImapActionResult::RetryLater | ImapActionResult::Failed => { @@ -185,6 +186,7 @@ impl Imap { context: &Context, folder: &str, uid: u32, + rfc724_mid: String, ) -> ImapActionResult { if let Some(imapresult) = self .prepare_imap_operation_on_msg(context, folder, uid) @@ -196,8 +198,10 @@ impl Imap { // we are connected, and the folder is selected info!(context, "Downloading message {}/{} fully...", folder, uid); + let mut uid_message_ids: BTreeMap = BTreeMap::new(); + uid_message_ids.insert(uid, rfc724_mid); let (last_uid, _received) = match self - .fetch_many_msgs(context, folder, vec![uid], false, false) + .fetch_many_msgs(context, folder, vec![uid], &uid_message_ids, false, false) .await { Ok(res) => res, @@ -337,7 +341,16 @@ mod tests { Date: Sun, 22 Mar 2020 22:37:57 +0000\ Content-Type: text/plain"; - dc_receive_imf_inner(&t, header.as_bytes(), "INBOX", false, Some(100000), false).await?; + dc_receive_imf_inner( + &t, + "Mr.12345678901@example.com", + header.as_bytes(), + "INBOX", + false, + Some(100000), + false, + ) + .await?; let msg = t.get_last_msg().await; assert_eq!(msg.download_state(), DownloadState::Available); assert_eq!(msg.get_subject(), "foo"); @@ -348,6 +361,7 @@ mod tests { dc_receive_imf_inner( &t, + "Mr.12345678901@example.com", format!("{}\n\n100k text...", header).as_bytes(), "INBOX", false, @@ -377,6 +391,7 @@ mod tests { // download message from bob partially, this must not change the ephemeral timer dc_receive_imf_inner( &t, + "first@example.org", b"From: Bob \n\ To: Alice \n\ Chat-Version: 1.0\n\ diff --git a/src/imap.rs b/src/imap.rs index f483a50be..660b98b5a 100644 --- a/src/imap.rs +++ b/src/imap.rs @@ -28,6 +28,7 @@ use crate::context::Context; use crate::dc_receive_imf::{ dc_receive_imf_inner, from_field_to_contact_id, get_prefetch_parent_message, ReceivedMsg, }; +use crate::dc_tools::dc_create_id; use crate::events::EventType; use crate::headerdef::{HeaderDef, HeaderDefMap}; use crate::job::{self, Action}; @@ -69,6 +70,7 @@ pub enum ImapActionResult { /// not necessarily sent by Delta Chat. const PREFETCH_FLAGS: &str = "(UID RFC822.SIZE BODY.PEEK[HEADER.FIELDS (\ MESSAGE-ID \ + X-MICROSOFT-ORIGINAL-MESSAGE-ID \ FROM \ IN-REPLY-TO REFERENCES \ CHAT-VERSION \ @@ -494,9 +496,8 @@ impl Imap { let msg = fetch?; // Get Message-ID - let message_id = get_fetch_headers(&msg) - .and_then(|headers| prefetch_get_message_id(&headers)) - .ok(); + let message_id = + get_fetch_headers(&msg).map_or(None, |headers| prefetch_get_message_id(&headers)); if let (Some(uid), Some(rfc724_mid)) = (msg.uid, message_id) { msg_ids.insert(uid, rfc724_mid); @@ -688,6 +689,7 @@ impl Imap { let download_limit = context.download_limit().await?; let mut uids_fetch_fully = Vec::with_capacity(msgs.len()); let mut uids_fetch_partially = Vec::with_capacity(msgs.len()); + let mut uid_message_ids = BTreeMap::new(); let mut largest_uid_skipped = None; // Store the info about IMAP messages in the database. @@ -700,7 +702,8 @@ impl Imap { } }; - let message_id = prefetch_get_message_id(&headers).unwrap_or_default(); + // Get the Message-ID or generate a fake one to identify the message in the database. + let message_id = prefetch_get_message_id(&headers).unwrap_or_else(dc_create_id); let target = match target_folder(context, folder, &headers).await? { Some(config) => match context.get_config(config).await? { @@ -772,6 +775,7 @@ impl Imap { } None => uids_fetch_fully.push(uid), } + uid_message_ids.insert(uid, message_id); } else { largest_uid_skipped = Some(uid); } @@ -787,6 +791,7 @@ impl Imap { context, folder, uids_fetch_fully, + &uid_message_ids, false, fetch_existing_msgs, ) @@ -797,6 +802,7 @@ impl Imap { context, folder, uids_fetch_partially, + &uid_message_ids, true, fetch_existing_msgs, ) @@ -1232,6 +1238,7 @@ impl Imap { context: &Context, folder: &str, server_uids: Vec, + uid_message_ids: &BTreeMap, fetch_partially: bool, fetching_existing_messages: bool, ) -> Result<(Option, Vec)> { @@ -1314,8 +1321,19 @@ impl Imap { .context("we checked that message has body right above, but it has vanished")?; let is_seen = msg.flags().any(|flag| flag == Flag::Seen); + let rfc724_mid = if let Some(rfc724_mid) = &uid_message_ids.get(&server_uid) { + rfc724_mid + } else { + warn!( + context, + "No Message-ID corresponding to UID {} passed in uid_messsage_ids", + server_uid + ); + "" + }; match dc_receive_imf_inner( &context, + rfc724_mid, body, &folder, is_seen, @@ -1841,13 +1859,13 @@ fn get_fetch_headers(prefetch_msg: &Fetch) -> Result> } } -fn prefetch_get_message_id(headers: &[mailparse::MailHeader]) -> Result { +fn prefetch_get_message_id(headers: &[mailparse::MailHeader]) -> Option { if let Some(message_id) = headers.get_header_value(HeaderDef::XMicrosoftOriginalMessageId) { - Ok(crate::mimeparser::parse_message_id(&message_id)?) + crate::mimeparser::parse_message_id(&message_id).ok() } else if let Some(message_id) = headers.get_header_value(HeaderDef::MessageId) { - Ok(crate::mimeparser::parse_message_id(&message_id)?) + crate::mimeparser::parse_message_id(&message_id).ok() } else { - bail!("prefetch: No message ID found"); + None } } diff --git a/src/mimeparser.rs b/src/mimeparser.rs index c61d004e5..8b23c552d 100644 --- a/src/mimeparser.rs +++ b/src/mimeparser.rs @@ -1138,7 +1138,7 @@ impl MimeMessage { } } - pub fn get_rfc724_mid(&self) -> Option { + pub(crate) fn get_rfc724_mid(&self) -> Option { self.get_header(HeaderDef::XMicrosoftOriginalMessageId) .or_else(|| self.get_header(HeaderDef::MessageId)) .and_then(|msgid| parse_message_id(msgid).ok())