mirror of
https://github.com/chatmail/core.git
synced 2026-05-03 13:26:28 +03:00
imap: do not treat messages without Message-ID as duplicates
Message-IDs are now retrieved only during fetching and saved into imap table. dc_receive_imf_inner does not attempt to extract the Message-ID anymore. For messages without Message-ID the ID is now generated in imap::fetch_new_messages rather than dc_receive_imf_inner, so the same ID is used in the imap table (maintained by the imap module) and msgs table (maintained by dc_receive_imf module). Message-ID generation based on the Date, From and To field hashing has been replaced with a simple dc_create_id() to avoid retrieving Date, From, and To fields in the imap module, as it's hard to test that it stays compatible between Delta Chat versions in this module. This breaks jump-to-quote for quoted messages without Message-ID, which is not critical. Also prefetch X-Microsoft-Original-Message-ID, so retrieval of duplicate messages with X-Microsoft-Original-Message-ID can be skipped like it is done for messages with Message-ID header.
This commit is contained in:
@@ -6,6 +6,9 @@
|
||||
- change semantics of `dc_get_webxdc_status_updates()` second parameter
|
||||
and remove update-id from `DC_EVENT_WEBXDC_STATUS_UPDATE` #3081
|
||||
|
||||
### Fixes
|
||||
- do not delete messages without Message-IDs as duplicates #3095
|
||||
|
||||
### Changes
|
||||
- add more SMTP logging #3093
|
||||
- place common headers like `From:` before the large `Autocrypt:` header #3079
|
||||
|
||||
@@ -5,7 +5,7 @@ use std::collections::BTreeSet;
|
||||
use std::convert::TryFrom;
|
||||
|
||||
use anyhow::{bail, ensure, Context as _, Result};
|
||||
use mailparse::SingleInfo;
|
||||
use mailparse::{parse_mail, SingleInfo};
|
||||
use num_traits::FromPrimitive;
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
@@ -21,7 +21,7 @@ use crate::contact::{
|
||||
addr_cmp, may_be_valid_addr, normalize_name, Contact, Origin, VerifiedStatus,
|
||||
};
|
||||
use crate::context::Context;
|
||||
use crate::dc_tools::{dc_extract_grpid_from_rfc724_mid, dc_smeared_time};
|
||||
use crate::dc_tools::{dc_create_id, dc_extract_grpid_from_rfc724_mid, dc_smeared_time};
|
||||
use crate::download::DownloadState;
|
||||
use crate::ephemeral::{stock_ephemeral_timer_changed, Timer as EphemeralTimer};
|
||||
use crate::events::EventType;
|
||||
@@ -30,7 +30,7 @@ use crate::job::{self, Action};
|
||||
use crate::log::LogExt;
|
||||
use crate::message::{self, rfc724_mid_exists, Message, MessageState, MessengerMessage, MsgId};
|
||||
use crate::mimeparser::{
|
||||
parse_message_ids, AvatarAction, MailinglistType, MimeMessage, SystemMessage,
|
||||
parse_message_id, parse_message_ids, AvatarAction, MailinglistType, MimeMessage, SystemMessage,
|
||||
};
|
||||
use crate::param::{Param, Params};
|
||||
use crate::peerstate::{Peerstate, PeerstateKeyType, PeerstateVerifiedStatus};
|
||||
@@ -56,6 +56,34 @@ pub struct ReceivedMsg {
|
||||
// Feel free to add more fields here
|
||||
}
|
||||
|
||||
/// Emulates reception of a message from the network.
|
||||
///
|
||||
/// This method returns errors on a failure to parse the mail or extract Message-ID. It's only used
|
||||
/// for tests and REPL tool, not actual message reception pipeline.
|
||||
pub async fn dc_receive_imf(
|
||||
context: &Context,
|
||||
imf_raw: &[u8],
|
||||
server_folder: &str,
|
||||
seen: bool,
|
||||
) -> Result<Option<ReceivedMsg>> {
|
||||
let mail = parse_mail(imf_raw).context("can't parse mail")?;
|
||||
let rfc724_mid = mail
|
||||
.headers
|
||||
.get_header_value(HeaderDef::MessageId)
|
||||
.and_then(|msgid| parse_message_id(&msgid).ok())
|
||||
.unwrap_or_else(dc_create_id);
|
||||
dc_receive_imf_inner(
|
||||
context,
|
||||
&rfc724_mid,
|
||||
imf_raw,
|
||||
server_folder,
|
||||
seen,
|
||||
None,
|
||||
false,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Receive a message and add it to the database.
|
||||
///
|
||||
/// Returns an error on recoverable errors, e.g. database errors. In this case,
|
||||
@@ -67,19 +95,12 @@ pub struct ReceivedMsg {
|
||||
/// downloaded again, sets `chat_id=DC_CHAT_ID_TRASH` and returns `Ok(Some(…))`
|
||||
/// - If the message is so wrong that we didn't even create a database entry,
|
||||
/// returns `Ok(None)`
|
||||
pub async fn dc_receive_imf(
|
||||
context: &Context,
|
||||
imf_raw: &[u8],
|
||||
server_folder: &str,
|
||||
seen: bool,
|
||||
) -> Result<Option<ReceivedMsg>> {
|
||||
dc_receive_imf_inner(context, imf_raw, server_folder, seen, None, false).await
|
||||
}
|
||||
|
||||
///
|
||||
/// If `is_partial_download` is set, it contains the full message size in bytes.
|
||||
/// Do not confuse that with `replace_partial_download` that will be set when the full message is loaded later.
|
||||
pub(crate) async fn dc_receive_imf_inner(
|
||||
context: &Context,
|
||||
rfc724_mid: &str,
|
||||
imf_raw: &[u8],
|
||||
server_folder: &str,
|
||||
seen: bool,
|
||||
@@ -111,17 +132,12 @@ pub(crate) async fn dc_receive_imf_inner(
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let rfc724_mid = mime_parser.get_rfc724_mid().unwrap_or_else(||
|
||||
// missing Message-IDs may come if the mail was set from this account with another
|
||||
// client that relies in the SMTP server to generate one.
|
||||
// true eg. for the Webmailer used in all-inkl-KAS
|
||||
dc_create_incoming_rfc724_mid(&mime_parser));
|
||||
info!(context, "received message has Message-Id: {}", rfc724_mid);
|
||||
|
||||
// check, if the mail is already in our database.
|
||||
// make sure, this check is done eg. before securejoin-processing.
|
||||
let replace_partial_download =
|
||||
if let Some(old_msg_id) = message::rfc724_mid_exists(context, &rfc724_mid).await? {
|
||||
if let Some(old_msg_id) = message::rfc724_mid_exists(context, rfc724_mid).await? {
|
||||
let msg = Message::load_from_db(context, old_msg_id).await?;
|
||||
if msg.download_state() != DownloadState::Done && is_partial_download.is_none() {
|
||||
// the mesage was partially downloaded before and is fully downloaded now.
|
||||
@@ -195,7 +211,7 @@ pub(crate) async fn dc_receive_imf_inner(
|
||||
incoming,
|
||||
server_folder,
|
||||
&to_ids,
|
||||
&rfc724_mid,
|
||||
rfc724_mid,
|
||||
sent_timestamp,
|
||||
rcvd_timestamp,
|
||||
from_id,
|
||||
@@ -2323,28 +2339,6 @@ async fn add_or_lookup_contact_by_addr(
|
||||
Ok(row_id)
|
||||
}
|
||||
|
||||
/// Creates fake Message-ID to identify the message in the database for
|
||||
/// messages which does not have one.
|
||||
///
|
||||
/// Concatenates Date:, From: and To: fields, then hashes them.
|
||||
fn dc_create_incoming_rfc724_mid(mime: &MimeMessage) -> String {
|
||||
format!(
|
||||
"{}@stub",
|
||||
hex_hash(&format!(
|
||||
"{}-{}-{}",
|
||||
mime.get_header(HeaderDef::Date)
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_default(),
|
||||
mime.get_header(HeaderDef::From_)
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_default(),
|
||||
mime.get_header(HeaderDef::To)
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_default()
|
||||
))
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
@@ -2401,23 +2395,6 @@ mod tests {
|
||||
assert_eq!(extract_grpid(&mimeparser, HeaderDef::References), grpid);
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn test_dc_create_incoming_rfc724_mid() {
|
||||
let context = TestContext::new().await;
|
||||
let raw = b"From: Alice <alice@example.org>\n\
|
||||
To: Bob <bob@example.org>\n\
|
||||
Subject: Some subject\n\
|
||||
hello\n";
|
||||
let mimeparser = MimeMessage::from_bytes(&context.ctx, &raw[..])
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
dc_create_incoming_rfc724_mid(&mimeparser),
|
||||
"ca971a2eefd651f6@stub"
|
||||
);
|
||||
}
|
||||
|
||||
static MSGRMSG: &[u8] =
|
||||
b"Received: (Postfix, from userid 1000); Mon, 4 Dec 2006 14:51:39 +0100 (CET)\n\
|
||||
From: Bob <bob@example.com>\n\
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
use anyhow::{anyhow, Result};
|
||||
use deltachat_derive::{FromSql, ToSql};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::config::Config;
|
||||
use crate::constants::Viewtype;
|
||||
@@ -146,7 +147,7 @@ impl Job {
|
||||
|
||||
if let Some((server_uid, server_folder)) = row {
|
||||
match imap
|
||||
.fetch_single_msg(context, &server_folder, server_uid)
|
||||
.fetch_single_msg(context, &server_folder, server_uid, msg.rfc724_mid.clone())
|
||||
.await
|
||||
{
|
||||
ImapActionResult::RetryLater | ImapActionResult::Failed => {
|
||||
@@ -185,6 +186,7 @@ impl Imap {
|
||||
context: &Context,
|
||||
folder: &str,
|
||||
uid: u32,
|
||||
rfc724_mid: String,
|
||||
) -> ImapActionResult {
|
||||
if let Some(imapresult) = self
|
||||
.prepare_imap_operation_on_msg(context, folder, uid)
|
||||
@@ -196,8 +198,10 @@ impl Imap {
|
||||
// we are connected, and the folder is selected
|
||||
info!(context, "Downloading message {}/{} fully...", folder, uid);
|
||||
|
||||
let mut uid_message_ids: BTreeMap<u32, String> = BTreeMap::new();
|
||||
uid_message_ids.insert(uid, rfc724_mid);
|
||||
let (last_uid, _received) = match self
|
||||
.fetch_many_msgs(context, folder, vec![uid], false, false)
|
||||
.fetch_many_msgs(context, folder, vec![uid], &uid_message_ids, false, false)
|
||||
.await
|
||||
{
|
||||
Ok(res) => res,
|
||||
@@ -337,7 +341,16 @@ mod tests {
|
||||
Date: Sun, 22 Mar 2020 22:37:57 +0000\
|
||||
Content-Type: text/plain";
|
||||
|
||||
dc_receive_imf_inner(&t, header.as_bytes(), "INBOX", false, Some(100000), false).await?;
|
||||
dc_receive_imf_inner(
|
||||
&t,
|
||||
"Mr.12345678901@example.com",
|
||||
header.as_bytes(),
|
||||
"INBOX",
|
||||
false,
|
||||
Some(100000),
|
||||
false,
|
||||
)
|
||||
.await?;
|
||||
let msg = t.get_last_msg().await;
|
||||
assert_eq!(msg.download_state(), DownloadState::Available);
|
||||
assert_eq!(msg.get_subject(), "foo");
|
||||
@@ -348,6 +361,7 @@ mod tests {
|
||||
|
||||
dc_receive_imf_inner(
|
||||
&t,
|
||||
"Mr.12345678901@example.com",
|
||||
format!("{}\n\n100k text...", header).as_bytes(),
|
||||
"INBOX",
|
||||
false,
|
||||
@@ -377,6 +391,7 @@ mod tests {
|
||||
// download message from bob partially, this must not change the ephemeral timer
|
||||
dc_receive_imf_inner(
|
||||
&t,
|
||||
"first@example.org",
|
||||
b"From: Bob <bob@example.org>\n\
|
||||
To: Alice <alice@example.org>\n\
|
||||
Chat-Version: 1.0\n\
|
||||
|
||||
34
src/imap.rs
34
src/imap.rs
@@ -28,6 +28,7 @@ use crate::context::Context;
|
||||
use crate::dc_receive_imf::{
|
||||
dc_receive_imf_inner, from_field_to_contact_id, get_prefetch_parent_message, ReceivedMsg,
|
||||
};
|
||||
use crate::dc_tools::dc_create_id;
|
||||
use crate::events::EventType;
|
||||
use crate::headerdef::{HeaderDef, HeaderDefMap};
|
||||
use crate::job::{self, Action};
|
||||
@@ -69,6 +70,7 @@ pub enum ImapActionResult {
|
||||
/// not necessarily sent by Delta Chat.
|
||||
const PREFETCH_FLAGS: &str = "(UID RFC822.SIZE BODY.PEEK[HEADER.FIELDS (\
|
||||
MESSAGE-ID \
|
||||
X-MICROSOFT-ORIGINAL-MESSAGE-ID \
|
||||
FROM \
|
||||
IN-REPLY-TO REFERENCES \
|
||||
CHAT-VERSION \
|
||||
@@ -494,9 +496,8 @@ impl Imap {
|
||||
let msg = fetch?;
|
||||
|
||||
// Get Message-ID
|
||||
let message_id = get_fetch_headers(&msg)
|
||||
.and_then(|headers| prefetch_get_message_id(&headers))
|
||||
.ok();
|
||||
let message_id =
|
||||
get_fetch_headers(&msg).map_or(None, |headers| prefetch_get_message_id(&headers));
|
||||
|
||||
if let (Some(uid), Some(rfc724_mid)) = (msg.uid, message_id) {
|
||||
msg_ids.insert(uid, rfc724_mid);
|
||||
@@ -688,6 +689,7 @@ impl Imap {
|
||||
let download_limit = context.download_limit().await?;
|
||||
let mut uids_fetch_fully = Vec::with_capacity(msgs.len());
|
||||
let mut uids_fetch_partially = Vec::with_capacity(msgs.len());
|
||||
let mut uid_message_ids = BTreeMap::new();
|
||||
let mut largest_uid_skipped = None;
|
||||
|
||||
// Store the info about IMAP messages in the database.
|
||||
@@ -700,7 +702,8 @@ impl Imap {
|
||||
}
|
||||
};
|
||||
|
||||
let message_id = prefetch_get_message_id(&headers).unwrap_or_default();
|
||||
// Get the Message-ID or generate a fake one to identify the message in the database.
|
||||
let message_id = prefetch_get_message_id(&headers).unwrap_or_else(dc_create_id);
|
||||
|
||||
let target = match target_folder(context, folder, &headers).await? {
|
||||
Some(config) => match context.get_config(config).await? {
|
||||
@@ -772,6 +775,7 @@ impl Imap {
|
||||
}
|
||||
None => uids_fetch_fully.push(uid),
|
||||
}
|
||||
uid_message_ids.insert(uid, message_id);
|
||||
} else {
|
||||
largest_uid_skipped = Some(uid);
|
||||
}
|
||||
@@ -787,6 +791,7 @@ impl Imap {
|
||||
context,
|
||||
folder,
|
||||
uids_fetch_fully,
|
||||
&uid_message_ids,
|
||||
false,
|
||||
fetch_existing_msgs,
|
||||
)
|
||||
@@ -797,6 +802,7 @@ impl Imap {
|
||||
context,
|
||||
folder,
|
||||
uids_fetch_partially,
|
||||
&uid_message_ids,
|
||||
true,
|
||||
fetch_existing_msgs,
|
||||
)
|
||||
@@ -1232,6 +1238,7 @@ impl Imap {
|
||||
context: &Context,
|
||||
folder: &str,
|
||||
server_uids: Vec<u32>,
|
||||
uid_message_ids: &BTreeMap<u32, String>,
|
||||
fetch_partially: bool,
|
||||
fetching_existing_messages: bool,
|
||||
) -> Result<(Option<u32>, Vec<ReceivedMsg>)> {
|
||||
@@ -1314,8 +1321,19 @@ impl Imap {
|
||||
.context("we checked that message has body right above, but it has vanished")?;
|
||||
let is_seen = msg.flags().any(|flag| flag == Flag::Seen);
|
||||
|
||||
let rfc724_mid = if let Some(rfc724_mid) = &uid_message_ids.get(&server_uid) {
|
||||
rfc724_mid
|
||||
} else {
|
||||
warn!(
|
||||
context,
|
||||
"No Message-ID corresponding to UID {} passed in uid_messsage_ids",
|
||||
server_uid
|
||||
);
|
||||
""
|
||||
};
|
||||
match dc_receive_imf_inner(
|
||||
&context,
|
||||
rfc724_mid,
|
||||
body,
|
||||
&folder,
|
||||
is_seen,
|
||||
@@ -1841,13 +1859,13 @@ fn get_fetch_headers(prefetch_msg: &Fetch) -> Result<Vec<mailparse::MailHeader>>
|
||||
}
|
||||
}
|
||||
|
||||
fn prefetch_get_message_id(headers: &[mailparse::MailHeader]) -> Result<String> {
|
||||
fn prefetch_get_message_id(headers: &[mailparse::MailHeader]) -> Option<String> {
|
||||
if let Some(message_id) = headers.get_header_value(HeaderDef::XMicrosoftOriginalMessageId) {
|
||||
Ok(crate::mimeparser::parse_message_id(&message_id)?)
|
||||
crate::mimeparser::parse_message_id(&message_id).ok()
|
||||
} else if let Some(message_id) = headers.get_header_value(HeaderDef::MessageId) {
|
||||
Ok(crate::mimeparser::parse_message_id(&message_id)?)
|
||||
crate::mimeparser::parse_message_id(&message_id).ok()
|
||||
} else {
|
||||
bail!("prefetch: No message ID found");
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1138,7 +1138,7 @@ impl MimeMessage {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_rfc724_mid(&self) -> Option<String> {
|
||||
pub(crate) fn get_rfc724_mid(&self) -> Option<String> {
|
||||
self.get_header(HeaderDef::XMicrosoftOriginalMessageId)
|
||||
.or_else(|| self.get_header(HeaderDef::MessageId))
|
||||
.and_then(|msgid| parse_message_id(msgid).ok())
|
||||
|
||||
Reference in New Issue
Block a user