imap: do not treat messages without Message-ID as duplicates

Message-IDs are now retrieved only during fetching and saved into imap
table. dc_receive_imf_inner does not attempt to extract the Message-ID
anymore.

For messages without Message-ID the ID is now generated in
imap::fetch_new_messages rather than dc_receive_imf_inner,
so the same ID is used in the imap table (maintained by the imap
module) and msgs table (maintained by dc_receive_imf module).

Message-ID generation based on the Date, From and To field hashing has
been replaced with a simple dc_create_id() to avoid retrieving Date,
From, and To fields in the imap module, as it's hard to test that it
stays compatible between Delta Chat versions in this module. This
breaks jump-to-quote for quoted messages without Message-ID, which is
not critical.

Also prefetch X-Microsoft-Original-Message-ID, so retrieval of
duplicate messages with X-Microsoft-Original-Message-ID can be skipped
like it is done for messages with Message-ID header.
This commit is contained in:
link2xt
2022-02-27 12:15:35 +00:00
parent 586d027f86
commit f28fcec81d
5 changed files with 83 additions and 70 deletions

View File

@@ -6,6 +6,9 @@
- change semantics of `dc_get_webxdc_status_updates()` second parameter
and remove update-id from `DC_EVENT_WEBXDC_STATUS_UPDATE` #3081
### Fixes
- do not delete messages without Message-IDs as duplicates #3095
### Changes
- add more SMTP logging #3093
- place common headers like `From:` before the large `Autocrypt:` header #3079

View File

@@ -5,7 +5,7 @@ use std::collections::BTreeSet;
use std::convert::TryFrom;
use anyhow::{bail, ensure, Context as _, Result};
use mailparse::SingleInfo;
use mailparse::{parse_mail, SingleInfo};
use num_traits::FromPrimitive;
use once_cell::sync::Lazy;
use regex::Regex;
@@ -21,7 +21,7 @@ use crate::contact::{
addr_cmp, may_be_valid_addr, normalize_name, Contact, Origin, VerifiedStatus,
};
use crate::context::Context;
use crate::dc_tools::{dc_extract_grpid_from_rfc724_mid, dc_smeared_time};
use crate::dc_tools::{dc_create_id, dc_extract_grpid_from_rfc724_mid, dc_smeared_time};
use crate::download::DownloadState;
use crate::ephemeral::{stock_ephemeral_timer_changed, Timer as EphemeralTimer};
use crate::events::EventType;
@@ -30,7 +30,7 @@ use crate::job::{self, Action};
use crate::log::LogExt;
use crate::message::{self, rfc724_mid_exists, Message, MessageState, MessengerMessage, MsgId};
use crate::mimeparser::{
parse_message_ids, AvatarAction, MailinglistType, MimeMessage, SystemMessage,
parse_message_id, parse_message_ids, AvatarAction, MailinglistType, MimeMessage, SystemMessage,
};
use crate::param::{Param, Params};
use crate::peerstate::{Peerstate, PeerstateKeyType, PeerstateVerifiedStatus};
@@ -56,6 +56,34 @@ pub struct ReceivedMsg {
// Feel free to add more fields here
}
/// Emulates reception of a message from the network.
///
/// This method returns errors on a failure to parse the mail or extract Message-ID. It's only used
/// for tests and REPL tool, not actual message reception pipeline.
pub async fn dc_receive_imf(
context: &Context,
imf_raw: &[u8],
server_folder: &str,
seen: bool,
) -> Result<Option<ReceivedMsg>> {
let mail = parse_mail(imf_raw).context("can't parse mail")?;
let rfc724_mid = mail
.headers
.get_header_value(HeaderDef::MessageId)
.and_then(|msgid| parse_message_id(&msgid).ok())
.unwrap_or_else(dc_create_id);
dc_receive_imf_inner(
context,
&rfc724_mid,
imf_raw,
server_folder,
seen,
None,
false,
)
.await
}
/// Receive a message and add it to the database.
///
/// Returns an error on recoverable errors, e.g. database errors. In this case,
@@ -67,19 +95,12 @@ pub struct ReceivedMsg {
/// downloaded again, sets `chat_id=DC_CHAT_ID_TRASH` and returns `Ok(Some(…))`
/// - If the message is so wrong that we didn't even create a database entry,
/// returns `Ok(None)`
pub async fn dc_receive_imf(
context: &Context,
imf_raw: &[u8],
server_folder: &str,
seen: bool,
) -> Result<Option<ReceivedMsg>> {
dc_receive_imf_inner(context, imf_raw, server_folder, seen, None, false).await
}
///
/// If `is_partial_download` is set, it contains the full message size in bytes.
/// Do not confuse that with `replace_partial_download` that will be set when the full message is loaded later.
pub(crate) async fn dc_receive_imf_inner(
context: &Context,
rfc724_mid: &str,
imf_raw: &[u8],
server_folder: &str,
seen: bool,
@@ -111,17 +132,12 @@ pub(crate) async fn dc_receive_imf_inner(
return Ok(None);
}
let rfc724_mid = mime_parser.get_rfc724_mid().unwrap_or_else(||
// missing Message-IDs may come if the mail was set from this account with another
// client that relies in the SMTP server to generate one.
// true eg. for the Webmailer used in all-inkl-KAS
dc_create_incoming_rfc724_mid(&mime_parser));
info!(context, "received message has Message-Id: {}", rfc724_mid);
// check, if the mail is already in our database.
// make sure, this check is done eg. before securejoin-processing.
let replace_partial_download =
if let Some(old_msg_id) = message::rfc724_mid_exists(context, &rfc724_mid).await? {
if let Some(old_msg_id) = message::rfc724_mid_exists(context, rfc724_mid).await? {
let msg = Message::load_from_db(context, old_msg_id).await?;
if msg.download_state() != DownloadState::Done && is_partial_download.is_none() {
// the mesage was partially downloaded before and is fully downloaded now.
@@ -195,7 +211,7 @@ pub(crate) async fn dc_receive_imf_inner(
incoming,
server_folder,
&to_ids,
&rfc724_mid,
rfc724_mid,
sent_timestamp,
rcvd_timestamp,
from_id,
@@ -2323,28 +2339,6 @@ async fn add_or_lookup_contact_by_addr(
Ok(row_id)
}
/// Creates fake Message-ID to identify the message in the database for
/// messages which does not have one.
///
/// Concatenates Date:, From: and To: fields, then hashes them.
fn dc_create_incoming_rfc724_mid(mime: &MimeMessage) -> String {
format!(
"{}@stub",
hex_hash(&format!(
"{}-{}-{}",
mime.get_header(HeaderDef::Date)
.map(|s| s.to_string())
.unwrap_or_default(),
mime.get_header(HeaderDef::From_)
.map(|s| s.to_string())
.unwrap_or_default(),
mime.get_header(HeaderDef::To)
.map(|s| s.to_string())
.unwrap_or_default()
))
)
}
#[cfg(test)]
mod tests {
@@ -2401,23 +2395,6 @@ mod tests {
assert_eq!(extract_grpid(&mimeparser, HeaderDef::References), grpid);
}
#[async_std::test]
async fn test_dc_create_incoming_rfc724_mid() {
let context = TestContext::new().await;
let raw = b"From: Alice <alice@example.org>\n\
To: Bob <bob@example.org>\n\
Subject: Some subject\n\
hello\n";
let mimeparser = MimeMessage::from_bytes(&context.ctx, &raw[..])
.await
.unwrap();
assert_eq!(
dc_create_incoming_rfc724_mid(&mimeparser),
"ca971a2eefd651f6@stub"
);
}
static MSGRMSG: &[u8] =
b"Received: (Postfix, from userid 1000); Mon, 4 Dec 2006 14:51:39 +0100 (CET)\n\
From: Bob <bob@example.com>\n\

View File

@@ -3,6 +3,7 @@
use anyhow::{anyhow, Result};
use deltachat_derive::{FromSql, ToSql};
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use crate::config::Config;
use crate::constants::Viewtype;
@@ -146,7 +147,7 @@ impl Job {
if let Some((server_uid, server_folder)) = row {
match imap
.fetch_single_msg(context, &server_folder, server_uid)
.fetch_single_msg(context, &server_folder, server_uid, msg.rfc724_mid.clone())
.await
{
ImapActionResult::RetryLater | ImapActionResult::Failed => {
@@ -185,6 +186,7 @@ impl Imap {
context: &Context,
folder: &str,
uid: u32,
rfc724_mid: String,
) -> ImapActionResult {
if let Some(imapresult) = self
.prepare_imap_operation_on_msg(context, folder, uid)
@@ -196,8 +198,10 @@ impl Imap {
// we are connected, and the folder is selected
info!(context, "Downloading message {}/{} fully...", folder, uid);
let mut uid_message_ids: BTreeMap<u32, String> = BTreeMap::new();
uid_message_ids.insert(uid, rfc724_mid);
let (last_uid, _received) = match self
.fetch_many_msgs(context, folder, vec![uid], false, false)
.fetch_many_msgs(context, folder, vec![uid], &uid_message_ids, false, false)
.await
{
Ok(res) => res,
@@ -337,7 +341,16 @@ mod tests {
Date: Sun, 22 Mar 2020 22:37:57 +0000\
Content-Type: text/plain";
dc_receive_imf_inner(&t, header.as_bytes(), "INBOX", false, Some(100000), false).await?;
dc_receive_imf_inner(
&t,
"Mr.12345678901@example.com",
header.as_bytes(),
"INBOX",
false,
Some(100000),
false,
)
.await?;
let msg = t.get_last_msg().await;
assert_eq!(msg.download_state(), DownloadState::Available);
assert_eq!(msg.get_subject(), "foo");
@@ -348,6 +361,7 @@ mod tests {
dc_receive_imf_inner(
&t,
"Mr.12345678901@example.com",
format!("{}\n\n100k text...", header).as_bytes(),
"INBOX",
false,
@@ -377,6 +391,7 @@ mod tests {
// download message from bob partially, this must not change the ephemeral timer
dc_receive_imf_inner(
&t,
"first@example.org",
b"From: Bob <bob@example.org>\n\
To: Alice <alice@example.org>\n\
Chat-Version: 1.0\n\

View File

@@ -28,6 +28,7 @@ use crate::context::Context;
use crate::dc_receive_imf::{
dc_receive_imf_inner, from_field_to_contact_id, get_prefetch_parent_message, ReceivedMsg,
};
use crate::dc_tools::dc_create_id;
use crate::events::EventType;
use crate::headerdef::{HeaderDef, HeaderDefMap};
use crate::job::{self, Action};
@@ -69,6 +70,7 @@ pub enum ImapActionResult {
/// not necessarily sent by Delta Chat.
const PREFETCH_FLAGS: &str = "(UID RFC822.SIZE BODY.PEEK[HEADER.FIELDS (\
MESSAGE-ID \
X-MICROSOFT-ORIGINAL-MESSAGE-ID \
FROM \
IN-REPLY-TO REFERENCES \
CHAT-VERSION \
@@ -494,9 +496,8 @@ impl Imap {
let msg = fetch?;
// Get Message-ID
let message_id = get_fetch_headers(&msg)
.and_then(|headers| prefetch_get_message_id(&headers))
.ok();
let message_id =
get_fetch_headers(&msg).map_or(None, |headers| prefetch_get_message_id(&headers));
if let (Some(uid), Some(rfc724_mid)) = (msg.uid, message_id) {
msg_ids.insert(uid, rfc724_mid);
@@ -688,6 +689,7 @@ impl Imap {
let download_limit = context.download_limit().await?;
let mut uids_fetch_fully = Vec::with_capacity(msgs.len());
let mut uids_fetch_partially = Vec::with_capacity(msgs.len());
let mut uid_message_ids = BTreeMap::new();
let mut largest_uid_skipped = None;
// Store the info about IMAP messages in the database.
@@ -700,7 +702,8 @@ impl Imap {
}
};
let message_id = prefetch_get_message_id(&headers).unwrap_or_default();
// Get the Message-ID or generate a fake one to identify the message in the database.
let message_id = prefetch_get_message_id(&headers).unwrap_or_else(dc_create_id);
let target = match target_folder(context, folder, &headers).await? {
Some(config) => match context.get_config(config).await? {
@@ -772,6 +775,7 @@ impl Imap {
}
None => uids_fetch_fully.push(uid),
}
uid_message_ids.insert(uid, message_id);
} else {
largest_uid_skipped = Some(uid);
}
@@ -787,6 +791,7 @@ impl Imap {
context,
folder,
uids_fetch_fully,
&uid_message_ids,
false,
fetch_existing_msgs,
)
@@ -797,6 +802,7 @@ impl Imap {
context,
folder,
uids_fetch_partially,
&uid_message_ids,
true,
fetch_existing_msgs,
)
@@ -1232,6 +1238,7 @@ impl Imap {
context: &Context,
folder: &str,
server_uids: Vec<u32>,
uid_message_ids: &BTreeMap<u32, String>,
fetch_partially: bool,
fetching_existing_messages: bool,
) -> Result<(Option<u32>, Vec<ReceivedMsg>)> {
@@ -1314,8 +1321,19 @@ impl Imap {
.context("we checked that message has body right above, but it has vanished")?;
let is_seen = msg.flags().any(|flag| flag == Flag::Seen);
let rfc724_mid = if let Some(rfc724_mid) = &uid_message_ids.get(&server_uid) {
rfc724_mid
} else {
warn!(
context,
"No Message-ID corresponding to UID {} passed in uid_messsage_ids",
server_uid
);
""
};
match dc_receive_imf_inner(
&context,
rfc724_mid,
body,
&folder,
is_seen,
@@ -1841,13 +1859,13 @@ fn get_fetch_headers(prefetch_msg: &Fetch) -> Result<Vec<mailparse::MailHeader>>
}
}
fn prefetch_get_message_id(headers: &[mailparse::MailHeader]) -> Result<String> {
fn prefetch_get_message_id(headers: &[mailparse::MailHeader]) -> Option<String> {
if let Some(message_id) = headers.get_header_value(HeaderDef::XMicrosoftOriginalMessageId) {
Ok(crate::mimeparser::parse_message_id(&message_id)?)
crate::mimeparser::parse_message_id(&message_id).ok()
} else if let Some(message_id) = headers.get_header_value(HeaderDef::MessageId) {
Ok(crate::mimeparser::parse_message_id(&message_id)?)
crate::mimeparser::parse_message_id(&message_id).ok()
} else {
bail!("prefetch: No message ID found");
None
}
}

View File

@@ -1138,7 +1138,7 @@ impl MimeMessage {
}
}
pub fn get_rfc724_mid(&self) -> Option<String> {
pub(crate) fn get_rfc724_mid(&self) -> Option<String> {
self.get_header(HeaderDef::XMicrosoftOriginalMessageId)
.or_else(|| self.get_header(HeaderDef::MessageId))
.and_then(|msgid| parse_message_id(msgid).ok())