Don't download ranges of messages (i.e. first:last) (#2061)

This commit is contained in:
Hocuri
2020-12-03 10:19:56 +01:00
committed by GitHub
parent e79533ae54
commit 7c9624e822

View File

@@ -684,7 +684,7 @@ impl Imap {
// check passed, go fetch the emails // check passed, go fetch the emails
let (new_last_seen_uid_processed, error_cnt) = self let (new_last_seen_uid_processed, error_cnt) = self
.fetch_many_msgs(context, &folder, &uids, fetch_existing_msgs) .fetch_many_msgs(context, &folder, uids, fetch_existing_msgs)
.await; .await;
read_errors += error_cnt; read_errors += error_cnt;
@@ -722,16 +722,14 @@ impl Imap {
.ok_or_else(|| format_err!("Not configured"))?; .ok_or_else(|| format_err!("Not configured"))?;
let search_command = format!("FROM \"{}\"", self_addr); let search_command = format!("FROM \"{}\"", self_addr);
let uids = session.uid_search(search_command).await?; let uids = session
let uid_strings: Vec<String> = uids.into_iter().map(|s| s.to_string()).collect(); .uid_search(search_command)
.await?
.into_iter()
.collect();
let mut result = Vec::new(); let mut result = Vec::new();
// We fetch the emails in chunks of 100 because according to https://tools.ietf.org/html/rfc2683#section-3.2.1.5 for uid_set in &build_sequence_sets(uids) {
// command lines should not be much more than 1000 chars and UIDs can get up to 9- or 10-digit
// (servers should allow at least 8000 chars)
for uid_chunk in uid_strings.chunks(100) {
let uid_set = uid_chunk.join(",");
let mut list = session let mut list = session
.uid_fetch(uid_set, "(UID BODY.PEEK[HEADER.FIELDS (FROM TO CC BCC)])") .uid_fetch(uid_set, "(UID BODY.PEEK[HEADER.FIELDS (FROM TO CC BCC)])")
.await .await
@@ -848,20 +846,12 @@ impl Imap {
&mut self, &mut self,
context: &Context, context: &Context,
folder: S, folder: S,
server_uids: &[u32], server_uids: Vec<u32>,
fetching_existing_messages: bool, fetching_existing_messages: bool,
) -> (Option<u32>, usize) { ) -> (Option<u32>, usize) {
let set = match server_uids { if server_uids.is_empty() {
[] => return (None, 0), return (None, 0);
[server_uid] => server_uid.to_string(),
[first_uid, .., last_uid] => {
// XXX: it is assumed that UIDs are sorted and
// contiguous. If UIDs are not contiguous, more
// messages than needed will be downloaded.
debug_assert!(first_uid < last_uid, "uids must be sorted");
format!("{}:{}", first_uid, last_uid)
} }
};
if !self.is_connected() { if !self.is_connected() {
warn!(context, "Not connected"); warn!(context, "Not connected");
@@ -877,6 +867,12 @@ impl Imap {
let session = self.session.as_mut().unwrap(); let session = self.session.as_mut().unwrap();
let sets = build_sequence_sets(server_uids.clone());
let mut read_errors = 0;
let mut count = 0;
let mut last_uid = None;
for set in sets.iter() {
let mut msgs = match session.uid_fetch(&set, BODY_FLAGS).await { let mut msgs = match session.uid_fetch(&set, BODY_FLAGS).await {
Ok(msgs) => msgs, Ok(msgs) => msgs,
Err(err) => { Err(err) => {
@@ -896,15 +892,17 @@ impl Imap {
let folder = folder.as_ref().to_string(); let folder = folder.as_ref().to_string();
let mut read_errors = 0;
let mut last_uid = None;
let mut count = 0;
while let Some(Ok(msg)) = msgs.next().await { while let Some(Ok(msg)) = msgs.next().await {
let server_uid = msg.uid.unwrap_or_default(); let server_uid = msg.uid.unwrap_or_default();
if !server_uids.contains(&server_uid) { if !server_uids.contains(&server_uid) {
// skip if there are some in between we are not interested in warn!(
context,
"Got unwanted uid {} not in {:?}, requested {:?}",
&server_uid,
server_uids,
&sets
);
continue; continue;
} }
count += 1; count += 1;
@@ -940,13 +938,16 @@ impl Imap {
} }
}; };
} }
}
if count != server_uids.len() { if count != server_uids.len() {
warn!( warn!(
context, context,
"failed to fetch all uids: got {}, requested {}", "failed to fetch all uids: got {}, requested {}, we requested the UIDs {:?} using {:?}",
count, count,
server_uids.len() server_uids.len(),
server_uids,
sets
); );
} }
@@ -1703,6 +1704,64 @@ async fn get_config_last_seen_uid<S: AsRef<str>>(context: &Context, folder: S) -
} }
} }
/// Builds a list of sequence/uid sets. The returned sets have each no more than around 1000
/// characters because according to https://tools.ietf.org/html/rfc2683#section-3.2.1.5
/// command lines should not be much more than 1000 chars (servers should allow at least 8000 chars)
fn build_sequence_sets(mut uids: Vec<u32>) -> Vec<String> {
uids.sort_unstable();
// first, try to find consecutive ranges:
let mut ranges: Vec<UidRange> = vec![];
for current in uids {
if let Some(last) = ranges.last_mut() {
if last.end + 1 == current {
last.end = current;
continue;
}
}
ranges.push(UidRange {
start: current,
end: current,
});
}
// Second, sort the uids into uid sets that are each below ~1000 characters
let mut result = vec![String::new()];
for range in ranges {
if let Some(last) = result.last_mut() {
if !last.is_empty() {
last.push(',');
}
last.push_str(&range.to_string());
if last.len() > 990 {
result.push(String::new()); // Start a new uid set
}
}
}
result.retain(|s| !s.is_empty());
result
}
struct UidRange {
start: u32,
end: u32,
// If start == end, then this range represents a single number
}
impl std::fmt::Display for UidRange {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
if self.start == self.end {
write!(f, "{}", self.start)
} else {
write!(f, "{}:{}", self.start, self.end)
}
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@@ -1730,4 +1789,64 @@ mod tests {
); );
assert_eq!(get_folder_meaning_by_name("xxx"), FolderMeaning::Unknown); assert_eq!(get_folder_meaning_by_name("xxx"), FolderMeaning::Unknown);
} }
#[test]
fn test_build_sequence_sets() {
let cases = vec![
(vec![], vec![]),
(vec![1], vec!["1"]),
(vec![3291], vec!["3291"]),
(vec![1, 3, 5, 7, 9, 11], vec!["1,3,5,7,9,11"]),
(vec![1, 2, 3], vec!["1:3"]),
(vec![1, 4, 5, 6], vec!["1,4:6"]),
((1..=500).collect(), vec!["1:500"]),
(vec![3, 4, 8, 9, 10, 11, 39, 50, 2], vec!["2:4,8:11,39,50"]),
];
for (input, output) in cases {
assert_eq!(build_sequence_sets(input), output);
}
let numbers: Vec<_> = (2..=500).step_by(2).collect();
let result = build_sequence_sets(numbers.clone());
for set in &result {
assert!(set.len() < 1010);
assert!(!set.ends_with(','));
assert!(!set.starts_with(','));
}
assert!(result.len() == 1); // these UIDs fit in one set
for number in &numbers {
assert!(result
.iter()
.any(|set| set.split(',').any(|n| n.parse::<u32>().unwrap() == *number)));
}
let numbers: Vec<_> = (1..=1000).step_by(3).collect();
let result = build_sequence_sets(numbers.clone());
for set in &result {
assert!(set.len() < 1010);
assert!(!set.ends_with(','));
assert!(!set.starts_with(','));
}
assert!(result.last().unwrap().ends_with("997,1000"));
assert!(result.len() == 2); // This time we need 2 sets
for number in &numbers {
assert!(result
.iter()
.any(|set| set.split(',').any(|n| n.parse::<u32>().unwrap() == *number)));
}
let numbers: Vec<_> = (30000000..=30002500).step_by(4).collect();
let result = build_sequence_sets(numbers.clone());
for set in &result {
assert!(set.len() < 1010);
assert!(!set.ends_with(','));
assert!(!set.starts_with(','));
}
assert_eq!(result.len(), 6);
for number in &numbers {
assert!(result
.iter()
.any(|set| set.split(',').any(|n| n.parse::<u32>().unwrap() == *number)));
}
}
} }