mirror of
https://github.com/chatmail/core.git
synced 2026-04-20 15:06:30 +03:00
File deduplication (#6332)
When receiving messages, blobs will be deduplicated with the new function `create_and_deduplicate_from_bytes()`. For sending files, this adds a new function `set_file_and_deduplicate()` instead of deduplicating by default. This is for https://github.com/deltachat/deltachat-core-rust/issues/6265; read the issue description there for more details. TODO: - [x] Set files as read-only - [x] Don't do a write when the file is already identical - [x] The first 32 chars or so of the 64-character hash are enough. I calculated that if 10 billion people (i.e. all of humanity) use DC, and each of them has 200k distinct blob files (I have 4k in my day-to-day account), and we used 20 chars, then the expected value for the number of name collisions would be ~0.0002 (and the probability that there is at least one name collision is lower than that) [^1]. I added 12 more characters to be on the super safe side, but this wouldn't be necessary and I could also make it 20 instead of 32. - Not 100% sure whether that's necessary at all - it would mainly be necessary if we might hit a length limit on some file systems (the blobdir is usually something like `accounts/2ff9fc096d2f46b6832b24a1ed99c0d6/dc.db-blobs` (53 chars), plus 64 chars for the filename would be 117). - [x] "touch" the files to prevent them from being deleted - [x] TODOs in the code For later PRs: - Replace `BlobObject::create(…)` with `BlobObject::create_and_deduplicate(…)` in order to deduplicate every time core creates a file - Modify JsonRPC to deduplicate blob files - Possibly rename BlobObject.name to BlobObject.file in order to prevent confusion (because `name` usually means "user-visible-name", not "name of the file on disk"). 
[^1]: Calculated with both https://printfn.github.io/fend/ and https://www.geogebra.org/calculator, both of which came to the same result ([1](https://github.com/user-attachments/assets/bbb62550-3781-48b5-88b1-ba0e29c28c0d), [2](https://github.com/user-attachments/assets/82171212-b797-4117-a39f-0e132eac7252)) --------- Co-authored-by: l <link2xt@testrun.org>
This commit is contained in:
@@ -286,6 +286,8 @@ impl Message {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::path::PathBuf;
|
||||
|
||||
use super::*;
|
||||
use crate::chat::ChatId;
|
||||
use crate::param::Param;
|
||||
@@ -305,62 +307,90 @@ mod tests {
|
||||
.unwrap();
|
||||
let some_text = " bla \t\n\tbla\n\t".to_string();
|
||||
|
||||
async fn write_file_to_blobdir(d: &TestContext) -> PathBuf {
|
||||
let bytes = &[38, 209, 39, 29]; // Just some random bytes
|
||||
let file = d.get_blobdir().join("random_filename_392438");
|
||||
tokio::fs::write(&file, bytes).await.unwrap();
|
||||
file
|
||||
}
|
||||
|
||||
let msg = Message::new_text(some_text.to_string());
|
||||
assert_summary_texts(&msg, ctx, "bla bla").await; // for simple text, the type is not added to the summary
|
||||
|
||||
let file = write_file_to_blobdir(&d).await;
|
||||
let mut msg = Message::new(Viewtype::Image);
|
||||
msg.set_file("foo.jpg", None);
|
||||
msg.set_file_and_deduplicate(&d, &file, Some("foo.jpg"), None)
|
||||
.unwrap();
|
||||
assert_summary_texts(&msg, ctx, "📷 Image").await; // file names are not added for images
|
||||
|
||||
let file = write_file_to_blobdir(&d).await;
|
||||
let mut msg = Message::new(Viewtype::Image);
|
||||
msg.set_text(some_text.to_string());
|
||||
msg.set_file("foo.jpg", None);
|
||||
msg.set_file_and_deduplicate(&d, &file, Some("foo.jpg"), None)
|
||||
.unwrap();
|
||||
assert_summary_texts(&msg, ctx, "📷 bla bla").await; // type is visible by emoji if text is set
|
||||
|
||||
let file = write_file_to_blobdir(&d).await;
|
||||
let mut msg = Message::new(Viewtype::Video);
|
||||
msg.set_file("foo.mp4", None);
|
||||
msg.set_file_and_deduplicate(&d, &file, Some("foo.mp4"), None)
|
||||
.unwrap();
|
||||
assert_summary_texts(&msg, ctx, "🎥 Video").await; // file names are not added for videos
|
||||
|
||||
let file = write_file_to_blobdir(&d).await;
|
||||
let mut msg = Message::new(Viewtype::Video);
|
||||
msg.set_text(some_text.to_string());
|
||||
msg.set_file("foo.mp4", None);
|
||||
msg.set_file_and_deduplicate(&d, &file, Some("foo.mp4"), None)
|
||||
.unwrap();
|
||||
assert_summary_texts(&msg, ctx, "🎥 bla bla").await; // type is visible by emoji if text is set
|
||||
|
||||
let file = write_file_to_blobdir(&d).await;
|
||||
let mut msg = Message::new(Viewtype::Gif);
|
||||
msg.set_file("foo.gif", None);
|
||||
msg.set_file_and_deduplicate(&d, &file, Some("foo.gif"), None)
|
||||
.unwrap();
|
||||
assert_summary_texts(&msg, ctx, "GIF").await; // file names are not added for GIFs
|
||||
|
||||
let file = write_file_to_blobdir(&d).await;
|
||||
let mut msg = Message::new(Viewtype::Gif);
|
||||
msg.set_text(some_text.to_string());
|
||||
msg.set_file("foo.gif", None);
|
||||
msg.set_file_and_deduplicate(&d, &file, Some("foo.gif"), None)
|
||||
.unwrap();
|
||||
assert_summary_texts(&msg, ctx, "GIF \u{2013} bla bla").await; // file names are not added for GIFs
|
||||
|
||||
let file = write_file_to_blobdir(&d).await;
|
||||
let mut msg = Message::new(Viewtype::Sticker);
|
||||
msg.set_file("foo.png", None);
|
||||
msg.set_file_and_deduplicate(&d, &file, Some("foo.png"), None)
|
||||
.unwrap();
|
||||
assert_summary_texts(&msg, ctx, "Sticker").await; // file names are not added for stickers
|
||||
|
||||
let file = write_file_to_blobdir(&d).await;
|
||||
let mut msg = Message::new(Viewtype::Voice);
|
||||
msg.set_file("foo.mp3", None);
|
||||
msg.set_file_and_deduplicate(&d, &file, Some("foo.mp3"), None)
|
||||
.unwrap();
|
||||
assert_summary_texts(&msg, ctx, "🎤 Voice message").await; // file names are not added for voice messages
|
||||
|
||||
let file = write_file_to_blobdir(&d).await;
|
||||
let mut msg = Message::new(Viewtype::Voice);
|
||||
msg.set_text(some_text.clone());
|
||||
msg.set_file("foo.mp3", None);
|
||||
msg.set_file_and_deduplicate(&d, &file, Some("foo.mp3"), None)
|
||||
.unwrap();
|
||||
assert_summary_texts(&msg, ctx, "🎤 bla bla").await;
|
||||
|
||||
let file = write_file_to_blobdir(&d).await;
|
||||
let mut msg = Message::new(Viewtype::Audio);
|
||||
msg.set_file("foo.mp3", None);
|
||||
msg.set_file_and_deduplicate(&d, &file, Some("foo.mp3"), None)
|
||||
.unwrap();
|
||||
assert_summary_texts(&msg, ctx, "🎵 foo.mp3").await; // file name is added for audio
|
||||
|
||||
let file = write_file_to_blobdir(&d).await;
|
||||
let mut msg = Message::new(Viewtype::Audio);
|
||||
msg.set_text(some_text.clone());
|
||||
msg.set_file("foo.mp3", None);
|
||||
msg.set_file_and_deduplicate(&d, &file, Some("foo.mp3"), None)
|
||||
.unwrap();
|
||||
assert_summary_texts(&msg, ctx, "🎵 foo.mp3 \u{2013} bla bla").await; // file name and text added for audio
|
||||
|
||||
let mut msg = Message::new(Viewtype::File);
|
||||
let bytes = include_bytes!("../test-data/webxdc/with-minimal-manifest.xdc");
|
||||
msg.set_file_from_bytes(ctx, "foo.xdc", bytes, None)
|
||||
.await
|
||||
.unwrap();
|
||||
chat_id.set_draft(ctx, Some(&mut msg)).await.unwrap();
|
||||
assert_eq!(msg.viewtype, Viewtype::Webxdc);
|
||||
@@ -369,24 +399,28 @@ mod tests {
|
||||
chat_id.set_draft(ctx, Some(&mut msg)).await.unwrap();
|
||||
assert_summary_texts(&msg, ctx, "nice app! \u{2013} bla bla").await;
|
||||
|
||||
let file = write_file_to_blobdir(&d).await;
|
||||
let mut msg = Message::new(Viewtype::File);
|
||||
msg.set_file("foo.bar", None);
|
||||
msg.set_file_and_deduplicate(&d, &file, Some("foo.bar"), None)
|
||||
.unwrap();
|
||||
assert_summary_texts(&msg, ctx, "📎 foo.bar").await; // file name is added for files
|
||||
|
||||
let file = write_file_to_blobdir(&d).await;
|
||||
let mut msg = Message::new(Viewtype::File);
|
||||
msg.set_text(some_text.clone());
|
||||
msg.set_file("foo.bar", None);
|
||||
msg.set_file_and_deduplicate(&d, &file, Some("foo.bar"), None)
|
||||
.unwrap();
|
||||
assert_summary_texts(&msg, ctx, "📎 foo.bar \u{2013} bla bla").await; // file name is added for files
|
||||
|
||||
let file = write_file_to_blobdir(&d).await;
|
||||
let mut msg = Message::new(Viewtype::VideochatInvitation);
|
||||
msg.set_text(some_text.clone());
|
||||
msg.set_file("foo.bar", None);
|
||||
msg.set_file_and_deduplicate(&d, &file, Some("foo.bar"), None)
|
||||
.unwrap();
|
||||
assert_summary_texts(&msg, ctx, "Video chat invitation").await; // text is not added for videochat invitations
|
||||
|
||||
let mut msg = Message::new(Viewtype::Vcard);
|
||||
msg.set_file_from_bytes(ctx, "foo.vcf", b"", None)
|
||||
.await
|
||||
.unwrap();
|
||||
msg.set_file_from_bytes(ctx, "foo.vcf", b"", None).unwrap();
|
||||
chat_id.set_draft(ctx, Some(&mut msg)).await.unwrap();
|
||||
// If a vCard can't be parsed, the message becomes `Viewtype::File`.
|
||||
assert_eq!(msg.viewtype, Viewtype::File);
|
||||
@@ -406,7 +440,6 @@ mod tests {
|
||||
END:VCARD",
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
chat_id.set_draft(ctx, Some(&mut msg)).await.unwrap();
|
||||
assert_eq!(msg.viewtype, Viewtype::Vcard);
|
||||
@@ -419,9 +452,11 @@ mod tests {
|
||||
assert_eq!(msg.get_summary_text(ctx).await, "Forwarded: bla bla"); // for simple text, the type is not added to the summary
|
||||
assert_eq!(msg.get_summary_text_without_prefix(ctx).await, "bla bla"); // skipping prefix used for reactions summaries
|
||||
|
||||
let file = write_file_to_blobdir(&d).await;
|
||||
let mut msg = Message::new(Viewtype::File);
|
||||
msg.set_text(some_text.clone());
|
||||
msg.set_file("foo.bar", None);
|
||||
msg.set_file_and_deduplicate(&d, &file, Some("foo.bar"), None)
|
||||
.unwrap();
|
||||
msg.param.set_int(Param::Forwarded, 1);
|
||||
assert_eq!(
|
||||
msg.get_summary_text(ctx).await,
|
||||
|
||||
Reference in New Issue
Block a user