mirror of
https://github.com/chatmail/core.git
synced 2026-04-20 15:06:30 +03:00
File deduplication (#6332)
When receiving messages, blobs will be deduplicated with the new function `create_and_deduplicate_from_bytes()`. For sending files, this adds a new function `set_file_and_deduplicate()` instead of deduplicating by default. This is for https://github.com/deltachat/deltachat-core-rust/issues/6265; read the issue description there for more details. TODO: - [x] Set files as read-only - [x] Don't do a write when the file is already identical - [x] The first 32 chars or so of the 64-character hash are enough. I calculated that if 10b people (i.e. all of humanity) use DC, and each of them has 200k distinct blob files (I have 4k in my day-to-day account), and we used 20 chars, then the expected value for the number of name collisions would be ~0.0002 (and the probability that there is at least one name collision is lower than that) [^1]. I added 12 more characters to be on the super safe side, but this wouldn't be necessary and I could also make it 20 instead of 32. - Not 100% sure whether that's necessary at all - it would mainly be necessary if we might hit a length limit on some file systems (the blobdir is usually something like `accounts/2ff9fc096d2f46b6832b24a1ed99c0d6/dc.db-blobs` (53 chars), plus 64 chars for the filename would be 117). - [x] "touch" the files to prevent them from being deleted - [x] TODOs in the code For later PRs: - Replace `BlobObject::create(…)` with `BlobObject::create_and_deduplicate(…)` in order to deduplicate every time core creates a file - Modify JsonRPC to deduplicate blob files - Possibly rename BlobObject.name to BlobObject.file in order to prevent confusion (because `name` usually means "user-visible-name", not "name of the file on disk"). 
[^1]: Calculated with both https://printfn.github.io/fend/ and https://www.geogebra.org/calculator, both of which came to the same result ([1](https://github.com/user-attachments/assets/bbb62550-3781-48b5-88b1-ba0e29c28c0d), [2](https://github.com/user-attachments/assets/82171212-b797-4117-a39f-0e132eac7252)) --------- Co-authored-by: l <link2xt@testrun.org>
This commit is contained in:
@@ -261,9 +261,6 @@ impl Context {
|
||||
/// Ensure that a file is an acceptable webxdc for sending.
|
||||
pub(crate) async fn ensure_sendable_webxdc_file(&self, path: &Path) -> Result<()> {
|
||||
let filename = path.to_str().unwrap_or_default();
|
||||
if !filename.ends_with(WEBXDC_SUFFIX) {
|
||||
bail!("{} is not a valid webxdc file", filename);
|
||||
}
|
||||
|
||||
let valid = match FsZipFileReader::new(path).await {
|
||||
Ok(archive) => {
|
||||
@@ -1047,9 +1044,9 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn create_webxdc_instance(t: &TestContext, name: &str, bytes: &[u8]) -> Result<Message> {
|
||||
fn create_webxdc_instance(t: &TestContext, name: &str, bytes: &[u8]) -> Result<Message> {
|
||||
let mut instance = Message::new(Viewtype::File);
|
||||
instance.set_file_from_bytes(t, name, bytes, None).await?;
|
||||
instance.set_file_from_bytes(t, name, bytes, None)?;
|
||||
Ok(instance)
|
||||
}
|
||||
|
||||
@@ -1058,8 +1055,7 @@ mod tests {
|
||||
t,
|
||||
"minimal.xdc",
|
||||
include_bytes!("../test-data/webxdc/minimal.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
let instance_msg_id = send_msg(t, chat_id, &mut instance).await?;
|
||||
assert_eq!(instance.viewtype, Viewtype::Webxdc);
|
||||
Message::load_from_db(t, instance_msg_id).await
|
||||
@@ -1078,9 +1074,7 @@ mod tests {
|
||||
|
||||
// sending using bad extension is not working, even when setting Viewtype to webxdc
|
||||
let mut instance = Message::new(Viewtype::Webxdc);
|
||||
instance
|
||||
.set_file_from_bytes(&t, "index.html", b"<html>ola!</html>", None)
|
||||
.await?;
|
||||
instance.set_file_from_bytes(&t, "index.html", b"<html>ola!</html>", None)?;
|
||||
assert!(send_msg(&t, chat_id, &mut instance).await.is_err());
|
||||
|
||||
Ok(())
|
||||
@@ -1096,8 +1090,7 @@ mod tests {
|
||||
&t,
|
||||
"invalid-no-zip-but-7z.xdc",
|
||||
include_bytes!("../test-data/webxdc/invalid-no-zip-but-7z.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
let instance_id = send_msg(&t, chat_id, &mut instance).await?;
|
||||
assert_eq!(instance.viewtype, Viewtype::File);
|
||||
let test = Message::load_from_db(&t, instance_id).await?;
|
||||
@@ -1105,14 +1098,12 @@ mod tests {
|
||||
|
||||
// sending invalid .xdc as Viewtype::Webxdc should fail already on sending
|
||||
let mut instance = Message::new(Viewtype::Webxdc);
|
||||
instance
|
||||
.set_file_from_bytes(
|
||||
&t,
|
||||
"invalid2.xdc",
|
||||
include_bytes!("../test-data/webxdc/invalid-no-zip-but-7z.xdc"),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
instance.set_file_from_bytes(
|
||||
&t,
|
||||
"invalid2.xdc",
|
||||
include_bytes!("../test-data/webxdc/invalid-no-zip-but-7z.xdc"),
|
||||
None,
|
||||
)?;
|
||||
assert!(send_msg(&t, chat_id, &mut instance).await.is_err());
|
||||
|
||||
Ok(())
|
||||
@@ -1128,8 +1119,7 @@ mod tests {
|
||||
&t,
|
||||
"chess.xdc",
|
||||
include_bytes!("../test-data/webxdc/chess.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
let instance_id = send_msg(&t, chat_id, &mut instance).await?;
|
||||
let instance = Message::load_from_db(&t, instance_id).await?;
|
||||
assert_eq!(instance.viewtype, Viewtype::Webxdc);
|
||||
@@ -1315,8 +1305,7 @@ mod tests {
|
||||
&alice,
|
||||
"chess.xdc",
|
||||
include_bytes!("../test-data/webxdc/chess.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
let sent1 = alice.send_msg(chat.id, &mut alice_instance).await;
|
||||
let alice_instance = sent1.load_from_db().await;
|
||||
alice
|
||||
@@ -1445,8 +1434,7 @@ mod tests {
|
||||
&t,
|
||||
"minimal.xdc",
|
||||
include_bytes!("../test-data/webxdc/minimal.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
chat_id.set_draft(&t, Some(&mut instance)).await?;
|
||||
let instance = chat_id.get_draft(&t).await?.unwrap();
|
||||
t.send_webxdc_status_update(instance.id, r#"{"payload": 42}"#)
|
||||
@@ -1882,8 +1870,7 @@ mod tests {
|
||||
&t,
|
||||
"minimal.xdc",
|
||||
include_bytes!("../test-data/webxdc/minimal.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
chat_id.set_draft(&t, Some(&mut instance)).await?;
|
||||
let (first, last) = (StatusUpdateSerial(1), StatusUpdateSerial::MAX);
|
||||
assert_eq!(
|
||||
@@ -2028,8 +2015,7 @@ mod tests {
|
||||
&alice,
|
||||
"minimal.xdc",
|
||||
include_bytes!("../test-data/webxdc/minimal.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
alice_chat_id
|
||||
.set_draft(&alice, Some(&mut alice_instance))
|
||||
.await?;
|
||||
@@ -2143,8 +2129,7 @@ mod tests {
|
||||
&t,
|
||||
"some-files.xdc",
|
||||
include_bytes!("../test-data/webxdc/some-files.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
chat_id.set_draft(&t, Some(&mut instance)).await?;
|
||||
|
||||
let buf = instance.get_webxdc_blob(&t, "index.html").await?;
|
||||
@@ -2243,8 +2228,7 @@ sth_for_the = "future""#
|
||||
&t,
|
||||
"with-min-api-1001.xdc",
|
||||
include_bytes!("../test-data/webxdc/with-min-api-1001.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
send_msg(&t, chat_id, &mut instance).await?;
|
||||
|
||||
let instance = t.get_last_msg().await;
|
||||
@@ -2270,8 +2254,7 @@ sth_for_the = "future""#
|
||||
&t,
|
||||
"with-manifest-empty-name.xdc",
|
||||
include_bytes!("../test-data/webxdc/with-manifest-empty-name.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
chat_id.set_draft(&t, Some(&mut instance)).await?;
|
||||
let info = instance.get_webxdc_info(&t).await?;
|
||||
assert_eq!(info.name, "with-manifest-empty-name.xdc");
|
||||
@@ -2281,8 +2264,7 @@ sth_for_the = "future""#
|
||||
&t,
|
||||
"with-manifest-no-name.xdc",
|
||||
include_bytes!("../test-data/webxdc/with-manifest-no-name.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
chat_id.set_draft(&t, Some(&mut instance)).await?;
|
||||
let info = instance.get_webxdc_info(&t).await?;
|
||||
assert_eq!(info.name, "with-manifest-no-name.xdc");
|
||||
@@ -2292,8 +2274,7 @@ sth_for_the = "future""#
|
||||
&t,
|
||||
"with-minimal-manifest.xdc",
|
||||
include_bytes!("../test-data/webxdc/with-minimal-manifest.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
chat_id.set_draft(&t, Some(&mut instance)).await?;
|
||||
let info = instance.get_webxdc_info(&t).await?;
|
||||
assert_eq!(info.name, "nice app!");
|
||||
@@ -2303,8 +2284,7 @@ sth_for_the = "future""#
|
||||
&t,
|
||||
"with-manifest-and-png-icon.xdc",
|
||||
include_bytes!("../test-data/webxdc/with-manifest-and-png-icon.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
chat_id.set_draft(&t, Some(&mut instance)).await?;
|
||||
let info = instance.get_webxdc_info(&t).await?;
|
||||
assert_eq!(info.name, "with some icon");
|
||||
@@ -2314,8 +2294,7 @@ sth_for_the = "future""#
|
||||
&t,
|
||||
"with-png-icon.xdc",
|
||||
include_bytes!("../test-data/webxdc/with-png-icon.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
chat_id.set_draft(&t, Some(&mut instance)).await?;
|
||||
let info = instance.get_webxdc_info(&t).await?;
|
||||
assert_eq!(info.name, "with-png-icon.xdc");
|
||||
@@ -2325,8 +2304,7 @@ sth_for_the = "future""#
|
||||
&t,
|
||||
"with-jpg-icon.xdc",
|
||||
include_bytes!("../test-data/webxdc/with-jpg-icon.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
chat_id.set_draft(&t, Some(&mut instance)).await?;
|
||||
let info = instance.get_webxdc_info(&t).await?;
|
||||
assert_eq!(info.name, "with-jpg-icon.xdc");
|
||||
@@ -2667,8 +2645,7 @@ sth_for_the = "future""#
|
||||
} else {
|
||||
include_bytes!("../test-data/webxdc/minimal.xdc")
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
let instance_id = send_msg(&t, chat_id, &mut instance).await?;
|
||||
t.send_webxdc_status_update(
|
||||
instance_id,
|
||||
@@ -2693,8 +2670,7 @@ sth_for_the = "future""#
|
||||
&t,
|
||||
"with-minimal-manifest.xdc",
|
||||
include_bytes!("../test-data/webxdc/with-minimal-manifest.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
send_msg(&t, chat_id, &mut instance).await?;
|
||||
|
||||
let chatlist = Chatlist::try_load(&t, 0, None, None).await?;
|
||||
@@ -2717,8 +2693,7 @@ sth_for_the = "future""#
|
||||
&alice,
|
||||
"minimal.xdc",
|
||||
include_bytes!("../test-data/webxdc/minimal.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
alice_instance.set_text("user added text".to_string());
|
||||
send_msg(&alice, alice_chat.id, &mut alice_instance).await?;
|
||||
let alice_instance = alice.get_last_msg().await;
|
||||
@@ -2821,8 +2796,7 @@ sth_for_the = "future""#
|
||||
&alice,
|
||||
"debug_logging.xdc",
|
||||
include_bytes!("../test-data/webxdc/minimal.xdc"),
|
||||
)
|
||||
.await?;
|
||||
)?;
|
||||
assert!(alice.debug_logging.read().unwrap().is_none());
|
||||
send_msg(&alice, chat_id, &mut instance).await?;
|
||||
assert!(alice.debug_logging.read().unwrap().is_some());
|
||||
|
||||
Reference in New Issue
Block a user