mirror of
https://github.com/chatmail/core.git
synced 2026-05-08 17:36:29 +03:00
@@ -4766,7 +4766,7 @@ void dc_msg_set_file (dc_msg_t* msg, const char* file,
|
|||||||
* otherwise it will be copied to the blobdir first.
|
* otherwise it will be copied to the blobdir first.
|
||||||
*
|
*
|
||||||
* In order to deduplicate files that contain the same data,
|
* In order to deduplicate files that contain the same data,
|
||||||
* the file will be named as a hash of the file data.
|
* the file will be named `<hash>.<extension>`, e.g. `ce940175885d7b78f7b7e9f1396611f.jpg`.
|
||||||
*
|
*
|
||||||
* NOTE:
|
* NOTE:
|
||||||
* - This function will rename the file. To get the new file path, call `get_file()`.
|
* - This function will rename the file. To get the new file path, call `get_file()`.
|
||||||
|
|||||||
80
src/blob.rs
80
src/blob.rs
@@ -150,11 +150,16 @@ impl<'a> BlobObject<'a> {
|
|||||||
/// otherwise it will be copied to the blobdir first.
|
/// otherwise it will be copied to the blobdir first.
|
||||||
///
|
///
|
||||||
/// In order to deduplicate files that contain the same data,
|
/// In order to deduplicate files that contain the same data,
|
||||||
/// the file will be named as the hash of the file data.
|
/// the file will be named `<hash>.<extension>`, e.g. `ce940175885d7b78f7b7e9f1396611f.jpg`.
|
||||||
|
/// The `original_name` param is only used to get the extension.
|
||||||
///
|
///
|
||||||
/// This is done in a in way which avoids race-conditions when multiple files are
|
/// This is done in a in way which avoids race-conditions when multiple files are
|
||||||
/// concurrently created.
|
/// concurrently created.
|
||||||
pub fn create_and_deduplicate(context: &'a Context, src: &Path) -> Result<BlobObject<'a>> {
|
pub fn create_and_deduplicate(
|
||||||
|
context: &'a Context,
|
||||||
|
src: &Path,
|
||||||
|
original_name: &str,
|
||||||
|
) -> Result<BlobObject<'a>> {
|
||||||
// `create_and_deduplicate{_from_bytes}()` do blocking I/O, but can still be called
|
// `create_and_deduplicate{_from_bytes}()` do blocking I/O, but can still be called
|
||||||
// from an async context thanks to `block_in_place()`.
|
// from an async context thanks to `block_in_place()`.
|
||||||
// Tokio's "async" I/O functions are also just thin wrappers around the blocking I/O syscalls,
|
// Tokio's "async" I/O functions are also just thin wrappers around the blocking I/O syscalls,
|
||||||
@@ -180,7 +185,25 @@ impl<'a> BlobObject<'a> {
|
|||||||
src_in_blobdir = &temp_path;
|
src_in_blobdir = &temp_path;
|
||||||
}
|
}
|
||||||
|
|
||||||
let blob = BlobObject::from_hash(blobdir, file_hash(src_in_blobdir)?);
|
let hash = file_hash(src_in_blobdir)?.to_hex();
|
||||||
|
let hash = hash.as_str();
|
||||||
|
let hash = hash.get(0..31).unwrap_or(hash);
|
||||||
|
let new_file = if let Some(extension) = Path::new(original_name)
|
||||||
|
.extension()
|
||||||
|
.filter(|e| e.len() <= 32)
|
||||||
|
{
|
||||||
|
format!(
|
||||||
|
"$BLOBDIR/{hash}.{}",
|
||||||
|
extension.to_string_lossy().to_lowercase()
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
format!("$BLOBDIR/{hash}")
|
||||||
|
};
|
||||||
|
|
||||||
|
let blob = BlobObject {
|
||||||
|
blobdir,
|
||||||
|
name: new_file,
|
||||||
|
};
|
||||||
let new_path = blob.to_abs_path();
|
let new_path = blob.to_abs_path();
|
||||||
|
|
||||||
// This will also replace an already-existing file.
|
// This will also replace an already-existing file.
|
||||||
@@ -194,7 +217,8 @@ impl<'a> BlobObject<'a> {
|
|||||||
|
|
||||||
/// Creates a new blob object with the file contents in `data`.
|
/// Creates a new blob object with the file contents in `data`.
|
||||||
/// In order to deduplicate files that contain the same data,
|
/// In order to deduplicate files that contain the same data,
|
||||||
/// the file will be renamed to a hash of the file data.
|
/// the file will be named `<hash>.<extension>`, e.g. `ce940175885d7b78f7b7e9f1396611f.jpg`.
|
||||||
|
/// The `original_name` param is only used to get the extension.
|
||||||
///
|
///
|
||||||
/// The `data` will be written into the file without race-conditions.
|
/// The `data` will be written into the file without race-conditions.
|
||||||
///
|
///
|
||||||
@@ -203,6 +227,7 @@ impl<'a> BlobObject<'a> {
|
|||||||
pub fn create_and_deduplicate_from_bytes(
|
pub fn create_and_deduplicate_from_bytes(
|
||||||
context: &'a Context,
|
context: &'a Context,
|
||||||
data: &[u8],
|
data: &[u8],
|
||||||
|
original_name: &str,
|
||||||
) -> Result<BlobObject<'a>> {
|
) -> Result<BlobObject<'a>> {
|
||||||
task::block_in_place(|| {
|
task::block_in_place(|| {
|
||||||
let blobdir = context.get_blobdir();
|
let blobdir = context.get_blobdir();
|
||||||
@@ -213,20 +238,10 @@ impl<'a> BlobObject<'a> {
|
|||||||
std::fs::write(&temp_path, data).context("writing new blobfile failed")?;
|
std::fs::write(&temp_path, data).context("writing new blobfile failed")?;
|
||||||
};
|
};
|
||||||
|
|
||||||
BlobObject::create_and_deduplicate(context, &temp_path)
|
BlobObject::create_and_deduplicate(context, &temp_path, original_name)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn from_hash(blobdir: &Path, hash: blake3::Hash) -> BlobObject<'_> {
|
|
||||||
let hash = hash.to_hex();
|
|
||||||
let hash = hash.as_str();
|
|
||||||
let hash = hash.get(0..31).unwrap_or(hash);
|
|
||||||
BlobObject {
|
|
||||||
blobdir,
|
|
||||||
name: format!("$BLOBDIR/{hash}"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Creates a blob from a file, possibly copying it to the blobdir.
|
/// Creates a blob from a file, possibly copying it to the blobdir.
|
||||||
///
|
///
|
||||||
/// If the source file is not a path to into the blob directory
|
/// If the source file is not a path to into the blob directory
|
||||||
@@ -674,7 +689,7 @@ impl<'a> BlobObject<'a> {
|
|||||||
encode_img(&img, ofmt, &mut encoded)?;
|
encode_img(&img, ofmt, &mut encoded)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.name = BlobObject::create_and_deduplicate_from_bytes(context, &encoded)
|
self.name = BlobObject::create_and_deduplicate_from_bytes(context, &encoded, &name)
|
||||||
.context("failed to write recoded blob to file")?
|
.context("failed to write recoded blob to file")?
|
||||||
.name;
|
.name;
|
||||||
}
|
}
|
||||||
@@ -905,8 +920,11 @@ mod tests {
|
|||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
async fn test_lowercase_ext() {
|
async fn test_lowercase_ext() {
|
||||||
let t = TestContext::new().await;
|
let t = TestContext::new().await;
|
||||||
let blob = BlobObject::create(&t, "foo.TXT", b"hello").await.unwrap();
|
let blob = BlobObject::create_and_deduplicate_from_bytes(&t, b"hello", "foo.TXT").unwrap();
|
||||||
assert_eq!(blob.as_name(), "$BLOBDIR/foo.txt");
|
assert!(
|
||||||
|
blob.as_name().ends_with(".txt"),
|
||||||
|
"Blob {blob:?} should end with .txt"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
@@ -980,10 +998,10 @@ mod tests {
|
|||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
async fn test_create_long_names() {
|
async fn test_create_long_names() {
|
||||||
let t = TestContext::new().await;
|
let t = TestContext::new().await;
|
||||||
let s = "1".repeat(150);
|
let s = format!("file.{}", "a".repeat(100));
|
||||||
let blob = BlobObject::create(&t, &s, b"data").await.unwrap();
|
let blob = BlobObject::create_and_deduplicate_from_bytes(&t, b"data", &s).unwrap();
|
||||||
let blobname = blob.as_name().split('/').last().unwrap();
|
let blobname = blob.as_name().split('/').last().unwrap();
|
||||||
assert!(blobname.len() < 128);
|
assert!(blobname.len() < 70);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
@@ -1618,28 +1636,28 @@ mod tests {
|
|||||||
|
|
||||||
let path = t.get_blobdir().join("anyfile.dat");
|
let path = t.get_blobdir().join("anyfile.dat");
|
||||||
fs::write(&path, b"bla").await?;
|
fs::write(&path, b"bla").await?;
|
||||||
let blob = BlobObject::create_and_deduplicate(&t, &path)?;
|
let blob = BlobObject::create_and_deduplicate(&t, &path, "anyfile.dat")?;
|
||||||
assert_eq!(blob.name, "$BLOBDIR/ce940175885d7b78f7b7e9f1396611f");
|
assert_eq!(blob.name, "$BLOBDIR/ce940175885d7b78f7b7e9f1396611f.dat");
|
||||||
assert_eq!(path.exists(), false);
|
assert_eq!(path.exists(), false);
|
||||||
|
|
||||||
assert_eq!(fs::read(&blob.to_abs_path()).await?, b"bla");
|
assert_eq!(fs::read(&blob.to_abs_path()).await?, b"bla");
|
||||||
|
|
||||||
fs::write(&path, b"bla").await?;
|
fs::write(&path, b"bla").await?;
|
||||||
let blob2 = BlobObject::create_and_deduplicate(&t, &path)?;
|
let blob2 = BlobObject::create_and_deduplicate(&t, &path, "anyfile.dat")?;
|
||||||
assert_eq!(blob2.name, blob.name);
|
assert_eq!(blob2.name, blob.name);
|
||||||
|
|
||||||
let path_outside_blobdir = t.dir.path().join("anyfile.dat");
|
let path_outside_blobdir = t.dir.path().join("anyfile.dat");
|
||||||
fs::write(&path_outside_blobdir, b"bla").await?;
|
fs::write(&path_outside_blobdir, b"bla").await?;
|
||||||
let blob3 = BlobObject::create_and_deduplicate(&t, &path_outside_blobdir)?;
|
let blob3 = BlobObject::create_and_deduplicate(&t, &path_outside_blobdir, "anyfile.dat")?;
|
||||||
assert!(path_outside_blobdir.exists());
|
assert!(path_outside_blobdir.exists());
|
||||||
assert_eq!(blob3.name, blob.name);
|
assert_eq!(blob3.name, blob.name);
|
||||||
|
|
||||||
fs::write(&path, b"blabla").await?;
|
fs::write(&path, b"blabla").await?;
|
||||||
let blob4 = BlobObject::create_and_deduplicate(&t, &path)?;
|
let blob4 = BlobObject::create_and_deduplicate(&t, &path, "anyfile.dat")?;
|
||||||
assert_ne!(blob4.name, blob.name);
|
assert_ne!(blob4.name, blob.name);
|
||||||
|
|
||||||
fs::remove_dir_all(t.get_blobdir()).await?;
|
fs::remove_dir_all(t.get_blobdir()).await?;
|
||||||
let blob5 = BlobObject::create_and_deduplicate(&t, &path_outside_blobdir)?;
|
let blob5 = BlobObject::create_and_deduplicate(&t, &path_outside_blobdir, "anyfile.dat")?;
|
||||||
assert_eq!(blob5.name, blob.name);
|
assert_eq!(blob5.name, blob.name);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -1650,7 +1668,7 @@ mod tests {
|
|||||||
let t = TestContext::new().await;
|
let t = TestContext::new().await;
|
||||||
|
|
||||||
fs::remove_dir(t.get_blobdir()).await?;
|
fs::remove_dir(t.get_blobdir()).await?;
|
||||||
let blob = BlobObject::create_and_deduplicate_from_bytes(&t, b"bla")?;
|
let blob = BlobObject::create_and_deduplicate_from_bytes(&t, b"bla", "file")?;
|
||||||
assert_eq!(blob.name, "$BLOBDIR/ce940175885d7b78f7b7e9f1396611f");
|
assert_eq!(blob.name, "$BLOBDIR/ce940175885d7b78f7b7e9f1396611f");
|
||||||
|
|
||||||
assert_eq!(fs::read(&blob.to_abs_path()).await?, b"bla");
|
assert_eq!(fs::read(&blob.to_abs_path()).await?, b"bla");
|
||||||
@@ -1662,7 +1680,7 @@ mod tests {
|
|||||||
// which we can't mock from our code.
|
// which we can't mock from our code.
|
||||||
tokio::time::sleep(Duration::from_millis(1100)).await;
|
tokio::time::sleep(Duration::from_millis(1100)).await;
|
||||||
|
|
||||||
let blob2 = BlobObject::create_and_deduplicate_from_bytes(&t, b"bla")?;
|
let blob2 = BlobObject::create_and_deduplicate_from_bytes(&t, b"bla", "file")?;
|
||||||
assert_eq!(blob2.name, blob.name);
|
assert_eq!(blob2.name, blob.name);
|
||||||
|
|
||||||
let modified2 = blob.to_abs_path().metadata()?.modified()?;
|
let modified2 = blob.to_abs_path().metadata()?.modified()?;
|
||||||
@@ -1675,7 +1693,7 @@ mod tests {
|
|||||||
sql::housekeeping(&t).await?;
|
sql::housekeeping(&t).await?;
|
||||||
assert_eq!(blob2.to_abs_path().exists(), false);
|
assert_eq!(blob2.to_abs_path().exists(), false);
|
||||||
|
|
||||||
let blob3 = BlobObject::create_and_deduplicate_from_bytes(&t, b"blabla")?;
|
let blob3 = BlobObject::create_and_deduplicate_from_bytes(&t, b"blabla", "file")?;
|
||||||
assert_ne!(blob3.name, blob.name);
|
assert_ne!(blob3.name, blob.name);
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -1683,7 +1701,7 @@ mod tests {
|
|||||||
// the correct content should be restored:
|
// the correct content should be restored:
|
||||||
fs::write(blob3.to_abs_path(), b"bloblo").await?;
|
fs::write(blob3.to_abs_path(), b"bloblo").await?;
|
||||||
|
|
||||||
let blob4 = BlobObject::create_and_deduplicate_from_bytes(&t, b"blabla")?;
|
let blob4 = BlobObject::create_and_deduplicate_from_bytes(&t, b"blabla", "file")?;
|
||||||
let blob4_content = fs::read(blob4.to_abs_path()).await?;
|
let blob4_content = fs::read(blob4.to_abs_path()).await?;
|
||||||
assert_eq!(blob4_content, b"blabla");
|
assert_eq!(blob4_content, b"blabla");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2832,7 +2832,7 @@ async fn test_blob_renaming() -> Result<()> {
|
|||||||
|
|
||||||
// the file bob receives should not contain BIDI-control characters
|
// the file bob receives should not contain BIDI-control characters
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
Some("$BLOBDIR/30c0f9c6a167fc2a91285c85be7ea34"),
|
Some("$BLOBDIR/30c0f9c6a167fc2a91285c85be7ea34.exe"),
|
||||||
msg.param.get(Param::File),
|
msg.param.get(Param::File),
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
|||||||
@@ -431,7 +431,7 @@ mod tests {
|
|||||||
let path = msg.get_file(&ctx1).unwrap();
|
let path = msg.get_file(&ctx1).unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
// That's the hash of the file:
|
// That's the hash of the file:
|
||||||
path.with_file_name("ac1d2d284757656a8d41dc40aae4136"),
|
path.with_file_name("ac1d2d284757656a8d41dc40aae4136.txt"),
|
||||||
path
|
path
|
||||||
);
|
);
|
||||||
assert_eq!("hello.txt", msg.get_filename().unwrap());
|
assert_eq!("hello.txt", msg.get_filename().unwrap());
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
//! # Messages and their identifiers.
|
//! # Messages and their identifiers.
|
||||||
|
|
||||||
use std::collections::BTreeSet;
|
use std::collections::BTreeSet;
|
||||||
use std::ffi::OsStr;
|
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
@@ -1095,7 +1094,7 @@ impl Message {
|
|||||||
/// otherwise it will be copied to the blobdir first.
|
/// otherwise it will be copied to the blobdir first.
|
||||||
///
|
///
|
||||||
/// In order to deduplicate files that contain the same data,
|
/// In order to deduplicate files that contain the same data,
|
||||||
/// the file will be named as a hash of the file data.
|
/// the file will be named `<hash>.<extension>`, e.g. `ce940175885d7b78f7b7e9f1396611f.jpg`.
|
||||||
///
|
///
|
||||||
/// NOTE:
|
/// NOTE:
|
||||||
/// - This function will rename the file. To get the new file path, call `get_file()`.
|
/// - This function will rename the file. To get the new file path, call `get_file()`.
|
||||||
@@ -1107,14 +1106,18 @@ impl Message {
|
|||||||
name: Option<&str>,
|
name: Option<&str>,
|
||||||
filemime: Option<&str>,
|
filemime: Option<&str>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let blob = BlobObject::create_and_deduplicate(context, file)?;
|
let name = if let Some(name) = name {
|
||||||
if let Some(name) = name {
|
name.to_string()
|
||||||
self.param.set(Param::Filename, name);
|
|
||||||
} else {
|
} else {
|
||||||
let file_name = file.file_name().map(OsStr::to_string_lossy);
|
file.file_name()
|
||||||
self.param.set_optional(Param::Filename, file_name);
|
.map(|s| s.to_string_lossy().to_string())
|
||||||
}
|
.unwrap_or_else(|| "unknown_file".to_string())
|
||||||
|
};
|
||||||
|
|
||||||
|
let blob = BlobObject::create_and_deduplicate(context, file, &name)?;
|
||||||
self.param.set(Param::File, blob.as_name());
|
self.param.set(Param::File, blob.as_name());
|
||||||
|
|
||||||
|
self.param.set(Param::Filename, name);
|
||||||
self.param.set_optional(Param::MimeType, filemime);
|
self.param.set_optional(Param::MimeType, filemime);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -1123,7 +1126,7 @@ impl Message {
|
|||||||
/// Creates a new blob and sets it as a file associated with a message.
|
/// Creates a new blob and sets it as a file associated with a message.
|
||||||
///
|
///
|
||||||
/// In order to deduplicate files that contain the same data,
|
/// In order to deduplicate files that contain the same data,
|
||||||
/// the filename will be a hash of the file data.
|
/// the file will be named `<hash>.<extension>`, e.g. `ce940175885d7b78f7b7e9f1396611f.jpg`.
|
||||||
///
|
///
|
||||||
/// NOTE: The file must not be modified after this function was called.
|
/// NOTE: The file must not be modified after this function was called.
|
||||||
pub fn set_file_from_bytes(
|
pub fn set_file_from_bytes(
|
||||||
@@ -1133,7 +1136,7 @@ impl Message {
|
|||||||
data: &[u8],
|
data: &[u8],
|
||||||
filemime: Option<&str>,
|
filemime: Option<&str>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let blob = BlobObject::create_and_deduplicate_from_bytes(context, data)?;
|
let blob = BlobObject::create_and_deduplicate_from_bytes(context, data, name)?;
|
||||||
self.param.set(Param::Filename, name);
|
self.param.set(Param::Filename, name);
|
||||||
self.param.set(Param::File, blob.as_name());
|
self.param.set(Param::File, blob.as_name());
|
||||||
self.param.set_optional(Param::MimeType, filemime);
|
self.param.set_optional(Param::MimeType, filemime);
|
||||||
|
|||||||
@@ -1384,16 +1384,17 @@ impl MimeMessage {
|
|||||||
/* we have a regular file attachment,
|
/* we have a regular file attachment,
|
||||||
write decoded data to new blob object */
|
write decoded data to new blob object */
|
||||||
|
|
||||||
let blob = match BlobObject::create_and_deduplicate_from_bytes(context, decoded_data) {
|
let blob =
|
||||||
Ok(blob) => blob,
|
match BlobObject::create_and_deduplicate_from_bytes(context, decoded_data, filename) {
|
||||||
Err(err) => {
|
Ok(blob) => blob,
|
||||||
error!(
|
Err(err) => {
|
||||||
context,
|
error!(
|
||||||
"Could not add blob for mime part {}, error {:#}", filename, err
|
context,
|
||||||
);
|
"Could not add blob for mime part {}, error {:#}", filename, err
|
||||||
return Ok(());
|
);
|
||||||
}
|
return Ok(());
|
||||||
};
|
}
|
||||||
|
};
|
||||||
info!(context, "added blobfile: {:?}", blob.as_name());
|
info!(context, "added blobfile: {:?}", blob.as_name());
|
||||||
|
|
||||||
if mime_type.type_() == mime::IMAGE {
|
if mime_type.type_() == mime::IMAGE {
|
||||||
|
|||||||
@@ -1667,7 +1667,7 @@ async fn test_pdf_filename_simple() {
|
|||||||
assert_eq!(
|
assert_eq!(
|
||||||
file_path,
|
file_path,
|
||||||
// That's the blake3 hash of the file content:
|
// That's the blake3 hash of the file content:
|
||||||
"$BLOBDIR/24a6af459cec5d733374aeaa19a6133"
|
"$BLOBDIR/24a6af459cec5d733374aeaa19a6133.pdf"
|
||||||
);
|
);
|
||||||
assert_eq!(msg.param.get(Param::Filename).unwrap(), "simple.pdf");
|
assert_eq!(msg.param.get(Param::Filename).unwrap(), "simple.pdf");
|
||||||
}
|
}
|
||||||
@@ -3268,7 +3268,7 @@ async fn test_weird_and_duplicated_filenames() -> Result<()> {
|
|||||||
msg.save_file(t, &path2).await.unwrap();
|
msg.save_file(t, &path2).await.unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
path.file_name().unwrap().to_str().unwrap(),
|
path.file_name().unwrap().to_str().unwrap(),
|
||||||
"79402cb76f44c5761888f9036992a76",
|
"79402cb76f44c5761888f9036992a76.gz",
|
||||||
"The hash of the content should always be the same"
|
"The hash of the content should always be the same"
|
||||||
);
|
);
|
||||||
assert_eq!(fs::read_to_string(&path).await.unwrap(), content);
|
assert_eq!(fs::read_to_string(&path).await.unwrap(), content);
|
||||||
|
|||||||
Reference in New Issue
Block a user