File deduplication (#6332)

When receiving messages, blobs will be deduplicated with the new
function `create_and_deduplicate_from_bytes()`. For sending files, this
adds a new function `set_file_and_deduplicate()` instead of
deduplicating by default.

This is for
https://github.com/deltachat/deltachat-core-rust/issues/6265; read the
issue description there for more details.

TODO:
- [x] Set files as read-only
- [x] Don't do a write when the file is already identical
- [x] The first 32 chars or so of the 64-character hash are enough. I
calculated that if 10b people (i.e. all of humanity) use DC, and each of
them has 200k distinct blob files (I have 4k in my day-to-day account),
and we used 20 chars, then the expected value for the number of name
collisions would be ~0.0002 (and the probability that there is a least
one name collision is lower than that) [^1]. I added 12 more characters
to be on the super safe side, but this wouldn't be necessary and I could
also make it 20 instead of 32.
- Not 100% sure whether that's necessary at all - it would mainly be
necessary if we might hit a length limit on some file systems (the
blobdir is usually sth like
`accounts/2ff9fc096d2f46b6832b24a1ed99c0d6/dc.db-blobs` (53 chars), plus
64 chars for the filename would be 117).
- [x] "touch" the files to prevent them from being deleted
- [x] TODOs in the code

For later PRs:
- Replace `BlobObject::create(…)` with
`BlobObject::create_and_deduplicate(…)` in order to deduplicate
everytime core creates a file
- Modify JsonRPC to deduplicate blob files
- Possibly rename BlobObject.name to BlobObject.file in order to prevent
confusion (because `name` usually means "user-visible-name", not "name
of the file on disk").

[^1]: Calculated with both https://printfn.github.io/fend/ and
https://www.geogebra.org/calculator, both of which came to the same
result
([1](https://github.com/user-attachments/assets/bbb62550-3781-48b5-88b1-ba0e29c28c0d),

[2](https://github.com/user-attachments/assets/82171212-b797-4117-a39f-0e132eac7252))

---------

Co-authored-by: l <link2xt@testrun.org>
This commit is contained in:
Hocuri
2025-01-21 19:42:19 +01:00
committed by GitHub
parent 22a7cfe9c3
commit 65a9c4b79b
23 changed files with 583 additions and 240 deletions

9
Cargo.lock generated
View File

@@ -571,9 +571,9 @@ dependencies = [
[[package]]
name = "blake3"
version = "1.5.4"
version = "1.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7"
checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e"
dependencies = [
"arrayref",
"arrayvec",
@@ -984,9 +984,9 @@ dependencies = [
[[package]]
name = "constant_time_eq"
version = "0.3.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2"
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
[[package]]
name = "convert_case"
@@ -1316,6 +1316,7 @@ dependencies = [
"async-smtp",
"async_zip",
"base64 0.22.1",
"blake3",
"brotli",
"bytes",
"chrono",

View File

@@ -110,6 +110,7 @@ toml = "0.8"
url = "2"
uuid = { version = "1", features = ["serde", "v4"] }
webpki-roots = "0.26.7"
blake3 = "1.5.5"
[dev-dependencies]
anyhow = { workspace = true, features = ["backtrace"] } # Enable `backtrace` feature in tests.

View File

@@ -4756,6 +4756,31 @@ void dc_msg_set_override_sender_name(dc_msg_t* msg, const char* name)
void dc_msg_set_file (dc_msg_t* msg, const char* file, const char* filemime);
/**
* Sets the file associated with a message.
*
* If `name` is non-null, it is used as the file name
* and the actual current name of the file is ignored.
*
* If the source file is already in the blobdir, it will be renamed,
* otherwise it will be copied to the blobdir first.
*
* In order to deduplicate files that contain the same data,
* the file will be named as a hash of the file data.
*
* NOTE:
* - This function will rename the file. To get the new file path, call `get_file()`.
* - The file must not be modified after this function was called.
*
* @memberof dc_msg_t
* @param msg The message object. Must not be NULL.
* @param file The path of the file to attach. Must not be NULL.
* @param name The original filename of the attachment. If NULL, the current name of `file` will be used instead.
* @param filemime The MIME type of the file. NULL if you don't know or don't care.
*/
void dc_msg_set_file_and_deduplicate(dc_msg_t* msg, const char* file, const char* name, const char* filemime);
/**
* Set the dimensions associated with message object.
* Typically this is the width and the height of an image or video associated using dc_msg_set_file().

View File

@@ -3835,6 +3835,33 @@ pub unsafe extern "C" fn dc_msg_set_file(
)
}
#[no_mangle]
pub unsafe extern "C" fn dc_msg_set_file_and_deduplicate(
msg: *mut dc_msg_t,
file: *const libc::c_char,
name: *const libc::c_char,
filemime: *const libc::c_char,
) {
if msg.is_null() || file.is_null() {
eprintln!("ignoring careless call to dc_msg_set_file_and_deduplicate()");
return;
}
let ffi_msg = &mut *msg;
let ctx = &*ffi_msg.context;
ffi_msg
.message
.set_file_and_deduplicate(
ctx,
as_path(file),
to_opt_string_lossy(name).as_deref(),
to_opt_string_lossy(filemime).as_deref(),
)
.context("Failed to set file")
.log_err(&*ffi_msg.context)
.ok();
}
#[no_mangle]
pub unsafe extern "C" fn dc_msg_set_dimension(
msg: *mut dc_msg_t,

View File

@@ -108,7 +108,9 @@ class Message:
@props.with_doc
def filename(self):
"""filename if there was an attachment, otherwise empty string."""
"""file path if there was an attachment, otherwise empty string.
If you want to get the file extension or a user-visible string,
use `basename` instead."""
return from_dc_charpointer(lib.dc_msg_get_file(self._dc_msg))
def set_file(self, path, mime_type=None):
@@ -120,7 +122,8 @@ class Message:
@props.with_doc
def basename(self) -> str:
"""basename of the attachment if it exists, otherwise empty string."""
"""The user-visible name of the attachment (incl. extension)
if it exists, otherwise empty string."""
# FIXME, it does not return basename
return from_dc_charpointer(lib.dc_msg_get_filename(self._dc_msg))

View File

@@ -181,15 +181,16 @@ def test_send_file_twice_unicode_filename_mangling(tmp_path, acfactory, lp):
msg = send_and_receive_message()
assert msg.text == "withfile"
assert open(msg.filename).read() == "some data"
msg.filename.index(basename)
assert msg.filename.endswith(ext)
msg.basename.index(basename)
assert msg.basename.endswith(ext)
msg2 = send_and_receive_message()
assert msg2.text == "withfile"
assert open(msg2.filename).read() == "some data"
msg2.filename.index(basename)
assert msg2.filename.endswith(ext)
assert msg.filename != msg2.filename
msg2.basename.index(basename)
assert msg2.basename.endswith(ext)
assert msg.filename == msg2.filename # The file is deduplicated
assert msg.basename == msg2.basename
def test_send_file_html_attachment(tmp_path, acfactory, lp):
@@ -214,8 +215,8 @@ def test_send_file_html_attachment(tmp_path, acfactory, lp):
msg = ac2.get_message_by_id(ev.data2)
assert open(msg.filename).read() == content
msg.filename.index(basename)
assert msg.filename.endswith(ext)
msg.basename.index(basename)
assert msg.basename.endswith(ext)
def test_html_message(acfactory, lp):

View File

@@ -16,7 +16,7 @@ use image::ImageReader;
use image::{DynamicImage, GenericImage, GenericImageView, ImageFormat, Pixel, Rgba};
use num_traits::FromPrimitive;
use tokio::io::AsyncWriteExt;
use tokio::{fs, io};
use tokio::{fs, io, task};
use tokio_stream::wrappers::ReadDirStream;
use crate::config::Config;
@@ -34,6 +34,10 @@ use crate::log::LogExt;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlobObject<'a> {
blobdir: &'a Path,
/// The name of the file on the disc.
/// Note that this is NOT the user-visible filename,
/// which is only stored in Param::Filename on the message.
name: String,
}
@@ -74,7 +78,7 @@ impl<'a> BlobObject<'a> {
Ok(blob)
}
// Creates a new file, returning a tuple of the name and the handle.
/// Creates a new file, returning a tuple of the name and the handle.
async fn create_new_file(
context: &Context,
dir: &Path,
@@ -88,6 +92,8 @@ impl<'a> BlobObject<'a> {
attempt += 1;
let path = dir.join(&name);
match fs::OpenOptions::new()
// Using `create_new(true)` in order to avoid race conditions
// when creating multiple files with the same name.
.create_new(true)
.write(true)
.open(&path)
@@ -139,6 +145,88 @@ impl<'a> BlobObject<'a> {
Ok(blob)
}
/// Creates a blob object by copying or renaming an existing file.
/// If the source file is already in the blobdir, it will be renamed,
/// otherwise it will be copied to the blobdir first.
///
/// In order to deduplicate files that contain the same data,
/// the file will be named as the hash of the file data.
///
/// This is done in a in way which avoids race-conditions when multiple files are
/// concurrently created.
pub fn create_and_deduplicate(context: &'a Context, src: &Path) -> Result<BlobObject<'a>> {
// `create_and_deduplicate{_from_bytes}()` do blocking I/O, but can still be called
// from an async context thanks to `block_in_place()`.
// Tokio's "async" I/O functions are also just thin wrappers around the blocking I/O syscalls,
// so we are doing essentially the same here.
task::block_in_place(|| {
let temp_path;
let src_in_blobdir: &Path;
let blobdir = context.get_blobdir();
if src.starts_with(blobdir) || src.starts_with("$BLOBDIR/") {
src_in_blobdir = src;
} else {
info!(
context,
"Source file not in blobdir. Copying instead of moving in order to prevent moving a file that was still needed."
);
temp_path = blobdir.join(format!("tmp-{}", rand::random::<u64>()));
if std::fs::copy(src, &temp_path).is_err() {
// Maybe the blobdir didn't exist
std::fs::create_dir_all(blobdir).log_err(context).ok();
std::fs::copy(src, &temp_path).context("Copying new blobfile failed")?;
};
src_in_blobdir = &temp_path;
}
let blob = BlobObject::from_hash(blobdir, file_hash(src_in_blobdir)?);
let new_path = blob.to_abs_path();
// This will also replace an already-existing file.
// Renaming is atomic, so this will avoid race conditions.
std::fs::rename(src_in_blobdir, &new_path)?;
context.emit_event(EventType::NewBlobFile(blob.as_name().to_string()));
Ok(blob)
})
}
/// Creates a new blob object with the file contents in `data`.
/// In order to deduplicate files that contain the same data,
/// the file will be renamed to a hash of the file data.
///
/// The `data` will be written into the file without race-conditions.
///
/// This function does blocking I/O, but it can still be called from an async context
/// because `block_in_place()` is used to leave the async runtime if necessary.
pub fn create_and_deduplicate_from_bytes(
context: &'a Context,
data: &[u8],
) -> Result<BlobObject<'a>> {
task::block_in_place(|| {
let blobdir = context.get_blobdir();
let temp_path = blobdir.join(format!("tmp-{}", rand::random::<u64>()));
if std::fs::write(&temp_path, data).is_err() {
// Maybe the blobdir didn't exist
std::fs::create_dir_all(blobdir).log_err(context).ok();
std::fs::write(&temp_path, data).context("writing new blobfile failed")?;
};
BlobObject::create_and_deduplicate(context, &temp_path)
})
}
fn from_hash(blobdir: &Path, hash: blake3::Hash) -> BlobObject<'_> {
let hash = hash.to_hex();
let hash = hash.as_str();
let hash = hash.get(0..31).unwrap_or(hash);
BlobObject {
blobdir,
name: format!("$BLOBDIR/{hash}"),
}
}
/// Creates a blob from a file, possibly copying it to the blobdir.
///
/// If the source file is not a path to into the blob directory
@@ -210,6 +298,9 @@ impl<'a> BlobObject<'a> {
/// this string in the database or [Params]. Eventually even
/// those conversions should be handled by the type system.
///
/// Note that this is NOT the user-visible filename,
/// which is only stored in Param::Filename on the message.
///
/// [Params]: crate::param::Params
pub fn as_name(&self) -> &str {
&self.name
@@ -356,8 +447,6 @@ impl<'a> BlobObject<'a> {
}
pub async fn recode_to_avatar_size(&mut self, context: &Context) -> Result<()> {
let blob_abs = self.to_abs_path();
let img_wh =
match MediaQuality::from_i32(context.get_config_int(Config::MediaQuality).await?)
.unwrap_or_default()
@@ -370,16 +459,15 @@ impl<'a> BlobObject<'a> {
let strict_limits = true;
// max_bytes is 20_000 bytes: Outlook servers don't allow headers larger than 32k.
// 32 / 4 * 3 = 24k if you account for base64 encoding. To be safe, we reduced this to 20k.
if let Some(new_name) = self.recode_to_size(
self.recode_to_size(
context,
blob_abs,
"".to_string(), // The name of an avatar doesn't matter
maybe_sticker,
img_wh,
20_000,
strict_limits,
)? {
self.name = new_name;
}
)?;
Ok(())
}
@@ -393,9 +481,9 @@ impl<'a> BlobObject<'a> {
pub async fn recode_to_image_size(
&mut self,
context: &Context,
name: String,
maybe_sticker: &mut bool,
) -> Result<()> {
let blob_abs = self.to_abs_path();
) -> Result<String> {
let (img_wh, max_bytes) =
match MediaQuality::from_i32(context.get_config_int(Config::MediaQuality).await?)
.unwrap_or_default()
@@ -407,35 +495,43 @@ impl<'a> BlobObject<'a> {
MediaQuality::Worse => (constants::WORSE_IMAGE_SIZE, constants::WORSE_IMAGE_BYTES),
};
let strict_limits = false;
if let Some(new_name) = self.recode_to_size(
let new_name = self.recode_to_size(
context,
blob_abs,
name,
maybe_sticker,
img_wh,
max_bytes,
strict_limits,
)? {
self.name = new_name;
}
Ok(())
)?;
Ok(new_name)
}
/// If `!strict_limits`, then if `max_bytes` is exceeded, reduce the image to `img_wh` and just
/// proceed with the result.
///
/// This modifies the blob object in-place.
///
/// Additionally, if you pass the user-visible filename as `name`
/// then the updated user-visible filename will be returned;
/// this may be necessary because the format may be changed to JPG,
/// i.e. "image.png" -> "image.jpg".
/// Pass an empty string if you don't care.
fn recode_to_size(
&mut self,
context: &Context,
mut blob_abs: PathBuf,
mut name: String,
maybe_sticker: &mut bool,
mut img_wh: u32,
max_bytes: usize,
strict_limits: bool,
) -> Result<Option<String>> {
) -> Result<String> {
// Add white background only to avatars to spare the CPU.
let mut add_white_bg = img_wh <= constants::BALANCED_AVATAR_SIZE;
let mut no_exif = false;
let no_exif_ref = &mut no_exif;
let res = tokio::task::block_in_place(move || {
let original_name = name.clone();
let res: Result<String> = tokio::task::block_in_place(move || {
let mut file = std::fs::File::open(self.to_abs_path())?;
let (nr_bytes, exif) = image_metadata(&file)?;
*no_exif_ref = exif.is_none();
@@ -449,7 +545,7 @@ impl<'a> BlobObject<'a> {
file.rewind()?;
ImageReader::with_format(
std::io::BufReader::new(&file),
ImageFormat::from_path(&blob_abs)?,
ImageFormat::from_path(self.to_abs_path())?,
)
}
};
@@ -457,7 +553,6 @@ impl<'a> BlobObject<'a> {
let mut img = imgreader.decode().context("image decode failure")?;
let orientation = exif.as_ref().map(|exif| exif_orientation(exif, context));
let mut encoded = Vec::new();
let mut changed_name = None;
if *maybe_sticker {
let x_max = img.width().saturating_sub(1);
@@ -469,7 +564,7 @@ impl<'a> BlobObject<'a> {
|| img.get_pixel(x_max, y_max).0[3] == 0);
}
if *maybe_sticker && exif.is_none() {
return Ok(None);
return Ok(name);
}
img = match orientation {
@@ -566,10 +661,10 @@ impl<'a> BlobObject<'a> {
if !matches!(fmt, ImageFormat::Jpeg)
&& matches!(ofmt, ImageOutputFormat::Jpeg { .. })
{
blob_abs = blob_abs.with_extension("jpg");
let file_name = blob_abs.file_name().context("No image file name (???)")?;
let file_name = file_name.to_str().context("Filename is no UTF-8 (???)")?;
changed_name = Some(format!("$BLOBDIR/{file_name}"));
name = Path::new(&name)
.with_extension("jpg")
.to_string_lossy()
.into_owned();
}
if encoded.is_empty() {
@@ -579,11 +674,12 @@ impl<'a> BlobObject<'a> {
encode_img(&img, ofmt, &mut encoded)?;
}
std::fs::write(&blob_abs, &encoded)
.context("failed to write recoded blob to file")?;
self.name = BlobObject::create_and_deduplicate_from_bytes(context, &encoded)
.context("failed to write recoded blob to file")?
.name;
}
Ok(changed_name)
Ok(name)
});
match res {
Ok(_) => res,
@@ -593,7 +689,7 @@ impl<'a> BlobObject<'a> {
context,
"Cannot recode image, using original data: {err:#}.",
);
Ok(None)
Ok(original_name)
} else {
Err(err)
}
@@ -602,6 +698,17 @@ impl<'a> BlobObject<'a> {
}
}
fn file_hash(src: &Path) -> Result<blake3::Hash> {
let mut hasher = blake3::Hasher::new();
let mut src_file = std::fs::File::open(src)
.with_context(|| format!("Failed to open file {}", src.display()))?;
hasher
.update_reader(&mut src_file)
.context("update_reader")?;
let hash = hasher.finalize();
Ok(hash)
}
/// Returns image file size and Exif.
pub fn image_metadata(file: &std::fs::File) -> Result<(u64, Option<exif::Exif>)> {
let len = file.metadata()?.len();
@@ -762,15 +869,22 @@ fn add_white_bg(img: &mut DynamicImage) {
#[cfg(test)]
mod tests {
use fs::File;
use std::time::Duration;
use super::*;
use crate::message::{Message, Viewtype};
use crate::sql;
use crate::test_utils::{self, TestContext};
use crate::tools::SystemTime;
fn check_image_size(path: impl AsRef<Path>, width: u32, height: u32) -> image::DynamicImage {
tokio::task::block_in_place(move || {
let img = image::open(path).expect("failed to open image");
let img = ImageReader::open(path)
.expect("failed to open image")
.with_guessed_format()
.expect("failed to guess format")
.decode()
.expect("failed to decode image");
assert_eq!(img.width(), width, "invalid width");
assert_eq!(img.height(), height, "invalid height");
img
@@ -1008,7 +1122,7 @@ mod tests {
let strict_limits = true;
blob.recode_to_size(
&t,
blob.to_abs_path(),
"avatar.png".to_string(),
maybe_sticker,
img_wh,
20_000,
@@ -1016,7 +1130,12 @@ mod tests {
)
.unwrap();
tokio::task::block_in_place(move || {
let img = image::open(blob.to_abs_path()).unwrap();
let img = ImageReader::open(blob.to_abs_path())
.unwrap()
.with_guessed_format()
.unwrap()
.decode()
.unwrap();
assert!(img.width() == img_wh);
assert!(img.height() == img_wh);
assert_eq!(img.get_pixel(0, 0), Rgba(color));
@@ -1026,19 +1145,25 @@ mod tests {
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_selfavatar_outside_blobdir() {
async fn file_size(path_buf: &Path) -> u64 {
fs::metadata(path_buf).await.unwrap().len()
}
let t = TestContext::new().await;
let avatar_src = t.dir.path().join("avatar.jpg");
let avatar_bytes = include_bytes!("../test-data/image/avatar1000x1000.jpg");
fs::write(&avatar_src, avatar_bytes).await.unwrap();
let avatar_blob = t.get_blobdir().join("avatar.jpg");
assert!(!avatar_blob.exists());
t.set_config(Config::Selfavatar, Some(avatar_src.to_str().unwrap()))
.await
.unwrap();
assert!(avatar_blob.exists());
assert!(fs::metadata(&avatar_blob).await.unwrap().len() < avatar_bytes.len() as u64);
let avatar_cfg = t.get_config(Config::Selfavatar).await.unwrap();
assert_eq!(avatar_cfg, avatar_blob.to_str().map(|s| s.to_string()));
let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap();
let avatar_path = Path::new(&avatar_blob);
assert!(
avatar_blob.ends_with("d98cd30ed8f2129bf3968420208849d"),
"The avatar filename should be its hash, put instead it's {avatar_blob}"
);
let scaled_avatar_size = file_size(avatar_path).await;
assert!(scaled_avatar_size < avatar_bytes.len() as u64);
check_image_size(avatar_src, 1000, 1000);
check_image_size(
@@ -1047,27 +1172,32 @@ mod tests {
constants::BALANCED_AVATAR_SIZE,
);
async fn file_size(path_buf: &Path) -> u64 {
let file = File::open(path_buf).await.unwrap();
file.metadata().await.unwrap().len()
}
let mut blob = BlobObject::new_from_path(&t, &avatar_blob).await.unwrap();
let mut blob = BlobObject::new_from_path(&t, avatar_path).await.unwrap();
let maybe_sticker = &mut false;
let strict_limits = true;
blob.recode_to_size(
&t,
blob.to_abs_path(),
"avatar.jpg".to_string(),
maybe_sticker,
1000,
3000,
strict_limits,
)
.unwrap();
assert!(file_size(&avatar_blob).await <= 3000);
assert!(file_size(&avatar_blob).await > 2000);
let new_file_size = file_size(&blob.to_abs_path()).await;
assert!(new_file_size <= 3000);
assert!(new_file_size > 2000);
// The new file should be smaller:
assert!(new_file_size < scaled_avatar_size);
// And the original file should not be touched:
assert_eq!(file_size(avatar_path).await, scaled_avatar_size);
tokio::task::block_in_place(move || {
let img = image::open(avatar_blob).unwrap();
let img = ImageReader::open(blob.to_abs_path())
.unwrap()
.with_guessed_format()
.unwrap()
.decode()
.unwrap();
assert!(img.width() > 130);
assert_eq!(img.width(), img.height());
});
@@ -1087,9 +1217,9 @@ mod tests {
.await
.unwrap();
let avatar_cfg = t.get_config(Config::Selfavatar).await.unwrap().unwrap();
assert_eq!(
avatar_cfg,
avatar_src.with_extension("png").to_str().unwrap()
assert!(
avatar_cfg.ends_with("9e7f409ac5c92b942cc4f31cee2770a"),
"Avatar file name {avatar_cfg} should end with its hash"
);
check_image_size(
@@ -1373,6 +1503,7 @@ mod tests {
.set_config(Config::MediaQuality, Some(media_quality_config))
.await?;
let file = alice.get_blobdir().join("file").with_extension(extension);
let file_name = format!("file.{extension}");
fs::write(&file, &bytes)
.await
@@ -1388,7 +1519,7 @@ mod tests {
}
let mut msg = Message::new(viewtype);
msg.set_file(file.to_str().unwrap(), None);
msg.set_file_and_deduplicate(&alice, &file, Some(&file_name), None)?;
let chat = alice.create_chat(&bob).await;
if set_draft {
chat.id.set_draft(&alice, Some(&mut msg)).await.unwrap();
@@ -1444,7 +1575,7 @@ mod tests {
.await
.context("failed to write file")?;
let mut msg = Message::new(Viewtype::Image);
msg.set_file(file.to_str().unwrap(), None);
msg.set_file_and_deduplicate(&alice, &file, Some("file.gif"), None)?;
let chat = alice.create_chat(&bob).await;
let sent = alice.send_msg(chat.id, &mut msg).await;
let bob_msg = bob.recv_msg(&sent).await;
@@ -1480,4 +1611,83 @@ mod tests {
assert_eq!(msg.get_viewtype(), Viewtype::Sticker);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_create_and_deduplicate() -> Result<()> {
let t = TestContext::new().await;
let path = t.get_blobdir().join("anyfile.dat");
fs::write(&path, b"bla").await?;
let blob = BlobObject::create_and_deduplicate(&t, &path)?;
assert_eq!(blob.name, "$BLOBDIR/ce940175885d7b78f7b7e9f1396611f");
assert_eq!(path.exists(), false);
assert_eq!(fs::read(&blob.to_abs_path()).await?, b"bla");
fs::write(&path, b"bla").await?;
let blob2 = BlobObject::create_and_deduplicate(&t, &path)?;
assert_eq!(blob2.name, blob.name);
let path_outside_blobdir = t.dir.path().join("anyfile.dat");
fs::write(&path_outside_blobdir, b"bla").await?;
let blob3 = BlobObject::create_and_deduplicate(&t, &path_outside_blobdir)?;
assert!(path_outside_blobdir.exists());
assert_eq!(blob3.name, blob.name);
fs::write(&path, b"blabla").await?;
let blob4 = BlobObject::create_and_deduplicate(&t, &path)?;
assert_ne!(blob4.name, blob.name);
fs::remove_dir_all(t.get_blobdir()).await?;
let blob5 = BlobObject::create_and_deduplicate(&t, &path_outside_blobdir)?;
assert_eq!(blob5.name, blob.name);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_create_and_deduplicate_from_bytes() -> Result<()> {
let t = TestContext::new().await;
fs::remove_dir(t.get_blobdir()).await?;
let blob = BlobObject::create_and_deduplicate_from_bytes(&t, b"bla")?;
assert_eq!(blob.name, "$BLOBDIR/ce940175885d7b78f7b7e9f1396611f");
assert_eq!(fs::read(&blob.to_abs_path()).await?, b"bla");
let modified1 = blob.to_abs_path().metadata()?.modified()?;
// Test that the modification time of the file is updated when a new file is created
// so that it's not deleted during housekeeping.
// We can't use SystemTime::shift() here because file creation uses the actual OS time,
// which we can't mock from our code.
tokio::time::sleep(Duration::from_millis(1100)).await;
let blob2 = BlobObject::create_and_deduplicate_from_bytes(&t, b"bla")?;
assert_eq!(blob2.name, blob.name);
let modified2 = blob.to_abs_path().metadata()?.modified()?;
assert_ne!(modified1, modified2);
sql::housekeeping(&t).await?;
assert!(blob2.to_abs_path().exists());
// If we do shift the time by more than 1h, the blob file will be deleted during housekeeping:
SystemTime::shift(Duration::from_secs(65 * 60));
sql::housekeeping(&t).await?;
assert_eq!(blob2.to_abs_path().exists(), false);
let blob3 = BlobObject::create_and_deduplicate_from_bytes(&t, b"blabla")?;
assert_ne!(blob3.name, blob.name);
{
// If something goes wrong and the blob file is overwritten,
// the correct content should be restored:
fs::write(blob3.to_abs_path(), b"bloblo").await?;
let blob4 = BlobObject::create_and_deduplicate_from_bytes(&t, b"blabla")?;
let blob4_content = fs::read(blob4.to_abs_path()).await?;
assert_eq!(blob4_content, b"blabla");
}
Ok(())
}
}

View File

@@ -896,13 +896,12 @@ impl ChatId {
.context("no file stored in params")?;
msg.param.set(Param::File, blob.as_name());
if msg.viewtype == Viewtype::File {
if let Some((better_type, _)) =
message::guess_msgtype_from_suffix(&blob.to_abs_path())
// We do not do an automatic conversion to other viewtypes here so that
// users can send images as "files" to preserve the original quality
// (usually we compress images). The remaining conversions are done by
// `prepare_msg_blob()` later.
.filter(|&(vt, _)| vt == Viewtype::Webxdc || vt == Viewtype::Vcard)
if let Some((better_type, _)) = message::guess_msgtype_from_suffix(msg)
// We do not do an automatic conversion to other viewtypes here so that
// users can send images as "files" to preserve the original quality
// (usually we compress images). The remaining conversions are done by
// `prepare_msg_blob()` later.
.filter(|&(vt, _)| vt == Viewtype::Webxdc || vt == Viewtype::Vcard)
{
msg.viewtype = better_type;
}
@@ -2722,8 +2721,7 @@ async fn prepare_msg_blob(context: &Context, msg: &mut Message) -> Result<()> {
// Typical conversions:
// - from FILE to AUDIO/VIDEO/IMAGE
// - from FILE/IMAGE to GIF */
if let Some((better_type, _)) = message::guess_msgtype_from_suffix(&blob.to_abs_path())
{
if let Some((better_type, _)) = message::guess_msgtype_from_suffix(msg) {
if msg.viewtype == Viewtype::Sticker {
if better_type != Viewtype::Image {
// UIs don't want conversions of `Sticker` to anything other than `Image`.
@@ -2753,8 +2751,14 @@ async fn prepare_msg_blob(context: &Context, msg: &mut Message) -> Result<()> {
&& (msg.viewtype == Viewtype::Image
|| maybe_sticker && !msg.param.exists(Param::ForceSticker))
{
blob.recode_to_image_size(context, &mut maybe_sticker)
let new_name = blob
.recode_to_image_size(
context,
msg.get_filename().unwrap_or_else(|| "file".to_string()),
&mut maybe_sticker,
)
.await?;
msg.param.set(Param::Filename, new_name);
if !maybe_sticker {
msg.viewtype = Viewtype::Image;
@@ -2771,7 +2775,7 @@ async fn prepare_msg_blob(context: &Context, msg: &mut Message) -> Result<()> {
}
if !msg.param.exists(Param::MimeType) {
if let Some((_, mime)) = message::guess_msgtype_from_suffix(&blob.to_abs_path()) {
if let Some((_, mime)) = message::guess_msgtype_from_suffix(msg) {
msg.param.set(Param::MimeType, mime);
}
}

View File

@@ -1825,7 +1825,7 @@ async fn test_sticker(
tokio::fs::write(&file, bytes).await?;
let mut msg = Message::new(Viewtype::Sticker);
msg.set_file(file.to_str().unwrap(), None);
msg.set_file_and_deduplicate(&alice, &file, Some(filename), None)?;
let sent_msg = alice.send_msg(alice_chat.id, &mut msg).await;
let mime = sent_msg.payload();
@@ -1889,14 +1889,17 @@ async fn test_sticker_jpeg_force() {
// Images without force_sticker should be turned into [Viewtype::Image]
let mut msg = Message::new(Viewtype::Sticker);
msg.set_file(file.to_str().unwrap(), None);
msg.set_file_and_deduplicate(&alice, &file, Some("sticker.jpg"), None)
.unwrap();
let file = msg.get_file(&alice).unwrap();
let sent_msg = alice.send_msg(alice_chat.id, &mut msg).await;
let msg = bob.recv_msg(&sent_msg).await;
assert_eq!(msg.get_viewtype(), Viewtype::Image);
// Images with `force_sticker = true` should keep [Viewtype::Sticker]
let mut msg = Message::new(Viewtype::Sticker);
msg.set_file(file.to_str().unwrap(), None);
msg.set_file_and_deduplicate(&alice, &file, Some("sticker.jpg"), None)
.unwrap();
msg.force_sticker();
let sent_msg = alice.send_msg(alice_chat.id, &mut msg).await;
let msg = bob.recv_msg(&sent_msg).await;
@@ -1905,7 +1908,8 @@ async fn test_sticker_jpeg_force() {
// Images with `force_sticker = true` should keep [Viewtype::Sticker]
// even on drafted messages
let mut msg = Message::new(Viewtype::Sticker);
msg.set_file(file.to_str().unwrap(), None);
msg.set_file_and_deduplicate(&alice, &file, Some("sticker.jpg"), None)
.unwrap();
msg.force_sticker();
alice_chat
.id
@@ -1944,7 +1948,7 @@ async fn test_sticker_forward() -> Result<()> {
let file = alice.get_blobdir().join(file_name);
tokio::fs::write(&file, bytes).await?;
let mut msg = Message::new(Viewtype::Sticker);
msg.set_file(file.to_str().unwrap(), None);
msg.set_file_and_deduplicate(&alice, &file, Some("sticker.jpg"), None)?;
// send sticker to bob
let sent_msg = alice.send_msg(alice_chat.get_id(), &mut msg).await;
@@ -2669,7 +2673,7 @@ async fn test_get_chat_media() -> Result<()> {
let file = t.get_blobdir().join(name);
tokio::fs::write(&file, bytes).await?;
let mut msg = Message::new(msg_type);
msg.set_file(file.to_str().unwrap(), None);
msg.set_file_and_deduplicate(t, &file, Some(name), None)?;
send_msg(t, chat_id, &mut msg).await
}
@@ -2820,18 +2824,21 @@ async fn test_blob_renaming() -> Result<()> {
Contact::create(&alice, "bob", "bob@example.net").await?,
)
.await?;
let dir = tempfile::tempdir()?;
let file = dir.path().join("harmless_file.\u{202e}txt.exe");
let file = alice.get_blobdir().join("harmless_file.\u{202e}txt.exe");
fs::write(&file, "aaa").await?;
let mut msg = Message::new(Viewtype::File);
msg.set_file(file.to_str().unwrap(), None);
msg.set_file_and_deduplicate(&alice, &file, Some("harmless_file.\u{202e}txt.exe"), None)?;
let msg = bob.recv_msg(&alice.send_msg(chat_id, &mut msg).await).await;
// the file bob receives should not contain BIDI-control characters
assert_eq!(
Some("$BLOBDIR/harmless_file.txt.exe"),
Some("$BLOBDIR/30c0f9c6a167fc2a91285c85be7ea34"),
msg.param.get(Param::File),
);
assert_eq!(
Some("harmless_file.txt.exe"),
msg.param.get(Param::Filename),
);
Ok(())
}
@@ -3156,7 +3163,7 @@ async fn test_jpeg_with_png_ext() -> Result<()> {
let file = alice.get_blobdir().join("screenshot.png");
tokio::fs::write(&file, bytes).await?;
let mut msg = Message::new(Viewtype::Image);
msg.set_file(file.to_str().unwrap(), None);
msg.set_file_and_deduplicate(&alice, &file, Some("screenshot.png"), None)?;
let alice_chat = alice.create_chat(&bob).await;
let sent_msg = alice.send_msg(alice_chat.get_id(), &mut msg).await;

View File

@@ -100,7 +100,9 @@ pub async fn maybe_set_logging_xdc(
context,
msg.get_viewtype(),
chat_id,
msg.param.get_path(Param::File, context).unwrap_or_default(),
msg.param
.get_path(Param::Filename, context)
.unwrap_or_default(),
msg.get_id(),
)
.await?;
@@ -113,11 +115,11 @@ pub async fn maybe_set_logging_xdc_inner(
context: &Context,
viewtype: Viewtype,
chat_id: ChatId,
file: Option<PathBuf>,
filename: Option<PathBuf>,
msg_id: MsgId,
) -> anyhow::Result<()> {
if viewtype == Viewtype::Webxdc {
if let Some(file) = file {
if let Some(file) = filename {
if let Some(file_name) = file.file_name().and_then(|name| name.to_str()) {
if file_name.starts_with("debug_logging")
&& file_name.ends_with(".xdc")

View File

@@ -440,7 +440,7 @@ mod tests {
let file = alice.get_blobdir().join("minimal.xdc");
tokio::fs::write(&file, include_bytes!("../test-data/webxdc/minimal.xdc")).await?;
let mut instance = Message::new(Viewtype::File);
instance.set_file(file.to_str().unwrap(), None);
instance.set_file_and_deduplicate(&alice, &file, None, None)?;
let _sent1 = alice.send_msg(chat_id, &mut instance).await;
alice

View File

@@ -394,7 +394,8 @@ mod tests {
let file = ctx0.get_blobdir().join("hello.txt");
fs::write(&file, "i am attachment").await.unwrap();
let mut msg = Message::new(Viewtype::File);
msg.set_file(file.to_str().unwrap(), Some("text/plain"));
msg.set_file_and_deduplicate(&ctx0, &file, Some("hello.txt"), Some("text/plain"))
.unwrap();
send_msg(&ctx0, self_chat.id, &mut msg).await.unwrap();
// Prepare to transfer backup.
@@ -428,7 +429,12 @@ mod tests {
let msg = Message::load_from_db(&ctx1, *msgid).await.unwrap();
let path = msg.get_file(&ctx1).unwrap();
assert_eq!(path.with_file_name("hello.txt"), path);
assert_eq!(
// That's the hash of the file:
path.with_file_name("ac1d2d284757656a8d41dc40aae4136"),
path
);
assert_eq!("hello.txt", msg.get_filename().unwrap());
let text = fs::read_to_string(&path).await.unwrap();
assert_eq!(text, "i am attachment");

View File

@@ -1074,7 +1074,7 @@ Content-Disposition: attachment; filename="location.kml"
let file = alice.get_blobdir().join(file_name);
tokio::fs::write(&file, bytes).await?;
let mut msg = Message::new(Viewtype::Image);
msg.set_file(file.to_str().unwrap(), None);
msg.set_file_and_deduplicate(&alice, &file, Some("logo.png"), None)?;
let sent = alice.send_msg(alice_chat.id, &mut msg).await;
let alice_msg = Message::load_from_db(&alice, sent.sender_msg_id).await?;
assert_eq!(alice_msg.has_location(), false);

View File

@@ -1,6 +1,7 @@
//! # Messages and their identifiers.
use std::collections::BTreeSet;
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use std::str;
@@ -623,8 +624,8 @@ impl Message {
pub fn get_filemime(&self) -> Option<String> {
if let Some(m) = self.param.get(Param::MimeType) {
return Some(m.to_string());
} else if let Some(file) = self.param.get(Param::File) {
if let Some((_, mime)) = guess_msgtype_from_suffix(Path::new(file)) {
} else if self.param.exists(Param::File) {
if let Some((_, mime)) = guess_msgtype_from_suffix(self) {
return Some(mime.to_string());
}
// we have a file but no mimetype, let's use a generic one
@@ -1085,18 +1086,58 @@ impl Message {
self.param.set_optional(Param::MimeType, filemime);
}
/// Creates a new blob and sets it as a file associated with a message.
pub async fn set_file_from_bytes(
/// Sets the file associated with a message, deduplicating files with the same name.
///
/// If `name` is Some, it is used as the file name
/// and the actual current name of the file is ignored.
///
/// If the source file is already in the blobdir, it will be renamed,
/// otherwise it will be copied to the blobdir first.
///
/// In order to deduplicate files that contain the same data,
/// the file will be named as a hash of the file data.
///
/// NOTE:
/// - This function will rename the file. To get the new file path, call `get_file()`.
/// - The file must not be modified after this function was called.
pub fn set_file_and_deduplicate(
&mut self,
context: &Context,
suggested_name: &str,
file: &Path,
name: Option<&str>,
filemime: Option<&str>,
) -> Result<()> {
let blob = BlobObject::create_and_deduplicate(context, file)?;
if let Some(name) = name {
self.param.set(Param::Filename, name);
} else {
let file_name = file.file_name().map(OsStr::to_string_lossy);
self.param.set_optional(Param::Filename, file_name);
}
self.param.set(Param::File, blob.as_name());
self.param.set_optional(Param::MimeType, filemime);
Ok(())
}
/// Creates a new blob and sets it as a file associated with a message.
///
/// In order to deduplicate files that contain the same data,
/// the filename will be a hash of the file data.
///
/// NOTE: The file must not be modified after this function was called.
pub fn set_file_from_bytes(
&mut self,
context: &Context,
name: &str,
data: &[u8],
filemime: Option<&str>,
) -> Result<()> {
let blob = BlobObject::create(context, suggested_name, data).await?;
self.param.set(Param::Filename, suggested_name);
let blob = BlobObject::create_and_deduplicate_from_bytes(context, data)?;
self.param.set(Param::Filename, name);
self.param.set(Param::File, blob.as_name());
self.param.set_optional(Param::MimeType, filemime);
Ok(())
}
@@ -1109,7 +1150,6 @@ impl Message {
);
let vcard = contact::make_vcard(context, contacts).await?;
self.set_file_from_bytes(context, "vcard.vcf", vcard.as_bytes(), None)
.await
}
/// Updates message state from the vCard attachment.
@@ -1467,7 +1507,14 @@ pub async fn get_msg_read_receipts(
.await
}
pub(crate) fn guess_msgtype_from_suffix(path: &Path) -> Option<(Viewtype, &str)> {
pub(crate) fn guess_msgtype_from_suffix(msg: &Message) -> Option<(Viewtype, &'static str)> {
msg.param
.get(Param::Filename)
.or_else(|| msg.param.get(Param::File))
.and_then(|file| guess_msgtype_from_path_suffix(Path::new(file)))
}
pub(crate) fn guess_msgtype_from_path_suffix(path: &Path) -> Option<(Viewtype, &'static str)> {
let extension: &str = &path.extension()?.to_str()?.to_lowercase();
let info = match extension {
// before using viewtype other than Viewtype::File,
@@ -2213,15 +2260,15 @@ mod tests {
#[test]
fn test_guess_msgtype_from_suffix() {
assert_eq!(
guess_msgtype_from_suffix(Path::new("foo/bar-sth.mp3")),
guess_msgtype_from_path_suffix(Path::new("foo/bar-sth.mp3")),
Some((Viewtype::Audio, "audio/mpeg"))
);
assert_eq!(
guess_msgtype_from_suffix(Path::new("foo/file.html")),
guess_msgtype_from_path_suffix(Path::new("foo/file.html")),
Some((Viewtype::File, "text/html"))
);
assert_eq!(
guess_msgtype_from_suffix(Path::new("foo/file.xdc")),
guess_msgtype_from_path_suffix(Path::new("foo/file.xdc")),
Some((Viewtype::Webxdc, "application/webxdc+zip"))
);
}
@@ -2627,8 +2674,7 @@ mod tests {
let file_bytes = include_bytes!("../test-data/image/screenshot.png");
let mut msg = Message::new(Viewtype::Image);
msg.set_file_from_bytes(bob, "a.jpg", file_bytes, None)
.await?;
msg.set_file_from_bytes(bob, "a.jpg", file_bytes, None)?;
let sent_msg = bob.send_msg(bob_chat_id, &mut msg).await;
let msg = alice.recv_msg(&sent_msg).await;
assert_eq!(msg.download_state, DownloadState::Available);
@@ -2697,8 +2743,7 @@ mod tests {
let file_bytes = include_bytes!("../test-data/image/screenshot.png");
let mut msg = Message::new(Viewtype::Image);
msg.set_file_from_bytes(bob, "a.jpg", file_bytes, None)
.await?;
msg.set_file_from_bytes(bob, "a.jpg", file_bytes, None)?;
let sent_msg = bob.send_msg(bob_chat_id, &mut msg).await;
let msg = alice.recv_msg(&sent_msg).await;
assert_eq!(msg.download_state, DownloadState::Available);

View File

@@ -1,6 +1,7 @@
//! # MIME message production.
use std::collections::HashSet;
use std::path::Path;
use anyhow::{bail, Context as _, Result};
use base64::Engine as _;
@@ -1605,12 +1606,17 @@ pub(crate) fn wrapped_base64_encode(buf: &[u8]) -> String {
}
async fn build_body_file(context: &Context, msg: &Message) -> Result<PartBuilder> {
let file_name = msg.get_filename().context("msg has no file")?;
let suffix = Path::new(&file_name)
.extension()
.and_then(|e| e.to_str())
.unwrap_or("dat");
let blob = msg
.param
.get_blob(Param::File, context)
.await?
.context("msg has no file")?;
let suffix = blob.suffix().unwrap_or("dat");
// Get file name to use for sending. For privacy purposes, we do
// not transfer the original filenames eg. for images; these names
@@ -1650,18 +1656,14 @@ async fn build_body_file(context: &Context, msg: &Message) -> Result<PartBuilder
),
&suffix
),
_ => msg
.param
.get(Param::Filename)
.unwrap_or_else(|| blob.as_file_name())
.to_string(),
_ => file_name,
};
/* check mimetype */
let mimetype: mime::Mime = match msg.param.get(Param::MimeType) {
Some(mtype) => mtype.parse()?,
None => {
if let Some(res) = message::guess_msgtype_from_suffix(blob.as_rel_path()) {
if let Some(res) = message::guess_msgtype_from_suffix(msg) {
res.1.parse()?
} else {
mime::APPLICATION_OCTET_STREAM
@@ -2624,8 +2626,7 @@ mod tests {
// Long messages are truncated and MimeMessage::decoded_data is set for them. We need
// decoded_data to check presence of the necessary headers.
msg.set_text("a".repeat(constants::DC_DESIRED_TEXT_LEN + 1));
msg.set_file_from_bytes(&bob, "foo.bar", "content".as_bytes(), None)
.await?;
msg.set_file_from_bytes(&bob, "foo.bar", "content".as_bytes(), None)?;
let sent = bob.send_msg(chat, &mut msg).await;
assert!(msg.get_showpadlock());
assert!(sent.payload.contains("\r\nSubject: [...]\r\n"));

View File

@@ -1384,7 +1384,7 @@ impl MimeMessage {
/* we have a regular file attachment,
write decoded data to new blob object */
let blob = match BlobObject::create(context, filename, decoded_data).await {
let blob = match BlobObject::create_and_deduplicate_from_bytes(context, decoded_data) {
Ok(blob) => blob,
Err(err) => {
error!(
@@ -2075,10 +2075,12 @@ fn get_mime_type(
}
mime::APPLICATION => match mimetype.subtype() {
mime::OCTET_STREAM => match filename {
Some(filename) => match message::guess_msgtype_from_suffix(Path::new(&filename)) {
Some((viewtype, _)) => viewtype,
None => Viewtype::File,
},
Some(filename) => {
match message::guess_msgtype_from_path_suffix(Path::new(&filename)) {
Some((viewtype, _)) => viewtype,
None => Viewtype::File,
}
}
None => Viewtype::File,
},
_ => Viewtype::File,

View File

@@ -1373,8 +1373,8 @@ async fn test_x_microsoft_original_message_id() {
\n\
Does it work with outlook now?\n\
", None)
.await
.unwrap();
.await
.unwrap();
assert_eq!(
message.get_rfc724_mid(),
Some("Mr.6Dx7ITn4w38.n9j7epIcuQI@outlook.com".to_string())
@@ -1505,8 +1505,8 @@ async fn test_ignore_read_receipt_to_self() -> Result<()> {
// Due to a bug in the old version running on the other device, Alice receives a read
// receipt from self.
receive_imf(
&alice,
"Received: (Postfix, from userid 1000); Mon, 4 Dec 2006 14:51:39 +0100 (CET)\n\
&alice,
"Received: (Postfix, from userid 1000); Mon, 4 Dec 2006 14:51:39 +0100 (CET)\n\
From: alice@example.org\n\
To: alice@example.org\n\
Subject: message opened\n\
@@ -1532,10 +1532,10 @@ async fn test_ignore_read_receipt_to_self() -> Result<()> {
\n\
\n\
--SNIPP--"
.as_bytes(),
false,
)
.await?;
.as_bytes(),
false,
)
.await?;
// Check that the state has not changed to `MessageState::OutMdnRcvd`.
let msg = Message::load_from_db(&alice, msg.id).await?;
@@ -1601,8 +1601,8 @@ async fn test_receive_eml() -> Result<()> {
"this is a classic email I attached the .EML file".to_string()
);
assert_eq!(
mime_message.parts[0].param.get(Param::File),
Some("$BLOBDIR/.eml")
mime_message.parts[0].param.get(Param::Filename),
Some(".eml")
);
assert_eq!(mime_message.parts[0].org_filename, Some(".eml".to_string()));

View File

@@ -628,7 +628,6 @@ mod tests {
include_bytes!("../test-data/webxdc/minimal.xdc"),
None,
)
.await
.unwrap();
send_msg(alice, alice_chat.id, &mut instance).await.unwrap();
@@ -800,7 +799,6 @@ mod tests {
include_bytes!("../test-data/webxdc/minimal.xdc"),
None,
)
.await
.unwrap();
send_msg(alice, alice_chat.id, &mut instance).await.unwrap();
@@ -984,7 +982,6 @@ mod tests {
include_bytes!("../test-data/webxdc/minimal.xdc"),
None,
)
.await
.unwrap();
send_msg(alice, alice_chat.id, &mut instance).await.unwrap();
let alice_webxdc = alice.get_last_msg().await;

View File

@@ -1664,8 +1664,12 @@ async fn test_pdf_filename_simple() {
assert_eq!(msg.viewtype, Viewtype::File);
assert_eq!(msg.text, "mail body");
let file_path = msg.param.get(Param::File).unwrap();
assert!(file_path.starts_with("$BLOBDIR/simple"));
assert!(file_path.ends_with(".pdf"));
assert_eq!(
file_path,
// That's the blake3 hash of the file content:
"$BLOBDIR/24a6af459cec5d733374aeaa19a6133"
);
assert_eq!(msg.param.get(Param::Filename).unwrap(), "simple.pdf");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -1680,8 +1684,8 @@ async fn test_pdf_filename_continuation() {
assert_eq!(msg.viewtype, Viewtype::File);
assert_eq!(msg.text, "mail body");
let file_path = msg.param.get(Param::File).unwrap();
assert!(file_path.starts_with("$BLOBDIR/test pdf äöüß"));
assert!(file_path.ends_with(".pdf"));
assert!(file_path.starts_with("$BLOBDIR/"));
assert_eq!(msg.get_filename().unwrap(), "test pdf äöüß.pdf");
}
/// HTML-images may come with many embedded images, eg. tiny icons, corners for formatting,
@@ -3244,11 +3248,11 @@ async fn test_weird_and_duplicated_filenames() -> Result<()> {
"a. tar.tar.gz",
] {
let attachment = alice.blobdir.join(filename_sent);
let content = format!("File content of {filename_sent}");
let content = "File content of tar.gz archive".to_string();
tokio::fs::write(&attachment, content.as_bytes()).await?;
let mut msg_alice = Message::new(Viewtype::File);
msg_alice.set_file(attachment.to_str().unwrap(), None);
msg_alice.set_file_and_deduplicate(&alice, &attachment, None, None)?;
let alice_chat = alice.create_chat(&bob).await;
let sent = alice.send_msg(alice_chat.id, &mut msg_alice).await;
println!("{}", sent.payload());
@@ -3262,9 +3266,10 @@ async fn test_weird_and_duplicated_filenames() -> Result<()> {
let path = msg.get_file(t).unwrap();
let path2 = path.with_file_name("saved.txt");
msg.save_file(t, &path2).await.unwrap();
assert!(
path.to_str().unwrap().ends_with(".tar.gz"),
"path {path:?} doesn't end with .tar.gz"
assert_eq!(
path.file_name().unwrap().to_str().unwrap(),
"79402cb76f44c5761888f9036992a76",
"The hash of the content should always be the same"
);
assert_eq!(fs::read_to_string(&path).await.unwrap(), content);
assert_eq!(fs::read_to_string(&path2).await.unwrap(), content);
@@ -4700,8 +4705,7 @@ async fn test_create_group_with_big_msg() -> Result<()> {
let bob_grp_id = create_group_chat(&bob, ProtectionStatus::Unprotected, "Group").await?;
add_contact_to_chat(&bob, bob_grp_id, ba_contact).await?;
let mut msg = Message::new(Viewtype::Image);
msg.set_file_from_bytes(&bob, "a.jpg", file_bytes, None)
.await?;
msg.set_file_from_bytes(&bob, "a.jpg", file_bytes, None)?;
let sent_msg = bob.send_msg(bob_grp_id, &mut msg).await;
assert!(!msg.get_showpadlock());
@@ -4737,8 +4741,7 @@ async fn test_create_group_with_big_msg() -> Result<()> {
let bob_grp_id = create_group_chat(&bob, ProtectionStatus::Unprotected, "Group1").await?;
add_contact_to_chat(&bob, bob_grp_id, ba_contact).await?;
let mut msg = Message::new(Viewtype::Image);
msg.set_file_from_bytes(&bob, "a.jpg", file_bytes, None)
.await?;
msg.set_file_from_bytes(&bob, "a.jpg", file_bytes, None)?;
let sent_msg = bob.send_msg(bob_grp_id, &mut msg).await;
assert!(msg.get_showpadlock());
@@ -5179,8 +5182,7 @@ async fn test_prefer_references_to_downloaded_msgs() -> Result<()> {
let file_bytes = include_bytes!("../../test-data/image/screenshot.gif");
let mut msg = Message::new(Viewtype::File);
msg.set_file_from_bytes(alice, "file", file_bytes, None)
.await?;
msg.set_file_from_bytes(alice, "file", file_bytes, None)?;
let mut sent = alice.send_msg(alice_chat_id, &mut msg).await;
sent.payload = sent
.payload
@@ -5192,8 +5194,7 @@ async fn test_prefer_references_to_downloaded_msgs() -> Result<()> {
assert_eq!(received.chat_id, bob.get_chat(alice).await.id);
let mut msg = Message::new(Viewtype::File);
msg.set_file_from_bytes(alice, "file", file_bytes, None)
.await?;
msg.set_file_from_bytes(alice, "file", file_bytes, None)?;
let sent = alice.send_msg(alice_chat_id, &mut msg).await;
let received = bob.recv_msg(&sent).await;
assert_eq!(received.download_state, DownloadState::Available);
@@ -5252,7 +5253,6 @@ async fn test_receive_vcard() -> Result<()> {
.as_bytes(),
None,
)
.await
.unwrap();
let alice_bob_chat = alice.create_chat(bob).await;

View File

@@ -254,9 +254,13 @@ impl Sql {
let mut blob = BlobObject::new_from_path(context, avatar.as_ref()).await?;
match blob.recode_to_avatar_size(context).await {
Ok(()) => {
context
.set_config_internal(Config::Selfavatar, Some(&avatar))
.await?
if let Some(path) = blob.to_abs_path().to_str() {
context
.set_config_internal(Config::Selfavatar, Some(path))
.await?;
} else {
warn!(context, "Setting selfavatar failed: non-UTF-8 filename");
}
}
Err(e) => {
warn!(context, "Migrations can't recode avatar, removing. {:#}", e);

View File

@@ -286,6 +286,8 @@ impl Message {
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use super::*;
use crate::chat::ChatId;
use crate::param::Param;
@@ -305,62 +307,90 @@ mod tests {
.unwrap();
let some_text = " bla \t\n\tbla\n\t".to_string();
async fn write_file_to_blobdir(d: &TestContext) -> PathBuf {
let bytes = &[38, 209, 39, 29]; // Just some random bytes
let file = d.get_blobdir().join("random_filename_392438");
tokio::fs::write(&file, bytes).await.unwrap();
file
}
let msg = Message::new_text(some_text.to_string());
assert_summary_texts(&msg, ctx, "bla bla").await; // for simple text, the type is not added to the summary
let file = write_file_to_blobdir(&d).await;
let mut msg = Message::new(Viewtype::Image);
msg.set_file("foo.jpg", None);
msg.set_file_and_deduplicate(&d, &file, Some("foo.jpg"), None)
.unwrap();
assert_summary_texts(&msg, ctx, "📷 Image").await; // file names are not added for images
let file = write_file_to_blobdir(&d).await;
let mut msg = Message::new(Viewtype::Image);
msg.set_text(some_text.to_string());
msg.set_file("foo.jpg", None);
msg.set_file_and_deduplicate(&d, &file, Some("foo.jpg"), None)
.unwrap();
assert_summary_texts(&msg, ctx, "📷 bla bla").await; // type is visible by emoji if text is set
let file = write_file_to_blobdir(&d).await;
let mut msg = Message::new(Viewtype::Video);
msg.set_file("foo.mp4", None);
msg.set_file_and_deduplicate(&d, &file, Some("foo.mp4"), None)
.unwrap();
assert_summary_texts(&msg, ctx, "🎥 Video").await; // file names are not added for videos
let file = write_file_to_blobdir(&d).await;
let mut msg = Message::new(Viewtype::Video);
msg.set_text(some_text.to_string());
msg.set_file("foo.mp4", None);
msg.set_file_and_deduplicate(&d, &file, Some("foo.mp4"), None)
.unwrap();
assert_summary_texts(&msg, ctx, "🎥 bla bla").await; // type is visible by emoji if text is set
let file = write_file_to_blobdir(&d).await;
let mut msg = Message::new(Viewtype::Gif);
msg.set_file("foo.gif", None);
msg.set_file_and_deduplicate(&d, &file, Some("foo.gif"), None)
.unwrap();
assert_summary_texts(&msg, ctx, "GIF").await; // file names are not added for GIFs
let file = write_file_to_blobdir(&d).await;
let mut msg = Message::new(Viewtype::Gif);
msg.set_text(some_text.to_string());
msg.set_file("foo.gif", None);
msg.set_file_and_deduplicate(&d, &file, Some("foo.gif"), None)
.unwrap();
assert_summary_texts(&msg, ctx, "GIF \u{2013} bla bla").await; // file names are not added for GIFs
let file = write_file_to_blobdir(&d).await;
let mut msg = Message::new(Viewtype::Sticker);
msg.set_file("foo.png", None);
msg.set_file_and_deduplicate(&d, &file, Some("foo.png"), None)
.unwrap();
assert_summary_texts(&msg, ctx, "Sticker").await; // file names are not added for stickers
let file = write_file_to_blobdir(&d).await;
let mut msg = Message::new(Viewtype::Voice);
msg.set_file("foo.mp3", None);
msg.set_file_and_deduplicate(&d, &file, Some("foo.mp3"), None)
.unwrap();
assert_summary_texts(&msg, ctx, "🎤 Voice message").await; // file names are not added for voice messages
let file = write_file_to_blobdir(&d).await;
let mut msg = Message::new(Viewtype::Voice);
msg.set_text(some_text.clone());
msg.set_file("foo.mp3", None);
msg.set_file_and_deduplicate(&d, &file, Some("foo.mp3"), None)
.unwrap();
assert_summary_texts(&msg, ctx, "🎤 bla bla").await;
let file = write_file_to_blobdir(&d).await;
let mut msg = Message::new(Viewtype::Audio);
msg.set_file("foo.mp3", None);
msg.set_file_and_deduplicate(&d, &file, Some("foo.mp3"), None)
.unwrap();
assert_summary_texts(&msg, ctx, "🎵 foo.mp3").await; // file name is added for audio
let file = write_file_to_blobdir(&d).await;
let mut msg = Message::new(Viewtype::Audio);
msg.set_text(some_text.clone());
msg.set_file("foo.mp3", None);
msg.set_file_and_deduplicate(&d, &file, Some("foo.mp3"), None)
.unwrap();
assert_summary_texts(&msg, ctx, "🎵 foo.mp3 \u{2013} bla bla").await; // file name and text added for audio
let mut msg = Message::new(Viewtype::File);
let bytes = include_bytes!("../test-data/webxdc/with-minimal-manifest.xdc");
msg.set_file_from_bytes(ctx, "foo.xdc", bytes, None)
.await
.unwrap();
chat_id.set_draft(ctx, Some(&mut msg)).await.unwrap();
assert_eq!(msg.viewtype, Viewtype::Webxdc);
@@ -369,24 +399,28 @@ mod tests {
chat_id.set_draft(ctx, Some(&mut msg)).await.unwrap();
assert_summary_texts(&msg, ctx, "nice app! \u{2013} bla bla").await;
let file = write_file_to_blobdir(&d).await;
let mut msg = Message::new(Viewtype::File);
msg.set_file("foo.bar", None);
msg.set_file_and_deduplicate(&d, &file, Some("foo.bar"), None)
.unwrap();
assert_summary_texts(&msg, ctx, "📎 foo.bar").await; // file name is added for files
let file = write_file_to_blobdir(&d).await;
let mut msg = Message::new(Viewtype::File);
msg.set_text(some_text.clone());
msg.set_file("foo.bar", None);
msg.set_file_and_deduplicate(&d, &file, Some("foo.bar"), None)
.unwrap();
assert_summary_texts(&msg, ctx, "📎 foo.bar \u{2013} bla bla").await; // file name is added for files
let file = write_file_to_blobdir(&d).await;
let mut msg = Message::new(Viewtype::VideochatInvitation);
msg.set_text(some_text.clone());
msg.set_file("foo.bar", None);
msg.set_file_and_deduplicate(&d, &file, Some("foo.bar"), None)
.unwrap();
assert_summary_texts(&msg, ctx, "Video chat invitation").await; // text is not added for videochat invitations
let mut msg = Message::new(Viewtype::Vcard);
msg.set_file_from_bytes(ctx, "foo.vcf", b"", None)
.await
.unwrap();
msg.set_file_from_bytes(ctx, "foo.vcf", b"", None).unwrap();
chat_id.set_draft(ctx, Some(&mut msg)).await.unwrap();
// If a vCard can't be parsed, the message becomes `Viewtype::File`.
assert_eq!(msg.viewtype, Viewtype::File);
@@ -406,7 +440,6 @@ mod tests {
END:VCARD",
None,
)
.await
.unwrap();
chat_id.set_draft(ctx, Some(&mut msg)).await.unwrap();
assert_eq!(msg.viewtype, Viewtype::Vcard);
@@ -419,9 +452,11 @@ mod tests {
assert_eq!(msg.get_summary_text(ctx).await, "Forwarded: bla bla"); // for simple text, the type is not added to the summary
assert_eq!(msg.get_summary_text_without_prefix(ctx).await, "bla bla"); // skipping prefix used for reactions summaries
let file = write_file_to_blobdir(&d).await;
let mut msg = Message::new(Viewtype::File);
msg.set_text(some_text.clone());
msg.set_file("foo.bar", None);
msg.set_file_and_deduplicate(&d, &file, Some("foo.bar"), None)
.unwrap();
msg.param.set_int(Param::Forwarded, 1);
assert_eq!(
msg.get_summary_text(ctx).await,

View File

@@ -261,9 +261,6 @@ impl Context {
/// Ensure that a file is an acceptable webxdc for sending.
pub(crate) async fn ensure_sendable_webxdc_file(&self, path: &Path) -> Result<()> {
let filename = path.to_str().unwrap_or_default();
if !filename.ends_with(WEBXDC_SUFFIX) {
bail!("{} is not a valid webxdc file", filename);
}
let valid = match FsZipFileReader::new(path).await {
Ok(archive) => {
@@ -1047,9 +1044,9 @@ mod tests {
Ok(())
}
async fn create_webxdc_instance(t: &TestContext, name: &str, bytes: &[u8]) -> Result<Message> {
fn create_webxdc_instance(t: &TestContext, name: &str, bytes: &[u8]) -> Result<Message> {
let mut instance = Message::new(Viewtype::File);
instance.set_file_from_bytes(t, name, bytes, None).await?;
instance.set_file_from_bytes(t, name, bytes, None)?;
Ok(instance)
}
@@ -1058,8 +1055,7 @@ mod tests {
t,
"minimal.xdc",
include_bytes!("../test-data/webxdc/minimal.xdc"),
)
.await?;
)?;
let instance_msg_id = send_msg(t, chat_id, &mut instance).await?;
assert_eq!(instance.viewtype, Viewtype::Webxdc);
Message::load_from_db(t, instance_msg_id).await
@@ -1078,9 +1074,7 @@ mod tests {
// sending using bad extension is not working, even when setting Viewtype to webxdc
let mut instance = Message::new(Viewtype::Webxdc);
instance
.set_file_from_bytes(&t, "index.html", b"<html>ola!</html>", None)
.await?;
instance.set_file_from_bytes(&t, "index.html", b"<html>ola!</html>", None)?;
assert!(send_msg(&t, chat_id, &mut instance).await.is_err());
Ok(())
@@ -1096,8 +1090,7 @@ mod tests {
&t,
"invalid-no-zip-but-7z.xdc",
include_bytes!("../test-data/webxdc/invalid-no-zip-but-7z.xdc"),
)
.await?;
)?;
let instance_id = send_msg(&t, chat_id, &mut instance).await?;
assert_eq!(instance.viewtype, Viewtype::File);
let test = Message::load_from_db(&t, instance_id).await?;
@@ -1105,14 +1098,12 @@ mod tests {
// sending invalid .xdc as Viewtype::Webxdc should fail already on sending
let mut instance = Message::new(Viewtype::Webxdc);
instance
.set_file_from_bytes(
&t,
"invalid2.xdc",
include_bytes!("../test-data/webxdc/invalid-no-zip-but-7z.xdc"),
None,
)
.await?;
instance.set_file_from_bytes(
&t,
"invalid2.xdc",
include_bytes!("../test-data/webxdc/invalid-no-zip-but-7z.xdc"),
None,
)?;
assert!(send_msg(&t, chat_id, &mut instance).await.is_err());
Ok(())
@@ -1128,8 +1119,7 @@ mod tests {
&t,
"chess.xdc",
include_bytes!("../test-data/webxdc/chess.xdc"),
)
.await?;
)?;
let instance_id = send_msg(&t, chat_id, &mut instance).await?;
let instance = Message::load_from_db(&t, instance_id).await?;
assert_eq!(instance.viewtype, Viewtype::Webxdc);
@@ -1315,8 +1305,7 @@ mod tests {
&alice,
"chess.xdc",
include_bytes!("../test-data/webxdc/chess.xdc"),
)
.await?;
)?;
let sent1 = alice.send_msg(chat.id, &mut alice_instance).await;
let alice_instance = sent1.load_from_db().await;
alice
@@ -1445,8 +1434,7 @@ mod tests {
&t,
"minimal.xdc",
include_bytes!("../test-data/webxdc/minimal.xdc"),
)
.await?;
)?;
chat_id.set_draft(&t, Some(&mut instance)).await?;
let instance = chat_id.get_draft(&t).await?.unwrap();
t.send_webxdc_status_update(instance.id, r#"{"payload": 42}"#)
@@ -1882,8 +1870,7 @@ mod tests {
&t,
"minimal.xdc",
include_bytes!("../test-data/webxdc/minimal.xdc"),
)
.await?;
)?;
chat_id.set_draft(&t, Some(&mut instance)).await?;
let (first, last) = (StatusUpdateSerial(1), StatusUpdateSerial::MAX);
assert_eq!(
@@ -2028,8 +2015,7 @@ mod tests {
&alice,
"minimal.xdc",
include_bytes!("../test-data/webxdc/minimal.xdc"),
)
.await?;
)?;
alice_chat_id
.set_draft(&alice, Some(&mut alice_instance))
.await?;
@@ -2143,8 +2129,7 @@ mod tests {
&t,
"some-files.xdc",
include_bytes!("../test-data/webxdc/some-files.xdc"),
)
.await?;
)?;
chat_id.set_draft(&t, Some(&mut instance)).await?;
let buf = instance.get_webxdc_blob(&t, "index.html").await?;
@@ -2243,8 +2228,7 @@ sth_for_the = "future""#
&t,
"with-min-api-1001.xdc",
include_bytes!("../test-data/webxdc/with-min-api-1001.xdc"),
)
.await?;
)?;
send_msg(&t, chat_id, &mut instance).await?;
let instance = t.get_last_msg().await;
@@ -2270,8 +2254,7 @@ sth_for_the = "future""#
&t,
"with-manifest-empty-name.xdc",
include_bytes!("../test-data/webxdc/with-manifest-empty-name.xdc"),
)
.await?;
)?;
chat_id.set_draft(&t, Some(&mut instance)).await?;
let info = instance.get_webxdc_info(&t).await?;
assert_eq!(info.name, "with-manifest-empty-name.xdc");
@@ -2281,8 +2264,7 @@ sth_for_the = "future""#
&t,
"with-manifest-no-name.xdc",
include_bytes!("../test-data/webxdc/with-manifest-no-name.xdc"),
)
.await?;
)?;
chat_id.set_draft(&t, Some(&mut instance)).await?;
let info = instance.get_webxdc_info(&t).await?;
assert_eq!(info.name, "with-manifest-no-name.xdc");
@@ -2292,8 +2274,7 @@ sth_for_the = "future""#
&t,
"with-minimal-manifest.xdc",
include_bytes!("../test-data/webxdc/with-minimal-manifest.xdc"),
)
.await?;
)?;
chat_id.set_draft(&t, Some(&mut instance)).await?;
let info = instance.get_webxdc_info(&t).await?;
assert_eq!(info.name, "nice app!");
@@ -2303,8 +2284,7 @@ sth_for_the = "future""#
&t,
"with-manifest-and-png-icon.xdc",
include_bytes!("../test-data/webxdc/with-manifest-and-png-icon.xdc"),
)
.await?;
)?;
chat_id.set_draft(&t, Some(&mut instance)).await?;
let info = instance.get_webxdc_info(&t).await?;
assert_eq!(info.name, "with some icon");
@@ -2314,8 +2294,7 @@ sth_for_the = "future""#
&t,
"with-png-icon.xdc",
include_bytes!("../test-data/webxdc/with-png-icon.xdc"),
)
.await?;
)?;
chat_id.set_draft(&t, Some(&mut instance)).await?;
let info = instance.get_webxdc_info(&t).await?;
assert_eq!(info.name, "with-png-icon.xdc");
@@ -2325,8 +2304,7 @@ sth_for_the = "future""#
&t,
"with-jpg-icon.xdc",
include_bytes!("../test-data/webxdc/with-jpg-icon.xdc"),
)
.await?;
)?;
chat_id.set_draft(&t, Some(&mut instance)).await?;
let info = instance.get_webxdc_info(&t).await?;
assert_eq!(info.name, "with-jpg-icon.xdc");
@@ -2667,8 +2645,7 @@ sth_for_the = "future""#
} else {
include_bytes!("../test-data/webxdc/minimal.xdc")
},
)
.await?;
)?;
let instance_id = send_msg(&t, chat_id, &mut instance).await?;
t.send_webxdc_status_update(
instance_id,
@@ -2693,8 +2670,7 @@ sth_for_the = "future""#
&t,
"with-minimal-manifest.xdc",
include_bytes!("../test-data/webxdc/with-minimal-manifest.xdc"),
)
.await?;
)?;
send_msg(&t, chat_id, &mut instance).await?;
let chatlist = Chatlist::try_load(&t, 0, None, None).await?;
@@ -2717,8 +2693,7 @@ sth_for_the = "future""#
&alice,
"minimal.xdc",
include_bytes!("../test-data/webxdc/minimal.xdc"),
)
.await?;
)?;
alice_instance.set_text("user added text".to_string());
send_msg(&alice, alice_chat.id, &mut alice_instance).await?;
let alice_instance = alice.get_last_msg().await;
@@ -2821,8 +2796,7 @@ sth_for_the = "future""#
&alice,
"debug_logging.xdc",
include_bytes!("../test-data/webxdc/minimal.xdc"),
)
.await?;
)?;
assert!(alice.debug_logging.read().unwrap().is_none());
send_msg(&alice, chat_id, &mut instance).await?;
assert!(alice.debug_logging.read().unwrap().is_some());

View File

@@ -190,8 +190,7 @@ mod tests {
"mapstest.xdc",
include_bytes!("../../test-data/webxdc/mapstest-integration-unset.xdc"),
None,
)
.await?;
)?;
t.send_msg(self_chat.id, &mut msg).await;
assert_integration(&t, "with some icon").await?; // still the default integration
@@ -202,8 +201,7 @@ mod tests {
"mapstest.xdc",
include_bytes!("../../test-data/webxdc/mapstest-integration-set.xdc"),
None,
)
.await?;
)?;
let sent = t.send_msg(self_chat.id, &mut msg).await;
let info = msg.get_webxdc_info(&t).await?;
assert!(info.summary.contains("Used as map"));