From cb1d0085272b2e77ee01dfcd354ff080f3f22022 Mon Sep 17 00:00:00 2001 From: iequidoo Date: Wed, 2 Oct 2024 15:51:15 -0300 Subject: [PATCH] feat: Store blobs in subdirs with random names (#4309) Recently there was an accident with a chatbot that replaced its avatar set from the command line with an unrelated avatar of a contact. Both the `selfavatar` setting and the contact avatar `i` param pointed to `$BLOBDIR/avatar.png` at the time it was detected. How this happened is unclear, but it is possible that `avatar.png` was removed, unmounted or otherwise not detected by the core, and the core stored avatar received from the contact as `avatar.png`, while `selfavatar` config still pointed to `$BLOBDIR/avatar.png`. Such bugs are unavoidable even if the core itself has no bugs as we cannot rely on blobdir not reside on the faulty network filesystem, being incorrectly backed up and restored etc., so we should assume that files may be randomly removed. Then there may be dangling `$BLOBDIR/...` references in the database which may accidentally point to unrelated files, could even be an `avatar.png` file sent to the bot in private. To prevent such bugs, store blobs in blobdir subdirs with random names. Also this helps when we receive multiple attachments having the same name -- before, random filename suffixes were added to subsequent attachments, now attachments preserve their filenames which is important if they are opened in external programs. --- python/src/deltachat/message.py | 3 +- python/tests/test_1_online.py | 9 +- python/tests/test_2_increation.py | 4 +- src/blob.rs | 318 ++++++++++++++---------------- src/chat.rs | 7 +- src/config.rs | 8 +- src/constants.rs | 3 + src/imex.rs | 47 +++-- src/mimeparser.rs | 7 +- src/receive_imf/tests.rs | 8 +- src/sql.rs | 33 +++- 11 files changed, 235 insertions(+), 212 deletions(-) diff --git a/python/src/deltachat/message.py b/python/src/deltachat/message.py index 9ffd980c1..491a6520d 100644 --- a/python/src/deltachat/message.py +++ b/python/src/deltachat/message.py @@ -108,7 +108,7 @@ class Message: @props.with_doc def filename(self): - """filename if there was an attachment, otherwise empty string.""" + """file path if there was an attachment, otherwise empty string.""" return from_dc_charpointer(lib.dc_msg_get_file(self._dc_msg)) def set_file(self, path, mime_type=None): @@ -121,7 +121,6 @@ class Message: @props.with_doc def basename(self) -> str: """basename of the attachment if it exists, otherwise empty string.""" - # FIXME, it does not return basename return from_dc_charpointer(lib.dc_msg_get_filename(self._dc_msg)) @props.with_doc diff --git a/python/tests/test_1_online.py b/python/tests/test_1_online.py index ea1e45e27..ca6c42910 100644 --- a/python/tests/test_1_online.py +++ b/python/tests/test_1_online.py @@ -181,14 +181,12 @@ def test_send_file_twice_unicode_filename_mangling(tmp_path, acfactory, lp): msg = send_and_receive_message() assert msg.text == "withfile" assert open(msg.filename).read() == "some data" - msg.filename.index(basename) - assert msg.filename.endswith(ext) + assert msg.filename.endswith(basename + ext) msg2 = send_and_receive_message() assert msg2.text == "withfile" assert open(msg2.filename).read() == "some data" - msg2.filename.index(basename) - assert msg2.filename.endswith(ext) + assert msg2.filename.endswith(basename + ext) assert msg.filename != msg2.filename @@ -214,8 +212,7 @@ def test_send_file_html_attachment(tmp_path, acfactory, lp): msg = ac2.get_message_by_id(ev.data2) assert open(msg.filename).read() == content - msg.filename.index(basename) - assert msg.filename.endswith(ext) + assert msg.filename.endswith(basename + ext) def test_html_message(acfactory, lp): diff --git a/python/tests/test_2_increation.py b/python/tests/test_2_increation.py index 1fe85e02a..2007c1dd1 100644 --- a/python/tests/test_2_increation.py +++ b/python/tests/test_2_increation.py @@ -50,8 +50,8 @@ class TestOnlineInCreation: src = tmp_path / "file.txt" src.write_text("hello there\n") msg = chat.send_file(str(src)) - assert msg.filename.startswith(os.path.join(ac1.get_blobdir(), "file")) - assert msg.filename.endswith(".txt") + assert msg.filename.startswith(ac1.get_blobdir()) + assert msg.filename.endswith("file.txt") def test_forward_increation(self, acfactory, data, lp): ac1, ac2 = acfactory.get_online_accounts(2) diff --git a/src/blob.rs b/src/blob.rs index 43a42850f..ec55ee6ec 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -10,17 +10,15 @@ use std::path::{Path, PathBuf}; use anyhow::{format_err, Context as _, Result}; use base64::Engine as _; -use futures::StreamExt; use image::codecs::jpeg::JpegEncoder; use image::ImageReader; use image::{DynamicImage, GenericImage, GenericImageView, ImageFormat, Pixel, Rgba}; use num_traits::FromPrimitive; use tokio::io::AsyncWriteExt; use tokio::{fs, io}; -use tokio_stream::wrappers::ReadDirStream; use crate::config::Config; -use crate::constants::{self, MediaQuality}; +use crate::constants::{self, MediaQuality, BLOB_CREATE_ATTEMPTS}; use crate::context::Context; use crate::events::EventType; use crate::log::LogExt; @@ -58,7 +56,8 @@ impl<'a> BlobObject<'a> { ) -> Result> { let blobdir = context.get_blobdir(); let (stem, ext) = BlobObject::sanitise_name(suggested_name); - let (name, mut file) = BlobObject::create_new_file(context, blobdir, &stem, &ext).await?; + let (subdir, name, mut file) = + BlobObject::create_new_file(context, blobdir, &stem, &ext).await?; file.write_all(data).await.context("file write failure")?; // workaround a bug in async-std @@ -68,42 +67,41 @@ impl<'a> BlobObject<'a> { let blob = BlobObject { blobdir, - name: format!("$BLOBDIR/{name}"), + name: format!("$BLOBDIR/{subdir}/{name}"), }; context.emit_event(EventType::NewBlobFile(blob.as_name().to_string())); Ok(blob) } - // Creates a new file, returning a tuple of the name and the handle. + // Creates a new file, returning a tuple of the subdir and file names and the handle. async fn create_new_file( context: &Context, dir: &Path, stem: &str, ext: &str, - ) -> Result<(String, fs::File)> { - const MAX_ATTEMPT: u32 = 16; + ) -> Result<(String, String, fs::File)> { let mut attempt = 0; - let mut name = format!("{stem}{ext}"); loop { attempt += 1; - let path = dir.join(&name); - match fs::OpenOptions::new() + let subdir = format!("{:016x}", rand::random::()); + let path = dir.join(&subdir); + if let Err(err) = fs::create_dir(&path).await.log_err(context) { + if attempt >= BLOB_CREATE_ATTEMPTS { + return Err(err).context("Failed to create subdir"); + } else if attempt == 1 && !dir.exists() { + fs::create_dir_all(dir).await.log_err(context).ok(); + } + continue; + } + let name = format!("{}{}", stem, ext); + let path = path.join(&name); + let file = fs::OpenOptions::new() .create_new(true) .write(true) .open(&path) .await - { - Ok(file) => return Ok((name, file)), - Err(err) => { - if attempt >= MAX_ATTEMPT { - return Err(err).context("failed to create file"); - } else if attempt == 1 && !dir.exists() { - fs::create_dir_all(dir).await.log_err(context).ok(); - } else { - name = format!("{}-{}{}", stem, rand::random::(), ext); - } - } - } + .context("Failed to create file")?; + return Ok((subdir, name, file)); } } @@ -118,12 +116,11 @@ impl<'a> BlobObject<'a> { .await .with_context(|| format!("failed to open file {}", src.display()))?; let (stem, ext) = BlobObject::sanitise_name(&src.to_string_lossy()); - let (name, mut dst_file) = + let (subdir, name, mut dst_file) = BlobObject::create_new_file(context, context.get_blobdir(), &stem, &ext).await?; - let name_for_err = name.clone(); if let Err(err) = io::copy(&mut src_file, &mut dst_file).await { // Attempt to remove the failed file, swallow errors resulting from that. - let path = context.get_blobdir().join(&name_for_err); + let path = context.get_blobdir().join(&subdir).join(&name); fs::remove_file(path).await.ok(); return Err(err).context("failed to copy file"); } @@ -133,7 +130,7 @@ impl<'a> BlobObject<'a> { let blob = BlobObject { blobdir: context.get_blobdir(), - name: format!("$BLOBDIR/{name}"), + name: format!("$BLOBDIR/{subdir}/{name}"), }; context.emit_event(EventType::NewBlobFile(blob.as_name().to_string())); Ok(blob) @@ -222,7 +219,8 @@ impl<'a> BlobObject<'a> { /// The path relative in the blob directory. pub fn as_rel_path(&self) -> &Path { - Path::new(self.as_file_name()) + let name = self.name.split_once('/').unwrap_or_default().1; + Path::new(name) } /// Returns the extension of the blob. @@ -317,8 +315,13 @@ impl<'a> BlobObject<'a> { Some(name) => name, None => return false, }; - if uname.find('/').is_some() { - return false; + if let Some((subdir, name)) = uname.split_once('/') { + if subdir.is_empty() || name.is_empty() { + return false; + } + if name.find('/').is_some() { + return false; + } } if uname.find('\\').is_some() { return false; @@ -353,9 +356,7 @@ impl<'a> BlobObject<'a> { Ok(blob.as_name().to_string()) } - pub async fn recode_to_avatar_size(&mut self, context: &Context) -> Result<()> { - let blob_abs = self.to_abs_path(); - + pub async fn recode_to_avatar_size(&mut self, context: &'a Context) -> Result<()> { let img_wh = match MediaQuality::from_i32(context.get_config_int(Config::MediaQuality).await?) .unwrap_or_default() @@ -368,16 +369,7 @@ impl<'a> BlobObject<'a> { let strict_limits = true; // max_bytes is 20_000 bytes: Outlook servers don't allow headers larger than 32k. // 32 / 4 * 3 = 24k if you account for base64 encoding. To be safe, we reduced this to 20k. - if let Some(new_name) = self.recode_to_size( - context, - blob_abs, - maybe_sticker, - img_wh, - 20_000, - strict_limits, - )? { - self.name = new_name; - } + self.recode_to_size(context, maybe_sticker, img_wh, 20_000, strict_limits)?; Ok(()) } @@ -390,10 +382,9 @@ impl<'a> BlobObject<'a> { /// reset. pub async fn recode_to_image_size( &mut self, - context: &Context, + context: &'a Context, maybe_sticker: &mut bool, ) -> Result<()> { - let blob_abs = self.to_abs_path(); let (img_wh, max_bytes) = match MediaQuality::from_i32(context.get_config_int(Config::MediaQuality).await?) .unwrap_or_default() @@ -405,16 +396,7 @@ impl<'a> BlobObject<'a> { MediaQuality::Worse => (constants::WORSE_IMAGE_SIZE, constants::WORSE_IMAGE_BYTES), }; let strict_limits = false; - if let Some(new_name) = self.recode_to_size( - context, - blob_abs, - maybe_sticker, - img_wh, - max_bytes, - strict_limits, - )? { - self.name = new_name; - } + self.recode_to_size(context, maybe_sticker, img_wh, max_bytes, strict_limits)?; Ok(()) } @@ -422,13 +404,13 @@ impl<'a> BlobObject<'a> { /// proceed with the result. fn recode_to_size( &mut self, - context: &Context, - mut blob_abs: PathBuf, + context: &'a Context, maybe_sticker: &mut bool, mut img_wh: u32, max_bytes: usize, strict_limits: bool, - ) -> Result> { + ) -> Result<()> { + let mut blob_abs = self.to_abs_path(); // Add white background only to avatars to spare the CPU. let mut add_white_bg = img_wh <= constants::BALANCED_AVATAR_SIZE; let mut no_exif = false; @@ -455,7 +437,6 @@ impl<'a> BlobObject<'a> { let mut img = imgreader.decode().context("image decode failure")?; let orientation = exif.as_ref().map(|exif| exif_orientation(exif, context)); let mut encoded = Vec::new(); - let mut changed_name = None; if *maybe_sticker { let x_max = img.width().saturating_sub(1); @@ -467,7 +448,7 @@ impl<'a> BlobObject<'a> { || img.get_pixel(x_max, y_max).0[3] == 0); } if *maybe_sticker && exif.is_none() { - return Ok(None); + return Ok(()); } img = match orientation { @@ -565,9 +546,7 @@ impl<'a> BlobObject<'a> { && matches!(ofmt, ImageOutputFormat::Jpeg { .. }) { blob_abs = blob_abs.with_extension("jpg"); - let file_name = blob_abs.file_name().context("No image file name (???)")?; - let file_name = file_name.to_str().context("Filename is no UTF-8 (???)")?; - changed_name = Some(format!("$BLOBDIR/{file_name}")); + *self = Self::from_path(context, &blob_abs)?; } if encoded.is_empty() { @@ -580,8 +559,7 @@ impl<'a> BlobObject<'a> { std::fs::write(&blob_abs, &encoded) .context("failed to write recoded blob to file")?; } - - Ok(changed_name) + Ok(()) }); match res { Ok(_) => res, @@ -591,7 +569,7 @@ impl<'a> BlobObject<'a> { context, "Cannot recode image, using original data: {err:#}.", ); - Ok(None) + Ok(()) } else { Err(err) } @@ -624,7 +602,7 @@ fn exif_orientation(exif: &exif::Exif, context: &Context) -> i32 { impl fmt::Display for BlobObject<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "$BLOBDIR/{}", self.name) + write!(f, "{}", self.name) } } @@ -641,32 +619,36 @@ pub(crate) struct BlobDirContents<'a> { impl<'a> BlobDirContents<'a> { pub(crate) async fn new(context: &'a Context) -> Result> { - let readdir = fs::read_dir(context.get_blobdir()).await?; - let inner = ReadDirStream::new(readdir) - .filter_map(|entry| async move { - match entry { - Ok(entry) => Some(entry), + let blobdir = context.get_blobdir(); + let mut dirs = vec![blobdir.to_path_buf()]; + let mut inner = Vec::::new(); + while let Some(d) = dirs.pop() { + let mut readdir = fs::read_dir(&d).await?; + loop { + let entry = match readdir.next_entry().await { + Ok(Some(e)) => e, + Ok(None) => break, Err(err) => { - error!(context, "Failed to read blob file: {err}."); - None + error!(context, "Failed to read next entry: {err}."); + continue; } + }; + let Ok(file_type) = entry.file_type().await else { + continue; + }; + if file_type.is_file() { + inner.push(entry.path()); + } else if file_type.is_dir() && d == blobdir { + dirs.push(entry.path()); + } else { + warn!( + context, + "Export: Found blob dir entry {} that is not a file or subdir, ignoring.", + entry.path().display(), + ); } - }) - .filter_map(|entry| async move { - match entry.file_type().await.ok()?.is_file() { - true => Some(entry.path()), - false => { - warn!( - context, - "Export: Found blob dir entry {} that is not a file, ignoring.", - entry.path().display() - ); - None - } - } - }) - .collect() - .await; + } + } Ok(Self { inner, context }) } @@ -761,6 +743,7 @@ fn add_white_bg(img: &mut DynamicImage) { #[cfg(test)] mod tests { use fs::File; + use regex::Regex; use super::*; use crate::chat::{self, create_group_chat, ProtectionStatus}; @@ -780,18 +763,21 @@ mod tests { async fn test_create() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo", b"hello").await.unwrap(); - let fname = t.get_blobdir().join("foo"); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}/foo$").unwrap(); + assert!(re.is_match(blob.as_name())); + assert_eq!(blob.as_file_name(), "foo"); + let fname = t.get_blobdir().join(blob.as_rel_path()); let data = fs::read(fname).await.unwrap(); assert_eq!(data, b"hello"); - assert_eq!(blob.as_name(), "$BLOBDIR/foo"); - assert_eq!(blob.to_abs_path(), t.get_blobdir().join("foo")); + assert_eq!(blob.to_abs_path(), t.get_blobdir().join(blob.as_rel_path())); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_lowercase_ext() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo.TXT", b"hello").await.unwrap(); - assert_eq!(blob.as_name(), "$BLOBDIR/foo.txt"); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}/foo.txt$").unwrap(); + assert!(re.is_match(blob.as_name())); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -805,7 +791,8 @@ mod tests { async fn test_as_rel_path() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); - assert_eq!(blob.as_rel_path(), Path::new("foo.txt")); + let re = Regex::new("^[[:xdigit:]]{16}/foo.txt$").unwrap(); + assert!(re.is_match(blob.as_rel_path().to_str().unwrap())); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -820,46 +807,40 @@ mod tests { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_create_dup() { let t = TestContext::new().await; - BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); - let foo_path = t.get_blobdir().join("foo.txt"); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}/foo.txt$").unwrap(); + + let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); + assert!(re.is_match(blob.as_name())); + let foo_path = t.get_blobdir().join(blob.as_rel_path()); assert!(foo_path.exists()); - BlobObject::create(&t, "foo.txt", b"world").await.unwrap(); - let mut dir = fs::read_dir(t.get_blobdir()).await.unwrap(); - while let Ok(Some(dirent)) = dir.next_entry().await { - let fname = dirent.file_name(); - if fname == foo_path.file_name().unwrap() { - assert_eq!(fs::read(&foo_path).await.unwrap(), b"hello"); - } else { - let name = fname.to_str().unwrap(); - assert!(name.starts_with("foo")); - assert!(name.ends_with(".txt")); - } - } + + let blob = BlobObject::create(&t, "foo.txt", b"world").await.unwrap(); + assert!(re.is_match(blob.as_name())); + let foo_path2 = t.get_blobdir().join(blob.as_rel_path()); + assert!(foo_path2.exists()); + + assert!(foo_path != foo_path2); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_double_ext_preserved() { let t = TestContext::new().await; - BlobObject::create(&t, "foo.tar.gz", b"hello") + let blob = BlobObject::create(&t, "foo.tar.gz", b"hello") .await .unwrap(); - let foo_path = t.get_blobdir().join("foo.tar.gz"); - assert!(foo_path.exists()); - BlobObject::create(&t, "foo.tar.gz", b"world") + assert_eq!(blob.as_file_name(), "foo.tar.gz"); + let foo_path = t.get_blobdir().join(blob.as_rel_path()); + assert_eq!(foo_path.file_name().unwrap(), OsStr::new("foo.tar.gz")); + assert_eq!(fs::read(&foo_path).await.unwrap(), b"hello"); + + let blob = BlobObject::create(&t, "foo.tar.gz", b"world") .await .unwrap(); - let mut dir = fs::read_dir(t.get_blobdir()).await.unwrap(); - while let Ok(Some(dirent)) = dir.next_entry().await { - let fname = dirent.file_name(); - if fname == foo_path.file_name().unwrap() { - assert_eq!(fs::read(&foo_path).await.unwrap(), b"hello"); - } else { - let name = fname.to_str().unwrap(); - println!("{name}"); - assert!(name.starts_with("foo")); - assert!(name.ends_with(".tar.gz")); - } - } + assert_eq!(blob.as_file_name(), "foo.tar.gz"); + let foo_path1 = t.get_blobdir().join(blob.as_rel_path()); + assert_eq!(foo_path1.file_name().unwrap(), OsStr::new("foo.tar.gz")); + assert_eq!(fs::read(&foo_path1).await.unwrap(), b"world"); + assert_ne!(foo_path, foo_path1); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -877,7 +858,8 @@ mod tests { let src = t.dir.path().join("src"); fs::write(&src, b"boo").await.unwrap(); let blob = BlobObject::create_and_copy(&t, src.as_ref()).await.unwrap(); - assert_eq!(blob.as_name(), "$BLOBDIR/src"); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}/src$").unwrap(); + assert!(re.is_match(blob.as_name())); let data = fs::read(blob.to_abs_path()).await.unwrap(); assert_eq!(data, b"boo"); @@ -898,17 +880,24 @@ mod tests { let blob = BlobObject::new_from_path(&t, src_ext.as_ref()) .await .unwrap(); - assert_eq!(blob.as_name(), "$BLOBDIR/external"); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}/external$").unwrap(); + assert!(re.is_match(blob.as_name())); let data = fs::read(blob.to_abs_path()).await.unwrap(); assert_eq!(data, b"boo"); - let src_int = t.get_blobdir().join("internal"); - fs::write(&src_int, b"boo").await.unwrap(); - let blob = BlobObject::new_from_path(&t, &src_int).await.unwrap(); - assert_eq!(blob.as_name(), "$BLOBDIR/internal"); - let data = fs::read(blob.to_abs_path()).await.unwrap(); - assert_eq!(data, b"boo"); + fs::create_dir(t.get_blobdir().join("subdir")) + .await + .unwrap(); + for rel_path in ["0", "subdir/0"] { + let src_int = t.get_blobdir().join(rel_path); + fs::write(&src_int, b"boo").await.unwrap(); + let blob = BlobObject::new_from_path(&t, &src_int).await.unwrap(); + assert_eq!(blob.as_name().strip_prefix("$BLOBDIR/").unwrap(), rel_path); + let data = fs::read(blob.to_abs_path()).await.unwrap(); + assert_eq!(data, b"boo"); + } } + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_create_from_name_long() { let t = TestContext::new().await; @@ -917,10 +906,10 @@ mod tests { let blob = BlobObject::new_from_path(&t, src_ext.as_ref()) .await .unwrap(); - assert_eq!( - blob.as_name(), - "$BLOBDIR/autocrypt-setup-message-4137848473.html" - ); + let re = + Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}/autocrypt-setup-message-4137848473.html$") + .unwrap(); + assert!(re.is_match(blob.as_name())); } #[test] @@ -928,7 +917,9 @@ mod tests { assert!(BlobObject::is_acceptible_blob_name("foo")); assert!(BlobObject::is_acceptible_blob_name("foo.txt")); assert!(BlobObject::is_acceptible_blob_name("f".repeat(128))); - assert!(!BlobObject::is_acceptible_blob_name("foo/bar")); + assert!(BlobObject::is_acceptible_blob_name("foo/bar")); + assert!(!BlobObject::is_acceptible_blob_name("/bar")); + assert!(!BlobObject::is_acceptible_blob_name("foo/")); assert!(!BlobObject::is_acceptible_blob_name("foo\\bar")); assert!(!BlobObject::is_acceptible_blob_name("foo\x00bar")); } @@ -1001,15 +992,8 @@ mod tests { let img_wh = 128; let maybe_sticker = &mut false; let strict_limits = true; - blob.recode_to_size( - &t, - blob.to_abs_path(), - maybe_sticker, - img_wh, - 20_000, - strict_limits, - ) - .unwrap(); + blob.recode_to_size(&t, maybe_sticker, img_wh, 20_000, strict_limits) + .unwrap(); tokio::task::block_in_place(move || { let img = image::open(blob.to_abs_path()).unwrap(); assert!(img.width() == img_wh); @@ -1025,19 +1009,21 @@ mod tests { let avatar_src = t.dir.path().join("avatar.jpg"); let avatar_bytes = include_bytes!("../test-data/image/avatar1000x1000.jpg"); fs::write(&avatar_src, avatar_bytes).await.unwrap(); - let avatar_blob = t.get_blobdir().join("avatar.jpg"); - assert!(!avatar_blob.exists()); t.set_config(Config::Selfavatar, Some(avatar_src.to_str().unwrap())) .await .unwrap(); + let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap(); + let blobdir = t.get_blobdir().to_str().unwrap(); + assert!(avatar_blob.starts_with(blobdir)); + let re = Regex::new("[[:xdigit:]]{16}/avatar.jpg$").unwrap(); + assert!(re.is_match(&avatar_blob)); + let avatar_blob = Path::new(&avatar_blob); assert!(avatar_blob.exists()); assert!(fs::metadata(&avatar_blob).await.unwrap().len() < avatar_bytes.len() as u64); - let avatar_cfg = t.get_config(Config::Selfavatar).await.unwrap(); - assert_eq!(avatar_cfg, avatar_blob.to_str().map(|s| s.to_string())); check_image_size(avatar_src, 1000, 1000); check_image_size( - &avatar_blob, + avatar_blob, constants::BALANCED_AVATAR_SIZE, constants::BALANCED_AVATAR_SIZE, ); @@ -1047,20 +1033,13 @@ mod tests { file.metadata().await.unwrap().len() } - let mut blob = BlobObject::new_from_path(&t, &avatar_blob).await.unwrap(); + let mut blob = BlobObject::new_from_path(&t, avatar_blob).await.unwrap(); let maybe_sticker = &mut false; let strict_limits = true; - blob.recode_to_size( - &t, - blob.to_abs_path(), - maybe_sticker, - 1000, - 3000, - strict_limits, - ) - .unwrap(); - assert!(file_size(&avatar_blob).await <= 3000); - assert!(file_size(&avatar_blob).await > 2000); + blob.recode_to_size(&t, maybe_sticker, 1000, 3000, strict_limits) + .unwrap(); + assert!(file_size(avatar_blob).await <= 3000); + assert!(file_size(avatar_blob).await > 2000); tokio::task::block_in_place(move || { let img = image::open(avatar_blob).unwrap(); assert!(img.width() > 130); @@ -1100,18 +1079,19 @@ mod tests { let avatar_src = t.dir.path().join("avatar.png"); let avatar_bytes = include_bytes!("../test-data/image/avatar64x64.png"); fs::write(&avatar_src, avatar_bytes).await.unwrap(); - let avatar_blob = t.get_blobdir().join("avatar.png"); - assert!(!avatar_blob.exists()); t.set_config(Config::Selfavatar, Some(avatar_src.to_str().unwrap())) .await .unwrap(); - assert!(avatar_blob.exists()); + let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap(); + let blobdir = t.get_blobdir().to_str().unwrap(); + assert!(avatar_blob.starts_with(blobdir)); + let re = Regex::new("[[:xdigit:]]{16}/avatar.png$").unwrap(); + assert!(re.is_match(&avatar_blob)); + assert!(Path::new(&avatar_blob).exists()); assert_eq!( fs::metadata(&avatar_blob).await.unwrap().len(), avatar_bytes.len() as u64 ); - let avatar_cfg = t.get_config(Config::Selfavatar).await.unwrap(); - assert_eq!(avatar_cfg, avatar_blob.to_str().map(|s| s.to_string())); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] diff --git a/src/chat.rs b/src/chat.rs index c1d2f3d1d..63e550461 100644 --- a/src/chat.rs +++ b/src/chat.rs @@ -4717,6 +4717,7 @@ mod tests { use crate::message::delete_msgs; use crate::receive_imf::receive_imf; use crate::test_utils::{sync, TestContext, TestContextManager}; + use regex::Regex; use strum::IntoEnumIterator; use tokio::fs; @@ -7398,9 +7399,11 @@ mod tests { // the file bob receives should not contain BIDI-control characters assert_eq!( - Some("$BLOBDIR/harmless_file.txt.exe"), - msg.param.get(Param::File), + msg.param.get(Param::Filename).unwrap(), + "harmless_file.txt.exe" ); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}/harmless_file.txt.exe$").unwrap(); + assert!(re.is_match(msg.param.get(Param::File).unwrap())); Ok(()) } diff --git a/src/config.rs b/src/config.rs index 46eae017a..25e666800 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1165,10 +1165,10 @@ mod tests { // this. let self_chat = alice0.get_self_chat().await; let self_chat_avatar_path = self_chat.get_profile_image(&alice0).await?.unwrap(); - assert_eq!( - self_chat_avatar_path, - alice0.get_blobdir().join("icon-saved-messages.png") - ); + assert_eq!(self_chat_avatar_path.extension().unwrap(), "png"); + let self_chat_avatar = fs::read(self_chat_avatar_path).await?; + let expected_avatar = include_bytes!("../assets/icon-saved-messages.png").to_vec(); + assert_eq!(self_chat_avatar, expected_avatar); assert!(alice1 .get_config(Config::Selfavatar) .await? diff --git a/src/constants.rs b/src/constants.rs index affd7700d..8028cfa4d 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -217,6 +217,9 @@ pub(crate) const DC_FOLDERS_CONFIGURED_KEY: &str = "folders_configured"; // this value can be increased if the folder configuration is changed and must be redone on next program start pub(crate) const DC_FOLDERS_CONFIGURED_VERSION: i32 = 5; +// Maximum attemps to create a blob file. +pub(crate) const BLOB_CREATE_ATTEMPTS: u32 = 2; + // If more recipients are needed in SMTP's `RCPT TO:` header, the recipient list is split into // chunks. This does not affect MIME's `To:` header. Can be overwritten by setting // `max_smtp_rcpt_to` in the provider db. diff --git a/src/imex.rs b/src/imex.rs index 07b55df5b..cd90b1bc0 100644 --- a/src/imex.rs +++ b/src/imex.rs @@ -1,11 +1,12 @@ //! # Import/export module. use std::ffi::OsStr; +use std::io::ErrorKind as IoErrorKind; use std::path::{Path, PathBuf}; use std::pin::Pin; use ::pgp::types::PublicKeyTrait; -use anyhow::{bail, ensure, format_err, Context as _, Result}; +use anyhow::{anyhow, bail, ensure, format_err, Context as _, Result}; use futures::TryStreamExt; use futures_lite::FutureExt; use pin_project::pin_project; @@ -386,24 +387,48 @@ async fn import_backup_stream_inner( if path.file_name() == Some(OsStr::new(DBFILE_BACKUP_NAME)) { continue; } - // async_tar unpacked to $BLOBDIR/BLOBS_BACKUP_NAME/, so we move the file afterwards. + // async_tar unpacked to "$BLOBDIR/BLOBS_BACKUP_NAME/", so we move the file afterwards. let from_path = context.get_blobdir().join(&path); if from_path.is_file() { - if let Some(name) = from_path.file_name() { - let to_path = context.get_blobdir().join(name); - if let Err(e) = fs::rename(&from_path, &to_path).await { - blobs.push(from_path); - break Err(e).context("Failed to move file to blobdir"); - } - blobs.push(to_path); - } else { - warn!(context, "No file name"); + blobs.push(from_path); + let Some(from_path) = blobs.last() else { + continue; + }; + let mut components = path.components(); + components.next(); // Skip "$BLOBDIR". + components.next(); // Skip "BLOBS_BACKUP_NAME". + let Some(comp0) = components.next() else { + break Err(anyhow!("Not enough components in {}.", path.display())); + }; + let comp1 = components.next(); + if components.next().is_some() { + break Err(anyhow!("Too many components in {}.", path.display())); } + let mut to_path = context.get_blobdir().join(comp0); + if let Some(comp) = comp1 { + if let Err(e) = fs::create_dir(&to_path).await { + // The subdir may remain from a previous import try. + if e.kind() != IoErrorKind::AlreadyExists { + break Err(e).context("Failed to create subdir"); + } + } + to_path = to_path.join(comp); + } + if let Err(e) = fs::rename(&from_path, &to_path).await { + break Err(e).context("Failed to move file to blobdir"); + } + blobs.pop(); + blobs.push(to_path); } }; if res.is_err() { for blob in blobs { fs::remove_file(&blob).await.log_err(context).ok(); + if let Some(dir) = blob.parent() { + if dir != context.get_blobdir() { + fs::remove_dir(dir).await.ok(); + } + } } } diff --git a/src/mimeparser.rs b/src/mimeparser.rs index 095d4e12f..acf3c6036 100644 --- a/src/mimeparser.rs +++ b/src/mimeparser.rs @@ -2365,6 +2365,7 @@ mod tests { #![allow(clippy::indexing_slicing)] use mailparse::ParsedMail; + use regex::Regex; use super::*; use crate::{ @@ -3972,10 +3973,8 @@ Message. mime_message.parts[0].msg, "this is a classic email – I attached the .EML file".to_string() ); - assert_eq!( - mime_message.parts[0].param.get(Param::File), - Some("$BLOBDIR/.eml") - ); + let re = Regex::new("^\\$BLOBDIR/[[:xdigit:]]{16}/.eml$").unwrap(); + assert!(re.is_match(mime_message.parts[0].param.get(Param::File).unwrap())); assert_eq!(mime_message.parts[0].org_filename, Some(".eml".to_string())); diff --git a/src/receive_imf/tests.rs b/src/receive_imf/tests.rs index ee3cd317b..f6f105715 100644 --- a/src/receive_imf/tests.rs +++ b/src/receive_imf/tests.rs @@ -1671,8 +1671,8 @@ async fn test_pdf_filename_simple() { assert_eq!(msg.viewtype, Viewtype::File); assert_eq!(msg.text, "mail body"); let file_path = msg.param.get(Param::File).unwrap(); - assert!(file_path.starts_with("$BLOBDIR/simple")); - assert!(file_path.ends_with(".pdf")); + assert!(file_path.starts_with("$BLOBDIR/")); + assert!(file_path.ends_with("/simple.pdf")); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -1687,8 +1687,8 @@ async fn test_pdf_filename_continuation() { assert_eq!(msg.viewtype, Viewtype::File); assert_eq!(msg.text, "mail body"); let file_path = msg.param.get(Param::File).unwrap(); - assert!(file_path.starts_with("$BLOBDIR/test pdf äöüß")); - assert!(file_path.ends_with(".pdf")); + assert!(file_path.starts_with("$BLOBDIR/")); + assert!(file_path.ends_with("/test pdf äöüß.pdf")); } /// HTML-images may come with many embedded images, eg. tiny icons, corners for formatting, diff --git a/src/sql.rs b/src/sql.rs index 0356f851c..fbb876be8 100644 --- a/src/sql.rs +++ b/src/sql.rs @@ -849,8 +849,16 @@ pub async fn remove_unused_files(context: &Context) -> Result<()> { info!(context, "{} files in use.", files_in_use.len()); /* go through directories and delete unused files */ let blobdir = context.get_blobdir(); - for p in [&blobdir.join(BLOBS_BACKUP_NAME), blobdir] { - match tokio::fs::read_dir(p).await { + let blobs_backup_dir = blobdir.join(BLOBS_BACKUP_NAME); + let mut dirs = vec![]; + for d in [blobdir, blobs_backup_dir.as_path()] { + dirs.push((d.to_path_buf(), false)); + dirs.push((d.to_path_buf(), true)); + } + while let Some((p, add_dirs)) = dirs.pop() { + let check_files_use = + p == blobdir || (p.parent() == Some(blobdir) && p != blobs_backup_dir); + match tokio::fs::read_dir(&p).await { Ok(mut dir_handle) => { /* avoid deletion of files that are just created to build a message object */ let diff = std::time::Duration::from_secs(60 * 60); @@ -859,10 +867,17 @@ pub async fn remove_unused_files(context: &Context) -> Result<()> { .unwrap_or(SystemTime::UNIX_EPOCH); while let Ok(Some(entry)) = dir_handle.next_entry().await { - let name_f = entry.file_name(); - let name_s = name_f.to_string_lossy(); + let path = entry.path(); + let Ok(path) = path + .strip_prefix(blobdir) + .context("housekeeping") + .log_err(context) + else { + continue; + }; + let name_s = path.to_string_lossy(); - if p == blobdir + if check_files_use && (is_file_in_use(&files_in_use, None, &name_s) || is_file_in_use(&files_in_use, Some(".increation"), &name_s) || is_file_in_use(&files_in_use, Some(".waveform"), &name_s) @@ -873,7 +888,9 @@ pub async fn remove_unused_files(context: &Context) -> Result<()> { if let Ok(stats) = tokio::fs::metadata(entry.path()).await { if stats.is_dir() { - if let Err(e) = tokio::fs::remove_dir(entry.path()).await { + if add_dirs { + dirs.push((entry.path(), false)); + } else if let Err(e) = tokio::fs::remove_dir(entry.path()).await { // The dir could be created not by a user, but by a desktop // environment f.e. So, no warning. info!( @@ -895,7 +912,7 @@ pub async fn remove_unused_files(context: &Context) -> Result<()> { .accessed() .map_or(false, |t| t > keep_files_newer_than); - if p == blobdir + if check_files_use && (recently_created || recently_modified || recently_accessed) { info!( @@ -927,7 +944,7 @@ pub async fn remove_unused_files(context: &Context) -> Result<()> { } } Err(err) => { - if !p.ends_with(BLOBS_BACKUP_NAME) { + if !p.starts_with(&blobs_backup_dir) { warn!( context, "Housekeeping: Cannot read dir {}: {:#}.",