From ed33f30a60a1605c4d5934a1df68d57f413faad9 Mon Sep 17 00:00:00 2001 From: iequidoo Date: Tue, 25 Jul 2023 20:37:48 -0300 Subject: [PATCH] feat: Use random filename suffixes for blobstorage (#4309) Recently there was an accident with a chatbot that replaced its avatar set from the command line with an unrelated avatar of a contact. Both the `selfavatar` setting and the contact avatar `i` param pointed to `$BLOBDIR/avatar.png` at the time it was detected. How this happened is unclear, but it is possible that `avatar.png` was removed, unmounted or otherwise not detected by the core, and the core stored avatar received from the contact as `avatar.png`, while `selfavatar` config still pointed to `$BLOBDIR/avatar.png`. Such bugs are unavoidable even if the core itself has no bugs as we cannot rely on blobdir not reside on the faulty network filesystem, being incorrectly backed up and restored etc., so we should assume that files may be randomly removed. Then there may be dangling `$BLOBDIR/...` references in the database which may accidentally point to unrelated files, could even be an `avatar.png` file sent to the bot in private. To prevent such bugs, we add random filename suffixes for the blobdir objects. Thanks to the added Param::Filename these random suffixes aren't sent over the network. --- python/src/deltachat/message.py | 3 +- python/tests/test_3_offline.py | 17 +++-- src/blob.rs | 109 ++++++++++++++++++-------------- src/chat.rs | 7 +- src/constants.rs | 3 + src/mimeparser.rs | 7 +- 6 files changed, 84 insertions(+), 62 deletions(-) diff --git a/python/src/deltachat/message.py b/python/src/deltachat/message.py index 9ffd980c1..491a6520d 100644 --- a/python/src/deltachat/message.py +++ b/python/src/deltachat/message.py @@ -108,7 +108,7 @@ class Message: @props.with_doc def filename(self): - """filename if there was an attachment, otherwise empty string.""" + """file path if there was an attachment, otherwise empty string.""" return from_dc_charpointer(lib.dc_msg_get_file(self._dc_msg)) def set_file(self, path, mime_type=None): @@ -121,7 +121,6 @@ class Message: @props.with_doc def basename(self) -> str: """basename of the attachment if it exists, otherwise empty string.""" - # FIXME, it does not return basename return from_dc_charpointer(lib.dc_msg_get_filename(self._dc_msg)) @props.with_doc diff --git a/python/tests/test_3_offline.py b/python/tests/test_3_offline.py index 20a64e6e8..74f3fbf62 100644 --- a/python/tests/test_3_offline.py +++ b/python/tests/test_3_offline.py @@ -444,27 +444,30 @@ class TestOfflineChat: assert msg.filemime == "image/png" @pytest.mark.parametrize( - ("fn", "typein", "typeout"), + ("stem", "ext", "typein", "typeout"), [ - ("r", None, "application/octet-stream"), - ("r.txt", None, "text/plain"), - ("r.txt", "text/plain", "text/plain"), - ("r.txt", "image/png", "image/png"), + ("r", "", None, "application/octet-stream"), + ("r", ".txt", None, "text/plain"), + ("r", ".txt", "text/plain", "text/plain"), + ("r", ".txt", "image/png", "image/png"), ], ) - def test_message_file(self, chat1, data, lp, fn, typein, typeout): + def test_message_file(self, chat1, data, lp, stem, ext, typein, typeout): lp.sec("sending file") + fn = stem + ext fp = data.get_path(fn) msg = chat1.send_file(fp, typein) assert msg assert msg.id > 0 assert msg.is_file() assert os.path.exists(msg.filename) - assert msg.filename.endswith(msg.basename) + assert msg.filename.endswith(ext) + assert msg.basename == fn assert msg.filemime == typeout msg2 = chat1.send_file(fp, typein) assert msg2 != msg assert msg2.filename != msg.filename + assert msg2.basename == fn def test_create_contact(self, acfactory): ac1 = acfactory.get_pseudo_configured_account() diff --git a/src/blob.rs b/src/blob.rs index dcc157ada..0c7a61cee 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -19,7 +19,7 @@ use tokio::{fs, io}; use tokio_stream::wrappers::ReadDirStream; use crate::config::Config; -use crate::constants::{self, MediaQuality}; +use crate::constants::{self, MediaQuality, BLOB_CREATE_ATTEMPTS}; use crate::context::Context; use crate::events::EventType; use crate::log::LogExt; @@ -80,10 +80,9 @@ impl<'a> BlobObject<'a> { stem: &str, ext: &str, ) -> Result<(String, fs::File)> { - const MAX_ATTEMPT: u32 = 16; let mut attempt = 0; - let mut name = format!("{stem}{ext}"); loop { + let name = format!("{}-{:016x}{}", stem, rand::random::(), ext); attempt += 1; let path = dir.join(&name); match fs::OpenOptions::new() @@ -94,12 +93,10 @@ impl<'a> BlobObject<'a> { { Ok(file) => return Ok((name, file)), Err(err) => { - if attempt >= MAX_ATTEMPT { + if attempt >= BLOB_CREATE_ATTEMPTS { return Err(err).context("failed to create file"); } else if attempt == 1 && !dir.exists() { fs::create_dir_all(dir).await.log_err(context).ok(); - } else { - name = format!("{}-{}{}", stem, rand::random::(), ext); } } } @@ -743,6 +740,7 @@ fn add_white_bg(img: &mut DynamicImage) { #[cfg(test)] mod tests { use fs::File; + use regex::Regex; use super::*; use crate::chat::{self, create_group_chat, ProtectionStatus}; @@ -762,32 +760,43 @@ mod tests { async fn test_create() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo", b"hello").await.unwrap(); - let fname = t.get_blobdir().join("foo"); + let re = Regex::new("^foo-[[:xdigit:]]{16}$").unwrap(); + assert!(re.is_match(blob.as_file_name())); + let fname = t.get_blobdir().join(blob.as_file_name()); let data = fs::read(fname).await.unwrap(); assert_eq!(data, b"hello"); - assert_eq!(blob.as_name(), "$BLOBDIR/foo"); - assert_eq!(blob.to_abs_path(), t.get_blobdir().join("foo")); + assert_eq!( + blob.as_name(), + "$BLOBDIR/".to_string() + blob.as_file_name() + ); + assert_eq!( + blob.to_abs_path(), + t.get_blobdir().join(blob.as_file_name()) + ); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_lowercase_ext() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo.TXT", b"hello").await.unwrap(); - assert_eq!(blob.as_name(), "$BLOBDIR/foo.txt"); + let re = Regex::new("^\\$BLOBDIR/foo-[[:xdigit:]]{16}.txt$").unwrap(); + assert!(re.is_match(blob.as_name())); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_as_file_name() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); - assert_eq!(blob.as_file_name(), "foo.txt"); + let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap(); + assert!(re.is_match(blob.as_file_name())); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_as_rel_path() { let t = TestContext::new().await; let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); - assert_eq!(blob.as_rel_path(), Path::new("foo.txt")); + let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap(); + assert!(re.is_match(blob.as_rel_path().to_str().unwrap())); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -802,30 +811,30 @@ mod tests { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_create_dup() { let t = TestContext::new().await; - BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); - let foo_path = t.get_blobdir().join("foo.txt"); + let re = Regex::new("^foo-[[:xdigit:]]{16}.txt$").unwrap(); + + let blob = BlobObject::create(&t, "foo.txt", b"hello").await.unwrap(); + assert!(re.is_match(blob.as_rel_path().to_str().unwrap())); + let foo_path = t.get_blobdir().join(blob.as_file_name()); assert!(foo_path.exists()); - BlobObject::create(&t, "foo.txt", b"world").await.unwrap(); - let mut dir = fs::read_dir(t.get_blobdir()).await.unwrap(); - while let Ok(Some(dirent)) = dir.next_entry().await { - let fname = dirent.file_name(); - if fname == foo_path.file_name().unwrap() { - assert_eq!(fs::read(&foo_path).await.unwrap(), b"hello"); - } else { - let name = fname.to_str().unwrap(); - assert!(name.starts_with("foo")); - assert!(name.ends_with(".txt")); - } - } + + let blob = BlobObject::create(&t, "foo.txt", b"world").await.unwrap(); + assert!(re.is_match(blob.as_rel_path().to_str().unwrap())); + let foo_path2 = t.get_blobdir().join(blob.as_file_name()); + assert!(foo_path2.exists()); + + assert!(foo_path != foo_path2); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_double_ext_preserved() { let t = TestContext::new().await; - BlobObject::create(&t, "foo.tar.gz", b"hello") + let blob = BlobObject::create(&t, "foo.tar.gz", b"hello") .await .unwrap(); - let foo_path = t.get_blobdir().join("foo.tar.gz"); + let re = Regex::new("^foo-[[:xdigit:]]{16}.tar.gz$").unwrap(); + assert!(re.is_match(blob.as_file_name())); + let foo_path = t.get_blobdir().join(blob.as_file_name()); assert!(foo_path.exists()); BlobObject::create(&t, "foo.tar.gz", b"world") .await @@ -859,7 +868,8 @@ mod tests { let src = t.dir.path().join("src"); fs::write(&src, b"boo").await.unwrap(); let blob = BlobObject::create_and_copy(&t, src.as_ref()).await.unwrap(); - assert_eq!(blob.as_name(), "$BLOBDIR/src"); + let re = Regex::new("^\\$BLOBDIR/src-[[:xdigit:]]{16}$").unwrap(); + assert!(re.is_match(blob.as_name())); let data = fs::read(blob.to_abs_path()).await.unwrap(); assert_eq!(data, b"boo"); @@ -880,7 +890,8 @@ mod tests { let blob = BlobObject::new_from_path(&t, src_ext.as_ref()) .await .unwrap(); - assert_eq!(blob.as_name(), "$BLOBDIR/external"); + let re = Regex::new("^\\$BLOBDIR/external-[[:xdigit:]]{16}$").unwrap(); + assert!(re.is_match(blob.as_name())); let data = fs::read(blob.to_abs_path()).await.unwrap(); assert_eq!(data, b"boo"); @@ -891,6 +902,7 @@ mod tests { let data = fs::read(blob.to_abs_path()).await.unwrap(); assert_eq!(data, b"boo"); } + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test_create_from_name_long() { let t = TestContext::new().await; @@ -899,10 +911,10 @@ mod tests { let blob = BlobObject::new_from_path(&t, src_ext.as_ref()) .await .unwrap(); - assert_eq!( - blob.as_name(), - "$BLOBDIR/autocrypt-setup-message-4137848473.html" - ); + let re = + Regex::new("^\\$BLOBDIR/autocrypt-setup-message-4137848473-[[:xdigit:]]{16}.html$") + .unwrap(); + assert!(re.is_match(blob.as_name())); } #[test] @@ -994,19 +1006,21 @@ mod tests { let avatar_src = t.dir.path().join("avatar.jpg"); let avatar_bytes = include_bytes!("../test-data/image/avatar1000x1000.jpg"); fs::write(&avatar_src, avatar_bytes).await.unwrap(); - let avatar_blob = t.get_blobdir().join("avatar.jpg"); - assert!(!avatar_blob.exists()); t.set_config(Config::Selfavatar, Some(avatar_src.to_str().unwrap())) .await .unwrap(); + let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap(); + let blobdir = t.get_blobdir().to_str().unwrap(); + assert!(avatar_blob.starts_with(blobdir)); + let re = Regex::new("avatar-[[:xdigit:]]{16}.jpg$").unwrap(); + assert!(re.is_match(&avatar_blob)); + let avatar_blob = Path::new(&avatar_blob); assert!(avatar_blob.exists()); assert!(fs::metadata(&avatar_blob).await.unwrap().len() < avatar_bytes.len() as u64); - let avatar_cfg = t.get_config(Config::Selfavatar).await.unwrap(); - assert_eq!(avatar_cfg, avatar_blob.to_str().map(|s| s.to_string())); check_image_size(avatar_src, 1000, 1000); check_image_size( - &avatar_blob, + avatar_blob, constants::BALANCED_AVATAR_SIZE, constants::BALANCED_AVATAR_SIZE, ); @@ -1016,7 +1030,7 @@ mod tests { file.metadata().await.unwrap().len() } - let mut blob = BlobObject::new_from_path(&t, &avatar_blob).await.unwrap(); + let mut blob = BlobObject::new_from_path(&t, avatar_blob).await.unwrap(); let maybe_sticker = &mut false; let strict_limits = true; blob.recode_to_size( @@ -1028,8 +1042,8 @@ mod tests { strict_limits, ) .unwrap(); - assert!(file_size(&avatar_blob).await <= 3000); - assert!(file_size(&avatar_blob).await > 2000); + assert!(file_size(avatar_blob).await <= 3000); + assert!(file_size(avatar_blob).await > 2000); tokio::task::block_in_place(move || { let img = image::open(avatar_blob).unwrap(); assert!(img.width() > 130); @@ -1069,18 +1083,19 @@ mod tests { let avatar_src = t.dir.path().join("avatar.png"); let avatar_bytes = include_bytes!("../test-data/image/avatar64x64.png"); fs::write(&avatar_src, avatar_bytes).await.unwrap(); - let avatar_blob = t.get_blobdir().join("avatar.png"); - assert!(!avatar_blob.exists()); t.set_config(Config::Selfavatar, Some(avatar_src.to_str().unwrap())) .await .unwrap(); - assert!(avatar_blob.exists()); + let avatar_blob = t.get_config(Config::Selfavatar).await.unwrap().unwrap(); + let blobdir = t.get_blobdir().to_str().unwrap(); + assert!(avatar_blob.starts_with(blobdir)); + let re = Regex::new("avatar-[[:xdigit:]]{16}.png$").unwrap(); + assert!(re.is_match(&avatar_blob)); + assert!(Path::new(&avatar_blob).exists()); assert_eq!( fs::metadata(&avatar_blob).await.unwrap().len(), avatar_bytes.len() as u64 ); - let avatar_cfg = t.get_config(Config::Selfavatar).await.unwrap(); - assert_eq!(avatar_cfg, avatar_blob.to_str().map(|s| s.to_string())); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] diff --git a/src/chat.rs b/src/chat.rs index f6a7a2b8b..961e6ae1a 100644 --- a/src/chat.rs +++ b/src/chat.rs @@ -4520,6 +4520,7 @@ mod tests { use crate::message::delete_msgs; use crate::receive_imf::receive_imf; use crate::test_utils::{sync, TestContext, TestContextManager}; + use regex::Regex; use strum::IntoEnumIterator; use tokio::fs; @@ -7078,9 +7079,11 @@ mod tests { // the file bob receives should not contain BIDI-control characters assert_eq!( - Some("$BLOBDIR/harmless_file.txt.exe"), - msg.param.get(Param::File), + msg.param.get(Param::Filename).unwrap(), + "harmless_file.txt.exe" ); + let re = Regex::new("^\\$BLOBDIR/harmless_file-[[:xdigit:]]{16}.txt.exe$").unwrap(); + assert!(re.is_match(msg.param.get(Param::File).unwrap())); Ok(()) } diff --git a/src/constants.rs b/src/constants.rs index ce30520b7..a53f81dc8 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -211,6 +211,9 @@ pub(crate) const DC_FOLDERS_CONFIGURED_KEY: &str = "folders_configured"; // this value can be increased if the folder configuration is changed and must be redone on next program start pub(crate) const DC_FOLDERS_CONFIGURED_VERSION: i32 = 4; +// Maximum attemps to create a blob file. +pub(crate) const BLOB_CREATE_ATTEMPTS: u32 = 2; + // If more recipients are needed in SMTP's `RCPT TO:` header, the recipient list is split into // chunks. This does not affect MIME's `To:` header. Can be overwritten by setting // `max_smtp_rcpt_to` in the provider db. diff --git a/src/mimeparser.rs b/src/mimeparser.rs index ee88130d5..ad0c0fe29 100644 --- a/src/mimeparser.rs +++ b/src/mimeparser.rs @@ -2254,6 +2254,7 @@ mod tests { #![allow(clippy::indexing_slicing)] use mailparse::ParsedMail; + use regex::Regex; use super::*; use crate::{ @@ -3800,10 +3801,8 @@ Message. mime_message.parts[0].msg, "this is a classic email – I attached the .EML file".to_string() ); - assert_eq!( - mime_message.parts[0].param.get(Param::File), - Some("$BLOBDIR/.eml") - ); + let re = Regex::new("^\\$BLOBDIR/-[[:xdigit:]]{16}.eml$").unwrap(); + assert!(re.is_match(mime_message.parts[0].param.get(Param::File).unwrap())); assert_eq!(mime_message.parts[0].org_filename, Some(".eml".to_string()));