diff --git a/src/blob.rs b/src/blob.rs index 036bc4f3f..5bdcbf447 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -23,6 +23,7 @@ use crate::constants::{self, MediaQuality}; use crate::context::Context; use crate::events::EventType; use crate::log::LogExt; +use crate::tools::sanitize_filename; /// Represents a file in the blob directory. /// @@ -92,7 +93,7 @@ impl<'a> BlobObject<'a> { let mut src_file = fs::File::open(src) .await .with_context(|| format!("failed to open file {}", src.display()))?; - let (stem, ext) = BlobObject::sanitise_name(&src.to_string_lossy()); + let (stem, ext) = BlobObject::sanitize_name_and_split_extension(&src.to_string_lossy()); let (name, mut dst_file) = BlobObject::create_new_file(context, context.get_blobdir(), &stem, &ext).await?; let name_for_err = name.clone(); @@ -159,10 +160,9 @@ impl<'a> BlobObject<'a> { let hash = hash.get(0..31).unwrap_or(hash); let new_file = if let Some(extension) = original_name.extension().filter(|e| e.len() <= 32) { - format!( - "$BLOBDIR/{hash}.{}", - extension.to_string_lossy().to_lowercase() - ) + let extension = extension.to_string_lossy().to_lowercase(); + let extension = sanitize_filename(&extension); + format!("$BLOBDIR/{hash}.{}", extension) } else { format!("$BLOBDIR/{hash}") }; @@ -215,7 +215,7 @@ impl<'a> BlobObject<'a> { /// the file will be copied into the blob directory first. If the /// source file is already in the blobdir it will not be copied /// and only be created if it is a valid blobname, that is no - /// subdirectory is used and [BlobObject::sanitise_name] does not + /// subdirectory is used and [BlobObject::sanitize_name_and_split_extension] does not /// modify the filename. /// /// Paths into the blob directory may be either defined by an absolute path @@ -311,41 +311,14 @@ impl<'a> BlobObject<'a> { } } - /// Create a safe name based on a messy input string. - /// - /// The safe name will be a valid filename on Unix and Windows and - /// not contain any path separators. The input can contain path - /// segments separated by either Unix or Windows path separators, - /// the rightmost non-empty segment will be used as name, - /// sanitised for special characters. - /// - /// The resulting name is returned as a tuple, the first part + /// The name is returned as a tuple, the first part /// being the stem or basename and the second being an extension, /// including the dot. E.g. "foo.txt" is returned as `("foo", /// ".txt")` while "bar" is returned as `("bar", "")`. /// /// The extension part will always be lowercased. - fn sanitise_name(name: &str) -> (String, String) { - let mut name = name; - for part in name.rsplit('/') { - if !part.is_empty() { - name = part; - break; - } - } - for part in name.rsplit('\\') { - if !part.is_empty() { - name = part; - break; - } - } - let opts = sanitize_filename::Options { - truncate: true, - windows: true, - replacement: "", - }; - - let name = sanitize_filename::sanitize_with_options(name, opts); + fn sanitize_name_and_split_extension(name: &str) -> (String, String) { + let name = sanitize_filename(name); // Let's take a tricky filename, // "file.with_lots_of_characters_behind_point_and_double_ending.tar.gz" as an example. // Assume that the extension is 32 chars maximum. diff --git a/src/blob/blob_tests.rs b/src/blob/blob_tests.rs index b9e766354..5c67641da 100644 --- a/src/blob/blob_tests.rs +++ b/src/blob/blob_tests.rs @@ -181,30 +181,33 @@ fn test_is_blob_name() { #[test] fn test_sanitise_name() { - let (stem, ext) = BlobObject::sanitise_name("Я ЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯ.txt"); + let (stem, ext) = BlobObject::sanitize_name_and_split_extension( + "Я ЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯ.txt", + ); assert_eq!(ext, ".txt"); assert!(!stem.is_empty()); // the extensions are kept together as between stem and extension a number may be added - // and `foo.tar.gz` should become `foo-1234.tar.gz` and not `foo.tar-1234.gz` - let (stem, ext) = BlobObject::sanitise_name("wot.tar.gz"); + let (stem, ext) = BlobObject::sanitize_name_and_split_extension("wot.tar.gz"); assert_eq!(stem, "wot"); assert_eq!(ext, ".tar.gz"); - let (stem, ext) = BlobObject::sanitise_name(".foo.bar"); - assert_eq!(stem, ""); + let (stem, ext) = BlobObject::sanitize_name_and_split_extension(".foo.bar"); + assert_eq!(stem, "file"); assert_eq!(ext, ".foo.bar"); - let (stem, ext) = BlobObject::sanitise_name("foo?.bar"); + let (stem, ext) = BlobObject::sanitize_name_and_split_extension("foo?.bar"); assert!(stem.contains("foo")); assert!(!stem.contains('?')); assert_eq!(ext, ".bar"); - let (stem, ext) = BlobObject::sanitise_name("no-extension"); + let (stem, ext) = BlobObject::sanitize_name_and_split_extension("no-extension"); assert_eq!(stem, "no-extension"); assert_eq!(ext, ""); - let (stem, ext) = BlobObject::sanitise_name("path/ignored\\this: is* forbidden?.c"); + let (stem, ext) = + BlobObject::sanitize_name_and_split_extension("path/ignored\\this: is* forbidden?.c"); assert_eq!(ext, ".c"); assert!(!stem.contains("path")); assert!(!stem.contains("ignored")); @@ -216,7 +219,7 @@ fn test_sanitise_name() { assert!(!stem.contains('*')); assert!(!stem.contains('?')); - let (stem, ext) = BlobObject::sanitise_name( + let (stem, ext) = BlobObject::sanitize_name_and_split_extension( "file.with_lots_of_characters_behind_point_and_double_ending.tar.gz", ); assert_eq!( @@ -225,11 +228,11 @@ fn test_sanitise_name() { ); assert_eq!(ext, ".tar.gz"); - let (stem, ext) = BlobObject::sanitise_name("a. tar.tar.gz"); + let (stem, ext) = BlobObject::sanitize_name_and_split_extension("a. tar.tar.gz"); assert_eq!(stem, "a. tar"); assert_eq!(ext, ".tar.gz"); - let (stem, ext) = BlobObject::sanitise_name("Guia_uso_GNB (v0.8).pdf"); + let (stem, ext) = BlobObject::sanitize_name_and_split_extension("Guia_uso_GNB (v0.8).pdf"); assert_eq!(stem, "Guia_uso_GNB (v0.8)"); assert_eq!(ext, ".pdf"); } diff --git a/src/message.rs b/src/message.rs index cfe01b717..0a73a355e 100644 --- a/src/message.rs +++ b/src/message.rs @@ -32,8 +32,8 @@ use crate::reaction::get_msg_reactions; use crate::sql; use crate::summary::Summary; use crate::tools::{ - buf_compress, buf_decompress, get_filebytes, get_filemeta, gm2local_offset, read_file, time, - timestamp_to_str, truncate, + buf_compress, buf_decompress, get_filebytes, get_filemeta, gm2local_offset, read_file, + sanitize_filename, time, timestamp_to_str, truncate, }; /// Message ID, including reserved IDs. @@ -807,12 +807,12 @@ impl Message { /// To get the full path, use [`Self::get_file()`]. pub fn get_filename(&self) -> Option { if let Some(name) = self.param.get(Param::Filename) { - return Some(name.to_string()); + return Some(sanitize_filename(name)); } self.param .get(Param::File) .and_then(|file| Path::new(file).file_name()) - .map(|name| name.to_string_lossy().to_string()) + .map(|name| sanitize_filename(&name.to_string_lossy())) } /// Returns the size of the file in bytes, if applicable. diff --git a/src/message/message_tests.rs b/src/message/message_tests.rs index 5a2118e1a..2f6da4a76 100644 --- a/src/message/message_tests.rs +++ b/src/message/message_tests.rs @@ -755,3 +755,31 @@ async fn test_delete_msgs_offline() -> Result<()> { Ok(()) } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn test_sanitize_filename_message() -> Result<()> { + let t = &TestContext::new().await; + let mut msg = Message::new(Viewtype::File); + + // Even if some of these characters may be valid on one platform, + // they need to be removed in case a backup is transferred to another platform + // and the UI there tries to copy the blob to a file with the original name + // before passing it to an external program. + msg.set_file_from_bytes(t, "/\\:ee.tx*T ", b"hallo", None)?; + assert_eq!(msg.get_filename().unwrap(), "ee.txT"); + + let blob = msg.param.get_blob(Param::File, t).await?.unwrap(); + assert_eq!(blob.suffix().unwrap(), "txt"); + + // The filename shouldn't be empty if there were only illegal characters: + msg.set_file_from_bytes(t, "/\\:.txt", b"hallo", None)?; + assert_eq!(msg.get_filename().unwrap(), "file.txt"); + + msg.set_file_from_bytes(t, "/\\:", b"hallo", None)?; + assert_eq!(msg.get_filename().unwrap(), "file"); + + msg.set_file_from_bytes(t, ".txt", b"hallo", None)?; + assert_eq!(msg.get_filename().unwrap(), "file.txt"); + + Ok(()) +} diff --git a/src/receive_imf/receive_imf_tests.rs b/src/receive_imf/receive_imf_tests.rs index 409ea9937..59a07f6fe 100644 --- a/src/receive_imf/receive_imf_tests.rs +++ b/src/receive_imf/receive_imf_tests.rs @@ -5582,3 +5582,38 @@ async fn test_two_group_securejoins() -> Result<()> { Ok(()) } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn test_sanitize_filename_in_received() -> Result<()> { + let alice = &TestContext::new_alice().await; + let raw = b"Message-ID: Mr.XA6y3og8-az.WGbH9_dNcQx@testr +To: +From: \"=?utf-8?q??=\" +Content-Type: multipart/mixed; boundary=\"mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z\" + + +--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z +Content-Type: text/plain; charset=utf-8 + +-- +Sent with my Delta Chat Messenger: https://delta.chat + +--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z +Content-Type: text/html +Content-Disposition: attachment; filename=\"te\xE2\x80\xACst/../../test.H|TML\xE2\x80\xAC \" +Content-Transfer-Encoding: base64 + +PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh + +--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z--"; + + let msg = receive_imf(alice, raw, false).await?.unwrap(); + let msg = Message::load_from_db(alice, msg.msg_ids[0]).await?; + + assert_eq!(msg.get_filename().unwrap(), "test.HTML"); + + let blob = msg.param.get_blob(Param::File, alice).await?.unwrap(); + assert_eq!(blob.suffix().unwrap(), "html"); + + Ok(()) +} diff --git a/src/tools.rs b/src/tools.rs index 9cb6d05d2..db70e9e0b 100644 --- a/src/tools.rs +++ b/src/tools.rs @@ -366,6 +366,41 @@ pub(crate) async fn delete_file(context: &Context, path: impl AsRef) -> Re Ok(()) } +/// Create a safe name based on a messy input string. +/// +/// The safe name will be a valid filename on Unix and Windows and +/// not contain any path separators. The input can contain path +/// segments separated by either Unix or Windows path separators, +/// the rightmost non-empty segment will be used as name, +/// sanitised for special characters. +pub(crate) fn sanitize_filename(mut name: &str) -> String { + for part in name.rsplit('/') { + if !part.is_empty() { + name = part; + break; + } + } + for part in name.rsplit('\\') { + if !part.is_empty() { + name = part; + break; + } + } + + let opts = sanitize_filename::Options { + truncate: true, + windows: true, + replacement: "", + }; + let name = sanitize_filename::sanitize_with_options(name, opts); + + if name.starts_with('.') || name.is_empty() { + format!("file{name}") + } else { + name + } +} + /// A guard which will remove the path when dropped. /// /// It implements [`Deref`] so it can be used as a `&Path`.