fix: get_filename() is now guaranteed to return a valid filename (#6537)

With iOS and Desktop copying the file to a to a temp file with the name
of `get_filename()`, it should be sanitized first.

The PR can be reviewed commit-by-commit or all at once.
This commit is contained in:
Hocuri
2025-02-13 12:26:23 +01:00
committed by GitHub
parent 81e9628ab7
commit 9b6ef5e54f
6 changed files with 124 additions and 50 deletions

View File

@@ -23,6 +23,7 @@ use crate::constants::{self, MediaQuality};
use crate::context::Context; use crate::context::Context;
use crate::events::EventType; use crate::events::EventType;
use crate::log::LogExt; use crate::log::LogExt;
use crate::tools::sanitize_filename;
/// Represents a file in the blob directory. /// Represents a file in the blob directory.
/// ///
@@ -92,7 +93,7 @@ impl<'a> BlobObject<'a> {
let mut src_file = fs::File::open(src) let mut src_file = fs::File::open(src)
.await .await
.with_context(|| format!("failed to open file {}", src.display()))?; .with_context(|| format!("failed to open file {}", src.display()))?;
let (stem, ext) = BlobObject::sanitise_name(&src.to_string_lossy()); let (stem, ext) = BlobObject::sanitize_name_and_split_extension(&src.to_string_lossy());
let (name, mut dst_file) = let (name, mut dst_file) =
BlobObject::create_new_file(context, context.get_blobdir(), &stem, &ext).await?; BlobObject::create_new_file(context, context.get_blobdir(), &stem, &ext).await?;
let name_for_err = name.clone(); let name_for_err = name.clone();
@@ -159,10 +160,9 @@ impl<'a> BlobObject<'a> {
let hash = hash.get(0..31).unwrap_or(hash); let hash = hash.get(0..31).unwrap_or(hash);
let new_file = let new_file =
if let Some(extension) = original_name.extension().filter(|e| e.len() <= 32) { if let Some(extension) = original_name.extension().filter(|e| e.len() <= 32) {
format!( let extension = extension.to_string_lossy().to_lowercase();
"$BLOBDIR/{hash}.{}", let extension = sanitize_filename(&extension);
extension.to_string_lossy().to_lowercase() format!("$BLOBDIR/{hash}.{}", extension)
)
} else { } else {
format!("$BLOBDIR/{hash}") format!("$BLOBDIR/{hash}")
}; };
@@ -215,7 +215,7 @@ impl<'a> BlobObject<'a> {
/// the file will be copied into the blob directory first. If the /// the file will be copied into the blob directory first. If the
/// source file is already in the blobdir it will not be copied /// source file is already in the blobdir it will not be copied
/// and only be created if it is a valid blobname, that is no /// and only be created if it is a valid blobname, that is no
/// subdirectory is used and [BlobObject::sanitise_name] does not /// subdirectory is used and [BlobObject::sanitize_name_and_split_extension] does not
/// modify the filename. /// modify the filename.
/// ///
/// Paths into the blob directory may be either defined by an absolute path /// Paths into the blob directory may be either defined by an absolute path
@@ -311,41 +311,14 @@ impl<'a> BlobObject<'a> {
} }
} }
/// Create a safe name based on a messy input string. /// The name is returned as a tuple, the first part
///
/// The safe name will be a valid filename on Unix and Windows and
/// not contain any path separators. The input can contain path
/// segments separated by either Unix or Windows path separators,
/// the rightmost non-empty segment will be used as name,
/// sanitised for special characters.
///
/// The resulting name is returned as a tuple, the first part
/// being the stem or basename and the second being an extension, /// being the stem or basename and the second being an extension,
/// including the dot. E.g. "foo.txt" is returned as `("foo", /// including the dot. E.g. "foo.txt" is returned as `("foo",
/// ".txt")` while "bar" is returned as `("bar", "")`. /// ".txt")` while "bar" is returned as `("bar", "")`.
/// ///
/// The extension part will always be lowercased. /// The extension part will always be lowercased.
fn sanitise_name(name: &str) -> (String, String) { fn sanitize_name_and_split_extension(name: &str) -> (String, String) {
let mut name = name; let name = sanitize_filename(name);
for part in name.rsplit('/') {
if !part.is_empty() {
name = part;
break;
}
}
for part in name.rsplit('\\') {
if !part.is_empty() {
name = part;
break;
}
}
let opts = sanitize_filename::Options {
truncate: true,
windows: true,
replacement: "",
};
let name = sanitize_filename::sanitize_with_options(name, opts);
// Let's take a tricky filename, // Let's take a tricky filename,
// "file.with_lots_of_characters_behind_point_and_double_ending.tar.gz" as an example. // "file.with_lots_of_characters_behind_point_and_double_ending.tar.gz" as an example.
// Assume that the extension is 32 chars maximum. // Assume that the extension is 32 chars maximum.

View File

@@ -181,30 +181,33 @@ fn test_is_blob_name() {
#[test] #[test]
fn test_sanitise_name() { fn test_sanitise_name() {
let (stem, ext) = BlobObject::sanitise_name("Я ЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯ.txt"); let (stem, ext) = BlobObject::sanitize_name_and_split_extension(
"Я ЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯ.txt",
);
assert_eq!(ext, ".txt"); assert_eq!(ext, ".txt");
assert!(!stem.is_empty()); assert!(!stem.is_empty());
// the extensions are kept together as between stem and extension a number may be added - // the extensions are kept together as between stem and extension a number may be added -
// and `foo.tar.gz` should become `foo-1234.tar.gz` and not `foo.tar-1234.gz` // and `foo.tar.gz` should become `foo-1234.tar.gz` and not `foo.tar-1234.gz`
let (stem, ext) = BlobObject::sanitise_name("wot.tar.gz"); let (stem, ext) = BlobObject::sanitize_name_and_split_extension("wot.tar.gz");
assert_eq!(stem, "wot"); assert_eq!(stem, "wot");
assert_eq!(ext, ".tar.gz"); assert_eq!(ext, ".tar.gz");
let (stem, ext) = BlobObject::sanitise_name(".foo.bar"); let (stem, ext) = BlobObject::sanitize_name_and_split_extension(".foo.bar");
assert_eq!(stem, ""); assert_eq!(stem, "file");
assert_eq!(ext, ".foo.bar"); assert_eq!(ext, ".foo.bar");
let (stem, ext) = BlobObject::sanitise_name("foo?.bar"); let (stem, ext) = BlobObject::sanitize_name_and_split_extension("foo?.bar");
assert!(stem.contains("foo")); assert!(stem.contains("foo"));
assert!(!stem.contains('?')); assert!(!stem.contains('?'));
assert_eq!(ext, ".bar"); assert_eq!(ext, ".bar");
let (stem, ext) = BlobObject::sanitise_name("no-extension"); let (stem, ext) = BlobObject::sanitize_name_and_split_extension("no-extension");
assert_eq!(stem, "no-extension"); assert_eq!(stem, "no-extension");
assert_eq!(ext, ""); assert_eq!(ext, "");
let (stem, ext) = BlobObject::sanitise_name("path/ignored\\this: is* forbidden?.c"); let (stem, ext) =
BlobObject::sanitize_name_and_split_extension("path/ignored\\this: is* forbidden?.c");
assert_eq!(ext, ".c"); assert_eq!(ext, ".c");
assert!(!stem.contains("path")); assert!(!stem.contains("path"));
assert!(!stem.contains("ignored")); assert!(!stem.contains("ignored"));
@@ -216,7 +219,7 @@ fn test_sanitise_name() {
assert!(!stem.contains('*')); assert!(!stem.contains('*'));
assert!(!stem.contains('?')); assert!(!stem.contains('?'));
let (stem, ext) = BlobObject::sanitise_name( let (stem, ext) = BlobObject::sanitize_name_and_split_extension(
"file.with_lots_of_characters_behind_point_and_double_ending.tar.gz", "file.with_lots_of_characters_behind_point_and_double_ending.tar.gz",
); );
assert_eq!( assert_eq!(
@@ -225,11 +228,11 @@ fn test_sanitise_name() {
); );
assert_eq!(ext, ".tar.gz"); assert_eq!(ext, ".tar.gz");
let (stem, ext) = BlobObject::sanitise_name("a. tar.tar.gz"); let (stem, ext) = BlobObject::sanitize_name_and_split_extension("a. tar.tar.gz");
assert_eq!(stem, "a. tar"); assert_eq!(stem, "a. tar");
assert_eq!(ext, ".tar.gz"); assert_eq!(ext, ".tar.gz");
let (stem, ext) = BlobObject::sanitise_name("Guia_uso_GNB (v0.8).pdf"); let (stem, ext) = BlobObject::sanitize_name_and_split_extension("Guia_uso_GNB (v0.8).pdf");
assert_eq!(stem, "Guia_uso_GNB (v0.8)"); assert_eq!(stem, "Guia_uso_GNB (v0.8)");
assert_eq!(ext, ".pdf"); assert_eq!(ext, ".pdf");
} }

View File

@@ -32,8 +32,8 @@ use crate::reaction::get_msg_reactions;
use crate::sql; use crate::sql;
use crate::summary::Summary; use crate::summary::Summary;
use crate::tools::{ use crate::tools::{
buf_compress, buf_decompress, get_filebytes, get_filemeta, gm2local_offset, read_file, time, buf_compress, buf_decompress, get_filebytes, get_filemeta, gm2local_offset, read_file,
timestamp_to_str, truncate, sanitize_filename, time, timestamp_to_str, truncate,
}; };
/// Message ID, including reserved IDs. /// Message ID, including reserved IDs.
@@ -807,12 +807,12 @@ impl Message {
/// To get the full path, use [`Self::get_file()`]. /// To get the full path, use [`Self::get_file()`].
pub fn get_filename(&self) -> Option<String> { pub fn get_filename(&self) -> Option<String> {
if let Some(name) = self.param.get(Param::Filename) { if let Some(name) = self.param.get(Param::Filename) {
return Some(name.to_string()); return Some(sanitize_filename(name));
} }
self.param self.param
.get(Param::File) .get(Param::File)
.and_then(|file| Path::new(file).file_name()) .and_then(|file| Path::new(file).file_name())
.map(|name| name.to_string_lossy().to_string()) .map(|name| sanitize_filename(&name.to_string_lossy()))
} }
/// Returns the size of the file in bytes, if applicable. /// Returns the size of the file in bytes, if applicable.

View File

@@ -755,3 +755,31 @@ async fn test_delete_msgs_offline() -> Result<()> {
Ok(()) Ok(())
} }
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_sanitize_filename_message() -> Result<()> {
let t = &TestContext::new().await;
let mut msg = Message::new(Viewtype::File);
// Even if some of these characters may be valid on one platform,
// they need to be removed in case a backup is transferred to another platform
// and the UI there tries to copy the blob to a file with the original name
// before passing it to an external program.
msg.set_file_from_bytes(t, "/\\:ee.tx*T ", b"hallo", None)?;
assert_eq!(msg.get_filename().unwrap(), "ee.txT");
let blob = msg.param.get_blob(Param::File, t).await?.unwrap();
assert_eq!(blob.suffix().unwrap(), "txt");
// The filename shouldn't be empty if there were only illegal characters:
msg.set_file_from_bytes(t, "/\\:.txt", b"hallo", None)?;
assert_eq!(msg.get_filename().unwrap(), "file.txt");
msg.set_file_from_bytes(t, "/\\:", b"hallo", None)?;
assert_eq!(msg.get_filename().unwrap(), "file");
msg.set_file_from_bytes(t, ".txt", b"hallo", None)?;
assert_eq!(msg.get_filename().unwrap(), "file.txt");
Ok(())
}

View File

@@ -5582,3 +5582,38 @@ async fn test_two_group_securejoins() -> Result<()> {
Ok(()) Ok(())
} }
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_sanitize_filename_in_received() -> Result<()> {
let alice = &TestContext::new_alice().await;
let raw = b"Message-ID: Mr.XA6y3og8-az.WGbH9_dNcQx@testr
To: <tmp_5890965001269692@testrun.org>
From: \"=?utf-8?q??=\" <tmp_6272287793210918@testrun.org>
Content-Type: multipart/mixed; boundary=\"mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z\"
--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z
Content-Type: text/plain; charset=utf-8
--
Sent with my Delta Chat Messenger: https://delta.chat
--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z
Content-Type: text/html
Content-Disposition: attachment; filename=\"te\xE2\x80\xACst/../../test.H|TML\xE2\x80\xAC \"
Content-Transfer-Encoding: base64
PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh
--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z--";
let msg = receive_imf(alice, raw, false).await?.unwrap();
let msg = Message::load_from_db(alice, msg.msg_ids[0]).await?;
assert_eq!(msg.get_filename().unwrap(), "test.HTML");
let blob = msg.param.get_blob(Param::File, alice).await?.unwrap();
assert_eq!(blob.suffix().unwrap(), "html");
Ok(())
}

View File

@@ -366,6 +366,41 @@ pub(crate) async fn delete_file(context: &Context, path: impl AsRef<Path>) -> Re
Ok(()) Ok(())
} }
/// Create a safe name based on a messy input string.
///
/// The safe name will be a valid filename on Unix and Windows and
/// not contain any path separators. The input can contain path
/// segments separated by either Unix or Windows path separators,
/// the rightmost non-empty segment will be used as name,
/// sanitised for special characters.
pub(crate) fn sanitize_filename(mut name: &str) -> String {
for part in name.rsplit('/') {
if !part.is_empty() {
name = part;
break;
}
}
for part in name.rsplit('\\') {
if !part.is_empty() {
name = part;
break;
}
}
let opts = sanitize_filename::Options {
truncate: true,
windows: true,
replacement: "",
};
let name = sanitize_filename::sanitize_with_options(name, opts);
if name.starts_with('.') || name.is_empty() {
format!("file{name}")
} else {
name
}
}
/// A guard which will remove the path when dropped. /// A guard which will remove the path when dropped.
/// ///
/// It implements [`Deref`] so it can be used as a `&Path`. /// It implements [`Deref`] so it can be used as a `&Path`.