fix: get_filename() is now guaranteed to return a valid filename (#6537)

With iOS and Desktop copying the file to a to a temp file with the name
of `get_filename()`, it should be sanitized first.

The PR can be reviewed commit-by-commit or all at once.
This commit is contained in:
Hocuri
2025-02-13 12:26:23 +01:00
committed by GitHub
parent 81e9628ab7
commit 9b6ef5e54f
6 changed files with 124 additions and 50 deletions

View File

@@ -23,6 +23,7 @@ use crate::constants::{self, MediaQuality};
use crate::context::Context;
use crate::events::EventType;
use crate::log::LogExt;
use crate::tools::sanitize_filename;
/// Represents a file in the blob directory.
///
@@ -92,7 +93,7 @@ impl<'a> BlobObject<'a> {
let mut src_file = fs::File::open(src)
.await
.with_context(|| format!("failed to open file {}", src.display()))?;
let (stem, ext) = BlobObject::sanitise_name(&src.to_string_lossy());
let (stem, ext) = BlobObject::sanitize_name_and_split_extension(&src.to_string_lossy());
let (name, mut dst_file) =
BlobObject::create_new_file(context, context.get_blobdir(), &stem, &ext).await?;
let name_for_err = name.clone();
@@ -159,10 +160,9 @@ impl<'a> BlobObject<'a> {
let hash = hash.get(0..31).unwrap_or(hash);
let new_file =
if let Some(extension) = original_name.extension().filter(|e| e.len() <= 32) {
format!(
"$BLOBDIR/{hash}.{}",
extension.to_string_lossy().to_lowercase()
)
let extension = extension.to_string_lossy().to_lowercase();
let extension = sanitize_filename(&extension);
format!("$BLOBDIR/{hash}.{}", extension)
} else {
format!("$BLOBDIR/{hash}")
};
@@ -215,7 +215,7 @@ impl<'a> BlobObject<'a> {
/// the file will be copied into the blob directory first. If the
/// source file is already in the blobdir it will not be copied
/// and only be created if it is a valid blobname, that is no
/// subdirectory is used and [BlobObject::sanitise_name] does not
/// subdirectory is used and [BlobObject::sanitize_name_and_split_extension] does not
/// modify the filename.
///
/// Paths into the blob directory may be either defined by an absolute path
@@ -311,41 +311,14 @@ impl<'a> BlobObject<'a> {
}
}
/// Create a safe name based on a messy input string.
///
/// The safe name will be a valid filename on Unix and Windows and
/// not contain any path separators. The input can contain path
/// segments separated by either Unix or Windows path separators,
/// the rightmost non-empty segment will be used as name,
/// sanitised for special characters.
///
/// The resulting name is returned as a tuple, the first part
/// The name is returned as a tuple, the first part
/// being the stem or basename and the second being an extension,
/// including the dot. E.g. "foo.txt" is returned as `("foo",
/// ".txt")` while "bar" is returned as `("bar", "")`.
///
/// The extension part will always be lowercased.
fn sanitise_name(name: &str) -> (String, String) {
let mut name = name;
for part in name.rsplit('/') {
if !part.is_empty() {
name = part;
break;
}
}
for part in name.rsplit('\\') {
if !part.is_empty() {
name = part;
break;
}
}
let opts = sanitize_filename::Options {
truncate: true,
windows: true,
replacement: "",
};
let name = sanitize_filename::sanitize_with_options(name, opts);
fn sanitize_name_and_split_extension(name: &str) -> (String, String) {
let name = sanitize_filename(name);
// Let's take a tricky filename,
// "file.with_lots_of_characters_behind_point_and_double_ending.tar.gz" as an example.
// Assume that the extension is 32 chars maximum.

View File

@@ -181,30 +181,33 @@ fn test_is_blob_name() {
#[test]
fn test_sanitise_name() {
let (stem, ext) = BlobObject::sanitise_name("Я ЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯ.txt");
let (stem, ext) = BlobObject::sanitize_name_and_split_extension(
"Я ЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯЯ.txt",
);
assert_eq!(ext, ".txt");
assert!(!stem.is_empty());
// the extensions are kept together as between stem and extension a number may be added -
// and `foo.tar.gz` should become `foo-1234.tar.gz` and not `foo.tar-1234.gz`
let (stem, ext) = BlobObject::sanitise_name("wot.tar.gz");
let (stem, ext) = BlobObject::sanitize_name_and_split_extension("wot.tar.gz");
assert_eq!(stem, "wot");
assert_eq!(ext, ".tar.gz");
let (stem, ext) = BlobObject::sanitise_name(".foo.bar");
assert_eq!(stem, "");
let (stem, ext) = BlobObject::sanitize_name_and_split_extension(".foo.bar");
assert_eq!(stem, "file");
assert_eq!(ext, ".foo.bar");
let (stem, ext) = BlobObject::sanitise_name("foo?.bar");
let (stem, ext) = BlobObject::sanitize_name_and_split_extension("foo?.bar");
assert!(stem.contains("foo"));
assert!(!stem.contains('?'));
assert_eq!(ext, ".bar");
let (stem, ext) = BlobObject::sanitise_name("no-extension");
let (stem, ext) = BlobObject::sanitize_name_and_split_extension("no-extension");
assert_eq!(stem, "no-extension");
assert_eq!(ext, "");
let (stem, ext) = BlobObject::sanitise_name("path/ignored\\this: is* forbidden?.c");
let (stem, ext) =
BlobObject::sanitize_name_and_split_extension("path/ignored\\this: is* forbidden?.c");
assert_eq!(ext, ".c");
assert!(!stem.contains("path"));
assert!(!stem.contains("ignored"));
@@ -216,7 +219,7 @@ fn test_sanitise_name() {
assert!(!stem.contains('*'));
assert!(!stem.contains('?'));
let (stem, ext) = BlobObject::sanitise_name(
let (stem, ext) = BlobObject::sanitize_name_and_split_extension(
"file.with_lots_of_characters_behind_point_and_double_ending.tar.gz",
);
assert_eq!(
@@ -225,11 +228,11 @@ fn test_sanitise_name() {
);
assert_eq!(ext, ".tar.gz");
let (stem, ext) = BlobObject::sanitise_name("a. tar.tar.gz");
let (stem, ext) = BlobObject::sanitize_name_and_split_extension("a. tar.tar.gz");
assert_eq!(stem, "a. tar");
assert_eq!(ext, ".tar.gz");
let (stem, ext) = BlobObject::sanitise_name("Guia_uso_GNB (v0.8).pdf");
let (stem, ext) = BlobObject::sanitize_name_and_split_extension("Guia_uso_GNB (v0.8).pdf");
assert_eq!(stem, "Guia_uso_GNB (v0.8)");
assert_eq!(ext, ".pdf");
}

View File

@@ -32,8 +32,8 @@ use crate::reaction::get_msg_reactions;
use crate::sql;
use crate::summary::Summary;
use crate::tools::{
buf_compress, buf_decompress, get_filebytes, get_filemeta, gm2local_offset, read_file, time,
timestamp_to_str, truncate,
buf_compress, buf_decompress, get_filebytes, get_filemeta, gm2local_offset, read_file,
sanitize_filename, time, timestamp_to_str, truncate,
};
/// Message ID, including reserved IDs.
@@ -807,12 +807,12 @@ impl Message {
/// To get the full path, use [`Self::get_file()`].
pub fn get_filename(&self) -> Option<String> {
if let Some(name) = self.param.get(Param::Filename) {
return Some(name.to_string());
return Some(sanitize_filename(name));
}
self.param
.get(Param::File)
.and_then(|file| Path::new(file).file_name())
.map(|name| name.to_string_lossy().to_string())
.map(|name| sanitize_filename(&name.to_string_lossy()))
}
/// Returns the size of the file in bytes, if applicable.

View File

@@ -755,3 +755,31 @@ async fn test_delete_msgs_offline() -> Result<()> {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_sanitize_filename_message() -> Result<()> {
let t = &TestContext::new().await;
let mut msg = Message::new(Viewtype::File);
// Even if some of these characters may be valid on one platform,
// they need to be removed in case a backup is transferred to another platform
// and the UI there tries to copy the blob to a file with the original name
// before passing it to an external program.
msg.set_file_from_bytes(t, "/\\:ee.tx*T ", b"hallo", None)?;
assert_eq!(msg.get_filename().unwrap(), "ee.txT");
let blob = msg.param.get_blob(Param::File, t).await?.unwrap();
assert_eq!(blob.suffix().unwrap(), "txt");
// The filename shouldn't be empty if there were only illegal characters:
msg.set_file_from_bytes(t, "/\\:.txt", b"hallo", None)?;
assert_eq!(msg.get_filename().unwrap(), "file.txt");
msg.set_file_from_bytes(t, "/\\:", b"hallo", None)?;
assert_eq!(msg.get_filename().unwrap(), "file");
msg.set_file_from_bytes(t, ".txt", b"hallo", None)?;
assert_eq!(msg.get_filename().unwrap(), "file.txt");
Ok(())
}

View File

@@ -5582,3 +5582,38 @@ async fn test_two_group_securejoins() -> Result<()> {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_sanitize_filename_in_received() -> Result<()> {
let alice = &TestContext::new_alice().await;
let raw = b"Message-ID: Mr.XA6y3og8-az.WGbH9_dNcQx@testr
To: <tmp_5890965001269692@testrun.org>
From: \"=?utf-8?q??=\" <tmp_6272287793210918@testrun.org>
Content-Type: multipart/mixed; boundary=\"mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z\"
--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z
Content-Type: text/plain; charset=utf-8
--
Sent with my Delta Chat Messenger: https://delta.chat
--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z
Content-Type: text/html
Content-Disposition: attachment; filename=\"te\xE2\x80\xACst/../../test.H|TML\xE2\x80\xAC \"
Content-Transfer-Encoding: base64
PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh
--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z--";
let msg = receive_imf(alice, raw, false).await?.unwrap();
let msg = Message::load_from_db(alice, msg.msg_ids[0]).await?;
assert_eq!(msg.get_filename().unwrap(), "test.HTML");
let blob = msg.param.get_blob(Param::File, alice).await?.unwrap();
assert_eq!(blob.suffix().unwrap(), "html");
Ok(())
}

View File

@@ -366,6 +366,41 @@ pub(crate) async fn delete_file(context: &Context, path: impl AsRef<Path>) -> Re
Ok(())
}
/// Create a safe name based on a messy input string.
///
/// The safe name will be a valid filename on Unix and Windows and
/// not contain any path separators. The input can contain path
/// segments separated by either Unix or Windows path separators,
/// the rightmost non-empty segment will be used as name,
/// sanitised for special characters.
pub(crate) fn sanitize_filename(mut name: &str) -> String {
for part in name.rsplit('/') {
if !part.is_empty() {
name = part;
break;
}
}
for part in name.rsplit('\\') {
if !part.is_empty() {
name = part;
break;
}
}
let opts = sanitize_filename::Options {
truncate: true,
windows: true,
replacement: "",
};
let name = sanitize_filename::sanitize_with_options(name, opts);
if name.starts_with('.') || name.is_empty() {
format!("file{name}")
} else {
name
}
}
/// A guard which will remove the path when dropped.
///
/// It implements [`Deref`] so it can be used as a `&Path`.