fix: Assume file extensions are 32 chars max and don't contain whitespace (#5338)

Before file extensions were also limited to 32 chars, but extra chars in the beginning were just cut
off, e.g. "file.with_lots_of_characters_behind_point_and_double_ending.tar.gz" was considered to
have an extension "d_point_and_double_ending.tar.gz". Better to take only "tar.gz" then.

Also don't include whitespace-containing parts in extensions. File extensions generally don't
contain whitespaces.
This commit is contained in:
iequidoo
2024-10-08 17:38:22 -03:00
committed by iequidoo
parent 19be12a25d
commit 8f41aed917
2 changed files with 44 additions and 23 deletions

View File

@@ -253,16 +253,16 @@ impl<'a> BlobObject<'a> {
/// ///
/// The extension part will always be lowercased. /// The extension part will always be lowercased.
fn sanitise_name(name: &str) -> (String, String) { fn sanitise_name(name: &str) -> (String, String) {
let mut name = name.to_string(); let mut name = name;
for part in name.rsplit('/') { for part in name.rsplit('/') {
if !part.is_empty() { if !part.is_empty() {
name = part.to_string(); name = part;
break; break;
} }
} }
for part in name.rsplit('\\') { for part in name.rsplit('\\') {
if !part.is_empty() { if !part.is_empty() {
name = part.to_string(); name = part;
break; break;
} }
} }
@@ -272,32 +272,39 @@ impl<'a> BlobObject<'a> {
replacement: "", replacement: "",
}; };
let clean = sanitize_filename::sanitize_with_options(name, opts); let name = sanitize_filename::sanitize_with_options(name, opts);
// Let's take the tricky filename // Let's take a tricky filename,
// "file.with_lots_of_characters_behind_point_and_double_ending.tar.gz" as an example. // "file.with_lots_of_characters_behind_point_and_double_ending.tar.gz" as an example.
// Split it into "file" and "with_lots_of_characters_behind_point_and_double_ending.tar.gz": // Assume that the extension is 32 chars maximum.
let mut iter = clean.splitn(2, '.'); let ext: String = name
.chars()
let stem: String = iter.next().unwrap_or_default().chars().take(64).collect();
// stem == "file"
let ext_chars = iter.next().unwrap_or_default().chars();
let ext: String = ext_chars
.rev() .rev()
.take(32) .take_while(|c| !c.is_whitespace())
.take(33)
.collect::<Vec<_>>() .collect::<Vec<_>>()
.iter() .iter()
.rev() .rev()
.collect(); .collect();
// ext == "d_point_and_double_ending.tar.gz" // ext == "nd_point_and_double_ending.tar.gz"
if ext.is_empty() { // Split it into "nd_point_and_double_ending" and "tar.gz":
(stem, "".to_string()) let mut iter = ext.splitn(2, '.');
iter.next();
let ext = iter.next().unwrap_or_default();
let ext = if ext.is_empty() {
String::new()
} else { } else {
(stem, format!(".{ext}").to_lowercase()) format!(".{ext}")
// Return ("file", ".d_point_and_double_ending.tar.gz") // ".tar.gz"
// which is not perfect but acceptable. };
} let stem = name
.strip_suffix(&ext)
.unwrap_or_default()
.chars()
.take(64)
.collect();
(stem, ext.to_lowercase())
} }
/// Checks whether a name is a valid blob name. /// Checks whether a name is a valid blob name.
@@ -963,6 +970,19 @@ mod tests {
assert!(!stem.contains(':')); assert!(!stem.contains(':'));
assert!(!stem.contains('*')); assert!(!stem.contains('*'));
assert!(!stem.contains('?')); assert!(!stem.contains('?'));
let (stem, ext) = BlobObject::sanitise_name(
"file.with_lots_of_characters_behind_point_and_double_ending.tar.gz",
);
assert_eq!(
stem,
"file.with_lots_of_characters_behind_point_and_double_ending"
);
assert_eq!(ext, ".tar.gz");
let (stem, ext) = BlobObject::sanitise_name("a. tar.tar.gz");
assert_eq!(stem, "a. tar");
assert_eq!(ext, ".tar.gz");
} }
#[tokio::test(flavor = "multi_thread", worker_threads = 2)] #[tokio::test(flavor = "multi_thread", worker_threads = 2)]

View File

@@ -3163,20 +3163,21 @@ Reply from different address
} }
#[tokio::test(flavor = "multi_thread", worker_threads = 2)] #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_long_and_duplicated_filenames() -> Result<()> { async fn test_weird_and_duplicated_filenames() -> Result<()> {
let mut tcm = TestContextManager::new(); let mut tcm = TestContextManager::new();
let alice = tcm.alice().await; let alice = tcm.alice().await;
let bob = tcm.bob().await; let bob = tcm.bob().await;
for filename_sent in &[ for filename_sent in &[
"foo.bar very long file name test baz.tar.gz", "foo.bar very long file name test baz.tar.gz",
"foobarabababababababbababababverylongfilenametestbaz.tar.gz", "foo.barabababababababbababababverylongfilenametestbaz.tar.gz",
"fooo...tar.gz", "fooo...tar.gz",
"foo. .tar.gz", "foo. .tar.gz",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.tar.gz", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.tar.gz",
"a.tar.gz", "a.tar.gz",
"a.tar.gz", "a.tar.gz",
"a.a..a.a.a.a.tar.gz", "a.a..a.a.a.a.tar.gz",
"a. tar.tar.gz",
] { ] {
let attachment = alice.blobdir.join(filename_sent); let attachment = alice.blobdir.join(filename_sent);
let content = format!("File content of {filename_sent}"); let content = format!("File content of {filename_sent}");