mirror of
https://github.com/chatmail/core.git
synced 2026-04-02 05:22:14 +03:00
Merge pull request #2094 from deltachat/fix-non-utf8-filenames
test and fix non-utf8 filenames
This commit is contained in:
@@ -26,6 +26,7 @@ use crate::param::*;
|
||||
use crate::peerstate::Peerstate;
|
||||
use crate::simplify::*;
|
||||
use crate::stock::StockMessage;
|
||||
use charset::Charset;
|
||||
use percent_encoding::percent_decode_str;
|
||||
|
||||
/// A parsed MIME message.
|
||||
@@ -687,7 +688,7 @@ impl MimeMessage {
|
||||
let (mime_type, msg_type) = get_mime_type(mail)?;
|
||||
let raw_mime = mail.ctype.mimetype.to_lowercase();
|
||||
|
||||
let filename = get_attachment_filename(mail)?;
|
||||
let filename = get_attachment_filename(context, mail)?;
|
||||
|
||||
let old_part_count = self.parts.len();
|
||||
|
||||
@@ -1275,7 +1276,10 @@ fn is_attachment_disposition(mail: &mailparse::ParsedMail<'_>) -> bool {
|
||||
/// returned. If Content-Disposition is "attachment" but filename is
|
||||
/// not specified, filename is guessed. If Content-Disposition cannot
|
||||
/// be parsed, returns an error.
|
||||
fn get_attachment_filename(mail: &mailparse::ParsedMail) -> Result<Option<String>> {
|
||||
fn get_attachment_filename(
|
||||
context: &Context,
|
||||
mail: &mailparse::ParsedMail,
|
||||
) -> Result<Option<String>> {
|
||||
let ct = mail.get_content_disposition();
|
||||
|
||||
// try to get file name as "encoded-words" from
|
||||
@@ -1291,7 +1295,7 @@ fn get_attachment_filename(mail: &mailparse::ParsedMail) -> Result<Option<String
|
||||
desired_filename = ct
|
||||
.params
|
||||
.iter()
|
||||
.filter(|(key, _value)| key.starts_with("filename"))
|
||||
.filter(|(key, _value)| key.starts_with("filename*"))
|
||||
.fold(None, |acc, (key, value)| {
|
||||
if key.ends_with('*') {
|
||||
apostrophe_encoded = true;
|
||||
@@ -1303,13 +1307,30 @@ fn get_attachment_filename(mail: &mailparse::ParsedMail) -> Result<Option<String
|
||||
}
|
||||
});
|
||||
if apostrophe_encoded {
|
||||
// we're currently always assuming utf-8, this might need adaption, however, should not break things.
|
||||
if let Some(name) = desired_filename {
|
||||
desired_filename = if let Some(name) = name.splitn(3, '\'').last() {
|
||||
Some(percent_decode_str(&name).decode_utf8_lossy().to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
let mut parts = name.splitn(3, '\'');
|
||||
desired_filename =
|
||||
if let (Some(charset), Some(value)) = (parts.next(), parts.last()) {
|
||||
let decoded_bytes = percent_decode_str(&value);
|
||||
if charset.to_lowercase() == "utf-8" {
|
||||
Some(decoded_bytes.decode_utf8_lossy().to_string())
|
||||
} else {
|
||||
// the encoded_words crate says latin-1 is not reported; moreover, latin1 is a good default
|
||||
if let Some(charset) = Charset::for_label(charset.as_bytes())
|
||||
.or_else(|| Charset::for_label(b"latin1"))
|
||||
{
|
||||
let decoded_bytes = decoded_bytes.collect::<Vec<u8>>();
|
||||
let (utf8_str, _, _) = charset.decode(&*decoded_bytes);
|
||||
Some(utf8_str.into())
|
||||
} else {
|
||||
warn!(context, "latin1 encoding does not exist");
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
warn!(context, "apostroped encoding invalid");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1462,53 +1483,146 @@ mod tests {
|
||||
assert!(is_attachment_disposition(&mail.subparts[1]));
|
||||
}
|
||||
|
||||
fn load_mail_with_attachment(raw: &[u8]) -> ParsedMail {
|
||||
fn load_mail_with_attachment<'a>(t: &'a TestContext, raw: &'a [u8]) -> ParsedMail<'a> {
|
||||
let mail = mailparse::parse_mail(raw).unwrap();
|
||||
assert!(get_attachment_filename(&mail).unwrap().is_none());
|
||||
assert!(get_attachment_filename(&mail.subparts[0])
|
||||
assert!(get_attachment_filename(&t.ctx, &mail).unwrap().is_none());
|
||||
assert!(get_attachment_filename(&t.ctx, &mail.subparts[0])
|
||||
.unwrap()
|
||||
.is_none());
|
||||
mail
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_attachment_filename() {
|
||||
let mail = load_mail_with_attachment(include_bytes!(
|
||||
"../test-data/message/attach_filename_simple.eml"
|
||||
));
|
||||
let filename = get_attachment_filename(&mail.subparts[1]).unwrap();
|
||||
#[async_std::test]
|
||||
async fn test_get_attachment_filename() {
|
||||
let t = TestContext::new().await;
|
||||
let mail = load_mail_with_attachment(
|
||||
&t,
|
||||
include_bytes!("../test-data/message/attach_filename_simple.eml"),
|
||||
);
|
||||
let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap();
|
||||
assert_eq!(filename, Some("test.html".to_string()))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_attachment_filename_encoded_words() {
|
||||
let mail = load_mail_with_attachment(include_bytes!(
|
||||
"../test-data/message/attach_filename_encoded_words.eml"
|
||||
));
|
||||
let filename = get_attachment_filename(&mail.subparts[1]).unwrap();
|
||||
#[async_std::test]
|
||||
async fn test_get_attachment_filename_encoded_words() {
|
||||
let t = TestContext::new().await;
|
||||
let mail = load_mail_with_attachment(
|
||||
&t,
|
||||
include_bytes!("../test-data/message/attach_filename_encoded_words.eml"),
|
||||
);
|
||||
let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap();
|
||||
assert_eq!(filename, Some("Maßnahmen Okt. 2020.html".to_string()))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_attachment_filename_encoded_words_cont() {
|
||||
#[async_std::test]
|
||||
async fn test_get_attachment_filename_encoded_words_binary() {
|
||||
let t = TestContext::new().await;
|
||||
let mail = load_mail_with_attachment(
|
||||
&t,
|
||||
include_bytes!("../test-data/message/attach_filename_encoded_words_binary.eml"),
|
||||
);
|
||||
let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap();
|
||||
assert_eq!(filename, Some(" § 165 Abs".to_string()))
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn test_get_attachment_filename_encoded_words_windows1251() {
|
||||
let t = TestContext::new().await;
|
||||
let mail = load_mail_with_attachment(
|
||||
&t,
|
||||
include_bytes!("../test-data/message/attach_filename_encoded_words_windows1251.eml"),
|
||||
);
|
||||
let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap();
|
||||
assert_eq!(filename, Some("file Что нового 2020.pdf".to_string()))
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn test_get_attachment_filename_encoded_words_cont() {
|
||||
// test continued encoded-words and also test that apostrophes work that way
|
||||
let mail = load_mail_with_attachment(include_bytes!(
|
||||
"../test-data/message/attach_filename_encoded_words_cont.eml"
|
||||
));
|
||||
let filename = get_attachment_filename(&mail.subparts[1]).unwrap();
|
||||
let t = TestContext::new().await;
|
||||
let mail = load_mail_with_attachment(
|
||||
&t,
|
||||
include_bytes!("../test-data/message/attach_filename_encoded_words_cont.eml"),
|
||||
);
|
||||
let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap();
|
||||
assert_eq!(filename, Some("Maßn'ah'men Okt. 2020.html".to_string()))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_attachment_filename_combined() {
|
||||
#[async_std::test]
|
||||
async fn test_get_attachment_filename_encoded_words_bad_delimiter() {
|
||||
let t = TestContext::new().await;
|
||||
let mail = load_mail_with_attachment(
|
||||
&t,
|
||||
include_bytes!("../test-data/message/attach_filename_encoded_words_bad_delimiter.eml"),
|
||||
);
|
||||
let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap();
|
||||
// not decoded as a space is missing after encoded-words part
|
||||
assert_eq!(filename, Some("=?utf-8?q?foo?=.bar".to_string()))
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn test_get_attachment_filename_apostrophed() {
|
||||
let t = TestContext::new().await;
|
||||
let mail = load_mail_with_attachment(
|
||||
&t,
|
||||
include_bytes!("../test-data/message/attach_filename_apostrophed.eml"),
|
||||
);
|
||||
let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap();
|
||||
assert_eq!(filename, Some("Maßnahmen Okt. 2021.html".to_string()))
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn test_get_attachment_filename_apostrophed_cont() {
|
||||
let t = TestContext::new().await;
|
||||
let mail = load_mail_with_attachment(
|
||||
&t,
|
||||
include_bytes!("../test-data/message/attach_filename_apostrophed_cont.eml"),
|
||||
);
|
||||
let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap();
|
||||
assert_eq!(filename, Some("Maßnahmen März 2022.html".to_string()))
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn test_get_attachment_filename_apostrophed_windows1251() {
|
||||
let t = TestContext::new().await;
|
||||
let mail = load_mail_with_attachment(
|
||||
&t,
|
||||
include_bytes!("../test-data/message/attach_filename_apostrophed_windows1251.eml"),
|
||||
);
|
||||
let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap();
|
||||
assert_eq!(filename, Some("программирование.HTM".to_string()))
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn test_get_attachment_filename_apostrophed_cp1252() {
|
||||
let t = TestContext::new().await;
|
||||
let mail = load_mail_with_attachment(
|
||||
&t,
|
||||
include_bytes!("../test-data/message/attach_filename_apostrophed_cp1252.eml"),
|
||||
);
|
||||
let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap();
|
||||
assert_eq!(filename, Some("Auftragsbestätigung.pdf".to_string()))
|
||||
}
|
||||
|
||||
#[async_std::test]
|
||||
async fn test_get_attachment_filename_combined() {
|
||||
// test that if `filename` and `filename*0` are given, the filename is not doubled
|
||||
let mail = load_mail_with_attachment(include_bytes!(
|
||||
"../test-data/message/attach_filename_combined.eml"
|
||||
));
|
||||
let filename = get_attachment_filename(&mail.subparts[1]).unwrap();
|
||||
let t = TestContext::new().await;
|
||||
let mail = load_mail_with_attachment(
|
||||
&t,
|
||||
include_bytes!("../test-data/message/attach_filename_combined.eml"),
|
||||
);
|
||||
let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap();
|
||||
assert_eq!(filename, Some("Maßnahmen Okt. 2020.html".to_string()))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_charset_latin1() {
|
||||
// make sure latin1 exists under this name,
|
||||
// as we're using it as default in get_attachment_filename() for non-utf-8
|
||||
assert!(Charset::for_label(b"latin1").is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_mailparse_content_type() {
|
||||
let ctype =
|
||||
|
||||
@@ -6,6 +6,7 @@ Tasks | Standards
|
||||
---------------------------------|---------------------------------------------
|
||||
Transport | IMAP v4 ([RFC 3501](https://tools.ietf.org/html/rfc3501)), SMTP ([RFC 5321](https://tools.ietf.org/html/rfc5321)) and Internet Message Format (IMF, [RFC 5322](https://tools.ietf.org/html/rfc5322))
|
||||
Embedded media | MIME Document Series ([RFC 2045](https://tools.ietf.org/html/rfc2045), [RFC 2046](https://tools.ietf.org/html/rfc2046)), Content-Disposition Header ([RFC 2183](https://tools.ietf.org/html/rfc2183)), Multipart/Related ([RFC 2387](https://tools.ietf.org/html/rfc2387))
|
||||
Filename encoding | Encoded Words ([RFC 2047](https://tools.ietf.org/html/rfc2047)), Encoded Word Extensions ([RFC 2231](https://tools.ietf.org/html/rfc2231))
|
||||
Identify server folders | IMAP LIST Extension ([RFC 6154](https://tools.ietf.org/html/rfc6154))
|
||||
Push | IMAP IDLE ([RFC 2177](https://tools.ietf.org/html/rfc2177))
|
||||
Authorization | OAuth2 ([RFC 6749](https://tools.ietf.org/html/rfc6749))
|
||||
|
||||
24
test-data/message/attach_filename_apostrophed.eml
Normal file
24
test-data/message/attach_filename_apostrophed.eml
Normal file
@@ -0,0 +1,24 @@
|
||||
Subject: Test apostrophed filenames
|
||||
Message-ID: 12345@testrun.org
|
||||
Date: Sat, 07 Dec 2019 19:00:27 +0000
|
||||
X-Mailer: Kopano 8.7.16
|
||||
To: recp@testrun.org
|
||||
From: sender@testrun.org
|
||||
Content-Type: multipart/mixed; boundary="==BREAK=="
|
||||
|
||||
|
||||
--==BREAK==
|
||||
Content-Type: text/plain; charset=utf-8
|
||||
|
||||
apostrophed filenames as of
|
||||
https://tools.ietf.org/html/rfc2231
|
||||
|
||||
--==BREAK==
|
||||
Content-Type: text/html
|
||||
Content-Disposition: attachment;
|
||||
filename*=utf-8''Ma%C3%9Fnahmen%20Okt.%202021.html
|
||||
Content-Transfer-Encoding: base64
|
||||
|
||||
PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh
|
||||
|
||||
--==BREAK==--
|
||||
34
test-data/message/attach_filename_apostrophed_cont.eml
Normal file
34
test-data/message/attach_filename_apostrophed_cont.eml
Normal file
@@ -0,0 +1,34 @@
|
||||
Subject: Test apostrophed filenames
|
||||
Message-ID: 12345@testrun.org
|
||||
Date: Sat, 07 Dec 2019 19:00:27 +0000
|
||||
X-Mailer: Kopano 8.7.16
|
||||
To: recp@testrun.org
|
||||
From: sender@testrun.org
|
||||
Content-Type: multipart/mixed; boundary="==BREAK=="
|
||||
|
||||
|
||||
--==BREAK==
|
||||
Content-Type: text/plain; charset=utf-8
|
||||
|
||||
apostrophed filenames as of
|
||||
https://tools.ietf.org/html/rfc2231,
|
||||
span over several header lines.
|
||||
|
||||
note, that, in contrast to encoded-words,
|
||||
the character-set is not repeated.
|
||||
|
||||
as a side-effect,
|
||||
this tests unquoted header attributes in filename*1*
|
||||
and lower-case-urlencoded utf-8
|
||||
|
||||
--==BREAK==
|
||||
Content-Type: text/html
|
||||
Content-Disposition: attachment;
|
||||
filename*0*="utf-8''Ma%C3%9Fna";
|
||||
filename*1*=hm;
|
||||
filename*2*="en%20M%c3%a4rz%202022.html";
|
||||
Content-Transfer-Encoding: base64
|
||||
|
||||
PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh
|
||||
|
||||
--==BREAK==--
|
||||
23
test-data/message/attach_filename_apostrophed_cp1252.eml
Normal file
23
test-data/message/attach_filename_apostrophed_cp1252.eml
Normal file
@@ -0,0 +1,23 @@
|
||||
Subject: Test apostrophed filenames
|
||||
Message-ID: 12345@testrun.org
|
||||
Date: Sat, 07 Dec 2019 19:00:27 +0000
|
||||
X-Mailer: Kopano 8.7.16
|
||||
To: recp@testrun.org
|
||||
From: sender@testrun.org
|
||||
Content-Type: multipart/mixed; boundary="==BREAK=="
|
||||
|
||||
|
||||
--==BREAK==
|
||||
Content-Type: text/plain; charset=utf-8
|
||||
|
||||
testing cp1252 aka ANSI aka Windows-1252
|
||||
|
||||
--==BREAK==
|
||||
Content-Type: text/html
|
||||
Content-Disposition: attachment;
|
||||
filename*=Cp1252''Auftragsbest%E4tigung.pdf;
|
||||
Content-Transfer-Encoding: base64
|
||||
|
||||
PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh
|
||||
|
||||
--==BREAK==--
|
||||
@@ -0,0 +1,29 @@
|
||||
Subject: Test apostrophed filenames
|
||||
Message-ID: 12345@testrun.org
|
||||
Date: Sat, 07 Dec 2019 19:00:27 +0000
|
||||
X-Mailer: Kopano 8.7.16
|
||||
To: recp@testrun.org
|
||||
From: sender@testrun.org
|
||||
Content-Type: multipart/mixed; boundary="==BREAK=="
|
||||
|
||||
|
||||
--==BREAK==
|
||||
Content-Type: text/plain; charset=utf-8
|
||||
|
||||
apostrophed filenames as of
|
||||
https://tools.ietf.org/html/rfc2231,
|
||||
testing non-utf-8 charset
|
||||
|
||||
examples:
|
||||
%EF%F0%EE%E3%F0%E0%EC%EC%E8%F0%EE%E2%E0%ED%E8%E5 = программирование = programming
|
||||
|
||||
--==BREAK==
|
||||
Content-Type: text/html
|
||||
Content-Disposition: attachment;
|
||||
filename*0*=windows-1251''%EF%F0%EE%E3%F0%E0%EC%EC%E8%F0%EE%E2%E0;
|
||||
filename*1*=%ED%E8%E5.HTM
|
||||
Content-Transfer-Encoding: base64
|
||||
|
||||
PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh
|
||||
|
||||
--==BREAK==--
|
||||
@@ -0,0 +1,29 @@
|
||||
Subject: Test encoded-words filenames
|
||||
Message-ID: 123456@testrun.org
|
||||
Date: Sat, 07 Dec 2019 19:00:27 +0000
|
||||
Chat-Version: 1.0
|
||||
To: recp@testrun.org
|
||||
From: sender@testrun.org
|
||||
Content-Type: multipart/mixed; boundary="==BREAK=="
|
||||
|
||||
|
||||
--==BREAK==
|
||||
Content-Type: text/plain; charset=utf-8
|
||||
|
||||
there MUST be a space between encoded words and plain text,
|
||||
if there is none, decoding should return the original string
|
||||
https://tools.ietf.org/html/rfc2047 5.1:
|
||||
|
||||
"Ordinary ASCII text and 'encoded-word's may appear together in the
|
||||
same header field. However, an 'encoded-word' that appears in a
|
||||
header field defined as '*text' MUST be separated from any adjacent
|
||||
'encoded-word' or 'text' by 'linear-white-space'."
|
||||
|
||||
--==BREAK==
|
||||
Content-Type: text/html
|
||||
Content-Disposition: attachment; filename="=?utf-8?q?foo?=.bar";
|
||||
Content-Transfer-Encoding: base64
|
||||
|
||||
PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh
|
||||
|
||||
--==BREAK==--
|
||||
26
test-data/message/attach_filename_encoded_words_binary.eml
Normal file
26
test-data/message/attach_filename_encoded_words_binary.eml
Normal file
@@ -0,0 +1,26 @@
|
||||
Subject: Test binary-encoded-words filenames
|
||||
Message-ID: 123456@testrun.org
|
||||
Date: Sat, 07 Dec 2019 19:00:27 +0000
|
||||
Chat-Version: 1.0
|
||||
To: recp@testrun.org
|
||||
From: sender@testrun.org
|
||||
Content-Type: multipart/mixed; boundary="==BREAK=="
|
||||
|
||||
|
||||
--==BREAK==
|
||||
Content-Type: text/plain; charset=utf-8
|
||||
|
||||
test binary word-encoded filename,
|
||||
filename is " § 165 Abs" - note the leading space.
|
||||
|
||||
as a side-effect, this also tests that the encoding-name
|
||||
also works in UPPERCASE.
|
||||
|
||||
--==BREAK==
|
||||
Content-Type: text/html
|
||||
Content-Disposition: attachment; filename="=?UTF-8?B?IMKnIDE2NSBBYnM=?=";
|
||||
Content-Transfer-Encoding: base64
|
||||
|
||||
PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh
|
||||
|
||||
--==BREAK==--
|
||||
@@ -0,0 +1,31 @@
|
||||
Subject: Test encoded-words
|
||||
Message-ID: 123456@testrun.org
|
||||
Date: Sat, 07 Dec 2019 19:00:27 +0000
|
||||
Chat-Version: 1.0
|
||||
To: recp@testrun.org
|
||||
From: sender@testrun.org
|
||||
Content-Type: multipart/mixed; boundary="==BREAK=="
|
||||
|
||||
|
||||
--==BREAK==
|
||||
Content-Type: text/plain; charset=utf-8
|
||||
|
||||
testing encoded-words filenames with windows-1251 (cyrillic) encoding.
|
||||
|
||||
as a side-effect, this also tests that encoded words work together with
|
||||
plain text as long as they're separated by spaces, see
|
||||
https://tools.ietf.org/html/rfc2047 5.1:
|
||||
|
||||
"Ordinary ASCII text and 'encoded-word's may appear together in the
|
||||
same header field. However, an 'encoded-word' that appears in a
|
||||
header field defined as '*text' MUST be separated from any adjacent
|
||||
'encoded-word' or 'text' by 'linear-white-space'."
|
||||
|
||||
--==BREAK==
|
||||
Content-Type: text/html
|
||||
Content-Disposition: attachment; filename="file =?Windows-1251?B?1/LuIO3u4u7j7g==?= 2020.pdf";
|
||||
Content-Transfer-Encoding: base64
|
||||
|
||||
PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh
|
||||
|
||||
--==BREAK==--
|
||||
Reference in New Issue
Block a user