diff --git a/src/mimeparser.rs b/src/mimeparser.rs
index 21d03e029..624695fc3 100644
--- a/src/mimeparser.rs
+++ b/src/mimeparser.rs
@@ -26,6 +26,7 @@ use crate::param::*;
use crate::peerstate::Peerstate;
use crate::simplify::*;
use crate::stock::StockMessage;
+use charset::Charset;
use percent_encoding::percent_decode_str;
/// A parsed MIME message.
@@ -687,7 +688,7 @@ impl MimeMessage {
let (mime_type, msg_type) = get_mime_type(mail)?;
let raw_mime = mail.ctype.mimetype.to_lowercase();
- let filename = get_attachment_filename(mail)?;
+ let filename = get_attachment_filename(context, mail)?;
let old_part_count = self.parts.len();
@@ -1275,7 +1276,10 @@ fn is_attachment_disposition(mail: &mailparse::ParsedMail<'_>) -> bool {
/// returned. If Content-Disposition is "attachment" but filename is
/// not specified, filename is guessed. If Content-Disposition cannot
/// be parsed, returns an error.
-fn get_attachment_filename(mail: &mailparse::ParsedMail) -> Result> {
+fn get_attachment_filename(
+ context: &Context,
+ mail: &mailparse::ParsedMail,
+) -> Result > {
let ct = mail.get_content_disposition();
// try to get file name as "encoded-words" from
@@ -1291,7 +1295,7 @@ fn get_attachment_filename(mail: &mailparse::ParsedMail) -> Result Result >();
+ let (utf8_str, _, _) = charset.decode(&*decoded_bytes);
+ Some(utf8_str.into())
+ } else {
+ warn!(context, "latin1 encoding does not exist");
+ None
+ }
+ }
+ } else {
+ warn!(context, "apostroped encoding invalid");
+ None
+ }
}
}
}
@@ -1462,53 +1483,146 @@ mod tests {
assert!(is_attachment_disposition(&mail.subparts[1]));
}
- fn load_mail_with_attachment(raw: &[u8]) -> ParsedMail {
+ fn load_mail_with_attachment<'a>(t: &'a TestContext, raw: &'a [u8]) -> ParsedMail<'a> { // test helper: parses `raw`, sanity-checks it, returns the parsed mail
let mail = mailparse::parse_mail(raw).unwrap();
- assert!(get_attachment_filename(&mail).unwrap().is_none());
- assert!(get_attachment_filename(&mail.subparts[0])
+ assert!(get_attachment_filename(&t.ctx, &mail).unwrap().is_none()); // the top-level mail must not yield a filename
+ assert!(get_attachment_filename(&t.ctx, &mail.subparts[0]) // nor must the first subpart
.unwrap()
.is_none());
mail
}
- #[test]
- fn test_get_attachment_filename() {
- let mail = load_mail_with_attachment(include_bytes!(
- "../test-data/message/attach_filename_simple.eml"
- ));
- let filename = get_attachment_filename(&mail.subparts[1]).unwrap();
+ #[async_std::test]
+ async fn test_get_attachment_filename() { // baseline case: the expected name is plain ASCII
+ let t = TestContext::new().await; // supplies the context passed as `&t.ctx` below
+ let mail = load_mail_with_attachment(
+ &t,
+ include_bytes!("../test-data/message/attach_filename_simple.eml"),
+ );
+ let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap(); // subparts[0] was already checked by the helper
assert_eq!(filename, Some("test.html".to_string()))
}
- #[test]
- fn test_get_attachment_filename_encoded_words() {
- let mail = load_mail_with_attachment(include_bytes!(
- "../test-data/message/attach_filename_encoded_words.eml"
- ));
- let filename = get_attachment_filename(&mail.subparts[1]).unwrap();
+ #[async_std::test]
+ async fn test_get_attachment_filename_encoded_words() { // presumably an RFC 2047 encoded-words filename; fixture not shown here, verify
+ let t = TestContext::new().await; // supplies the context passed as `&t.ctx` below
+ let mail = load_mail_with_attachment(
+ &t,
+ include_bytes!("../test-data/message/attach_filename_encoded_words.eml"),
+ );
+ let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap(); // subparts[0] was already checked by the helper
assert_eq!(filename, Some("Maßnahmen Okt. 2020.html".to_string()))
}
- #[test]
- fn test_get_attachment_filename_encoded_words_cont() {
+ #[async_std::test]
+ async fn test_get_attachment_filename_encoded_words_binary() { // fixture uses a B-encoded word with an upper-case "UTF-8" charset label
+ let t = TestContext::new().await;
+ let mail = load_mail_with_attachment(
+ &t,
+ include_bytes!("../test-data/message/attach_filename_encoded_words_binary.eml"),
+ );
+ let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap(); // subparts[1] is the part with the attachment disposition
+ assert_eq!(filename, Some(" § 165 Abs".to_string())) // the leading space is part of the decoded name
+ }
+
+ #[async_std::test]
+ async fn test_get_attachment_filename_encoded_words_windows1251() { // fixture: a Windows-1251 encoded-word between plain ASCII words
+ let t = TestContext::new().await;
+ let mail = load_mail_with_attachment(
+ &t,
+ include_bytes!("../test-data/message/attach_filename_encoded_words_windows1251.eml"),
+ );
+ let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap(); // subparts[1] is the part with the attachment disposition
+ assert_eq!(filename, Some("file Что нового 2020.pdf".to_string())) // the space-separated plain text around the encoded-word is kept
+ }
+
+ #[async_std::test]
+ async fn test_get_attachment_filename_encoded_words_cont() {
// test continued encoded-words and also test apostropes work that way
- let mail = load_mail_with_attachment(include_bytes!(
- "../test-data/message/attach_filename_encoded_words_cont.eml"
- ));
- let filename = get_attachment_filename(&mail.subparts[1]).unwrap();
+ let t = TestContext::new().await; // supplies the context passed as `&t.ctx` below
+ let mail = load_mail_with_attachment(
+ &t,
+ include_bytes!("../test-data/message/attach_filename_encoded_words_cont.eml"),
+ );
+ let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap(); // the expected name below keeps its literal apostrophes
assert_eq!(filename, Some("Maßn'ah'men Okt. 2020.html".to_string()))
}
- #[test]
- fn test_get_attachment_filename_combined() {
+ #[async_std::test]
+ async fn test_get_attachment_filename_encoded_words_bad_delimiter() { // fixture: filename="=?utf-8?q?foo?=.bar"
+ let t = TestContext::new().await;
+ let mail = load_mail_with_attachment(
+ &t,
+ include_bytes!("../test-data/message/attach_filename_encoded_words_bad_delimiter.eml"),
+ );
+ let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap();
+ // not decoded: RFC 2047 requires whitespace between an encoded-word and adjacent text, and ".bar" follows without any
+ assert_eq!(filename, Some("=?utf-8?q?foo?=.bar".to_string()))
+ }
+
+ #[async_std::test]
+ async fn test_get_attachment_filename_apostrophed() { // fixture: a single RFC 2231 parameter, filename*=utf-8''<percent-encoded value>
+ let t = TestContext::new().await;
+ let mail = load_mail_with_attachment(
+ &t,
+ include_bytes!("../test-data/message/attach_filename_apostrophed.eml"),
+ );
+ let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap(); // subparts[1] is the part with the attachment disposition
+ assert_eq!(filename, Some("Maßnahmen Okt. 2021.html".to_string())) // %C3%9F -> "ß", %20 -> " "
+ }
+
+ #[async_std::test]
+ async fn test_get_attachment_filename_apostrophed_cont() { // fixture: RFC 2231 continuation over filename*0*, filename*1*, filename*2*
+ let t = TestContext::new().await;
+ let mail = load_mail_with_attachment(
+ &t,
+ include_bytes!("../test-data/message/attach_filename_apostrophed_cont.eml"),
+ );
+ let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap(); // charset is given in segment 0 only; segment 1 is unquoted
+ assert_eq!(filename, Some("Maßnahmen März 2022.html".to_string())) // "%c3%a4" (lower-case hex) must decode to "ä"
+ }
+
+ #[async_std::test]
+ async fn test_get_attachment_filename_apostrophed_windows1251() { // fixture: windows-1251 value split over filename*0* and filename*1*
+ let t = TestContext::new().await;
+ let mail = load_mail_with_attachment(
+ &t,
+ include_bytes!("../test-data/message/attach_filename_apostrophed_windows1251.eml"),
+ );
+ let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap(); // subparts[1] is the part with the attachment disposition
+ assert_eq!(filename, Some("программирование.HTM".to_string())) // the percent-decoded bytes are windows-1251, not utf-8
+ }
+
+ #[async_std::test]
+ async fn test_get_attachment_filename_apostrophed_cp1252() { // fixture: filename*=Cp1252''Auftragsbest%E4tigung.pdf
+ let t = TestContext::new().await;
+ let mail = load_mail_with_attachment(
+ &t,
+ include_bytes!("../test-data/message/attach_filename_apostrophed_cp1252.eml"),
+ );
+ let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap(); // subparts[1] is the part with the attachment disposition
+ assert_eq!(filename, Some("Auftragsbestätigung.pdf".to_string())) // the single byte %E4 must come out as "ä"
+ }
+
+ #[async_std::test]
+ async fn test_get_attachment_filename_combined() {
// test that if `filename` and `filename*0` are given, the filename is not doubled
- let mail = load_mail_with_attachment(include_bytes!(
- "../test-data/message/attach_filename_combined.eml"
- ));
- let filename = get_attachment_filename(&mail.subparts[1]).unwrap();
+ let t = TestContext::new().await; // supplies the context passed as `&t.ctx` below
+ let mail = load_mail_with_attachment(
+ &t,
+ include_bytes!("../test-data/message/attach_filename_combined.eml"),
+ );
+ let filename = get_attachment_filename(&t.ctx, &mail.subparts[1]).unwrap(); // the name must appear once in the result, see the assertion below
assert_eq!(filename, Some("Maßnahmen Okt. 2020.html".to_string()))
}
+ #[test]
+ fn test_charset_latin1() {
+ // make sure the charset label "latin1" resolves,
+ // as get_attachment_filename() uses it as the default for non-utf-8 input
+ assert!(Charset::for_label(b"latin1").is_some());
+ }
+
#[test]
fn test_mailparse_content_type() {
let ctype =
diff --git a/standards.md b/standards.md
index 9aceb6285..8ce5707fa 100644
--- a/standards.md
+++ b/standards.md
@@ -6,6 +6,7 @@ Tasks | Standards
---------------------------------|---------------------------------------------
Transport | IMAP v4 ([RFC 3501](https://tools.ietf.org/html/rfc3501)), SMTP ([RFC 5321](https://tools.ietf.org/html/rfc5321)) and Internet Message Format (IMF, [RFC 5322](https://tools.ietf.org/html/rfc5322))
Embedded media | MIME Document Series ([RFC 2045](https://tools.ietf.org/html/rfc2045), [RFC 2046](https://tools.ietf.org/html/rfc2046)), Content-Disposition Header ([RFC 2183](https://tools.ietf.org/html/rfc2183)), Multipart/Related ([RFC 2387](https://tools.ietf.org/html/rfc2387))
+Filename encoding | Encoded Words ([RFC 2047](https://tools.ietf.org/html/rfc2047)), MIME Parameter Value and Encoded Word Extensions ([RFC 2231](https://tools.ietf.org/html/rfc2231))
Identify server folders | IMAP LIST Extension ([RFC 6154](https://tools.ietf.org/html/rfc6154))
Push | IMAP IDLE ([RFC 2177](https://tools.ietf.org/html/rfc2177))
Authorization | OAuth2 ([RFC 6749](https://tools.ietf.org/html/rfc6749))
diff --git a/test-data/message/attach_filename_apostrophed.eml b/test-data/message/attach_filename_apostrophed.eml
new file mode 100644
index 000000000..f45d4826e
--- /dev/null
+++ b/test-data/message/attach_filename_apostrophed.eml
@@ -0,0 +1,24 @@
+Subject: Test apostrophed filenames
+Message-ID: 12345@testrun.org
+Date: Sat, 07 Dec 2019 19:00:27 +0000
+X-Mailer: Kopano 8.7.16
+To: recp@testrun.org
+From: sender@testrun.org
+Content-Type: multipart/mixed; boundary="==BREAK=="
+
+
+--==BREAK==
+Content-Type: text/plain; charset=utf-8
+
+apostrophed filenames as of
+https://tools.ietf.org/html/rfc2231
+
+--==BREAK==
+Content-Type: text/html
+Content-Disposition: attachment;
+ filename*=utf-8''Ma%C3%9Fnahmen%20Okt.%202021.html
+Content-Transfer-Encoding: base64
+
+PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh
+
+--==BREAK==--
diff --git a/test-data/message/attach_filename_apostrophed_cont.eml b/test-data/message/attach_filename_apostrophed_cont.eml
new file mode 100644
index 000000000..b7d9cf6a4
--- /dev/null
+++ b/test-data/message/attach_filename_apostrophed_cont.eml
@@ -0,0 +1,34 @@
+Subject: Test apostrophed filenames
+Message-ID: 12345@testrun.org
+Date: Sat, 07 Dec 2019 19:00:27 +0000
+X-Mailer: Kopano 8.7.16
+To: recp@testrun.org
+From: sender@testrun.org
+Content-Type: multipart/mixed; boundary="==BREAK=="
+
+
+--==BREAK==
+Content-Type: text/plain; charset=utf-8
+
+apostrophed filenames as of
+https://tools.ietf.org/html/rfc2231,
+span over several header lines.
+
+note, that, in contrast to encoded-words,
+the character-set is not repeated.
+
+as a side-effect,
+this tests unquoted header attributes in filename*1*
+and lower-case-urlencoded utf-8
+
+--==BREAK==
+Content-Type: text/html
+Content-Disposition: attachment;
+ filename*0*="utf-8''Ma%C3%9Fna";
+ filename*1*=hm;
+ filename*2*="en%20M%c3%a4rz%202022.html";
+Content-Transfer-Encoding: base64
+
+PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh
+
+--==BREAK==--
diff --git a/test-data/message/attach_filename_apostrophed_cp1252.eml b/test-data/message/attach_filename_apostrophed_cp1252.eml
new file mode 100644
index 000000000..01df40f4c
--- /dev/null
+++ b/test-data/message/attach_filename_apostrophed_cp1252.eml
@@ -0,0 +1,23 @@
+Subject: Test apostrophed filenames
+Message-ID: 12345@testrun.org
+Date: Sat, 07 Dec 2019 19:00:27 +0000
+X-Mailer: Kopano 8.7.16
+To: recp@testrun.org
+From: sender@testrun.org
+Content-Type: multipart/mixed; boundary="==BREAK=="
+
+
+--==BREAK==
+Content-Type: text/plain; charset=utf-8
+
+testing cp1252 aka ANSI aka Windows-1252
+
+--==BREAK==
+Content-Type: text/html
+Content-Disposition: attachment;
+ filename*=Cp1252''Auftragsbest%E4tigung.pdf;
+Content-Transfer-Encoding: base64
+
+PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh
+
+--==BREAK==--
diff --git a/test-data/message/attach_filename_apostrophed_windows1251.eml b/test-data/message/attach_filename_apostrophed_windows1251.eml
new file mode 100644
index 000000000..a040a96da
--- /dev/null
+++ b/test-data/message/attach_filename_apostrophed_windows1251.eml
@@ -0,0 +1,29 @@
+Subject: Test apostrophed filenames
+Message-ID: 12345@testrun.org
+Date: Sat, 07 Dec 2019 19:00:27 +0000
+X-Mailer: Kopano 8.7.16
+To: recp@testrun.org
+From: sender@testrun.org
+Content-Type: multipart/mixed; boundary="==BREAK=="
+
+
+--==BREAK==
+Content-Type: text/plain; charset=utf-8
+
+apostrophed filenames as of
+https://tools.ietf.org/html/rfc2231,
+testing non-utf-8 charset
+
+examples:
+%EF%F0%EE%E3%F0%E0%EC%EC%E8%F0%EE%E2%E0%ED%E8%E5 = программирование = programming
+
+--==BREAK==
+Content-Type: text/html
+Content-Disposition: attachment;
+ filename*0*=windows-1251''%EF%F0%EE%E3%F0%E0%EC%EC%E8%F0%EE%E2%E0;
+ filename*1*=%ED%E8%E5.HTM
+Content-Transfer-Encoding: base64
+
+PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh
+
+--==BREAK==--
diff --git a/test-data/message/attach_filename_encoded_words_bad_delimiter.eml b/test-data/message/attach_filename_encoded_words_bad_delimiter.eml
new file mode 100644
index 000000000..a1ae8207c
--- /dev/null
+++ b/test-data/message/attach_filename_encoded_words_bad_delimiter.eml
@@ -0,0 +1,29 @@
+Subject: Test encoded-words filenames
+Message-ID: 123456@testrun.org
+Date: Sat, 07 Dec 2019 19:00:27 +0000
+Chat-Version: 1.0
+To: recp@testrun.org
+From: sender@testrun.org
+Content-Type: multipart/mixed; boundary="==BREAK=="
+
+
+--==BREAK==
+Content-Type: text/plain; charset=utf-8
+
+there MUST be a space between encoded words and plain text,
+if there is none, decoding should return the original string
+https://tools.ietf.org/html/rfc2047 5.1:
+
+"Ordinary ASCII text and 'encoded-word's may appear together in the
+same header field. However, an 'encoded-word' that appears in a
+header field defined as '*text' MUST be separated from any adjacent
+'encoded-word' or 'text' by 'linear-white-space'."
+
+--==BREAK==
+Content-Type: text/html
+Content-Disposition: attachment; filename="=?utf-8?q?foo?=.bar";
+Content-Transfer-Encoding: base64
+
+PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh
+
+--==BREAK==--
diff --git a/test-data/message/attach_filename_encoded_words_binary.eml b/test-data/message/attach_filename_encoded_words_binary.eml
new file mode 100644
index 000000000..e028f1340
--- /dev/null
+++ b/test-data/message/attach_filename_encoded_words_binary.eml
@@ -0,0 +1,26 @@
+Subject: Test binary-encoded-words filenames
+Message-ID: 123456@testrun.org
+Date: Sat, 07 Dec 2019 19:00:27 +0000
+Chat-Version: 1.0
+To: recp@testrun.org
+From: sender@testrun.org
+Content-Type: multipart/mixed; boundary="==BREAK=="
+
+
+--==BREAK==
+Content-Type: text/plain; charset=utf-8
+
+test binary word-encoded filename,
+filename is " § 165 Abs" - note the leading space.
+
+as a side-effect, this also tests that the encoding-name
+also works in UPPERCASE.
+
+--==BREAK==
+Content-Type: text/html
+Content-Disposition: attachment; filename="=?UTF-8?B?IMKnIDE2NSBBYnM=?=";
+Content-Transfer-Encoding: base64
+
+PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh
+
+--==BREAK==--
diff --git a/test-data/message/attach_filename_encoded_words_windows1251.eml b/test-data/message/attach_filename_encoded_words_windows1251.eml
new file mode 100644
index 000000000..cee238324
--- /dev/null
+++ b/test-data/message/attach_filename_encoded_words_windows1251.eml
@@ -0,0 +1,31 @@
+Subject: Test encoded-words
+Message-ID: 123456@testrun.org
+Date: Sat, 07 Dec 2019 19:00:27 +0000
+Chat-Version: 1.0
+To: recp@testrun.org
+From: sender@testrun.org
+Content-Type: multipart/mixed; boundary="==BREAK=="
+
+
+--==BREAK==
+Content-Type: text/plain; charset=utf-8
+
+testing encoded-words filenames with windows-1251 (cyrillic) encoding.
+
+as a side-effect, this also tests that encoded words work together with
+plain text as long as they're separated by spaces, see
+https://tools.ietf.org/html/rfc2047 5.1:
+
+"Ordinary ASCII text and 'encoded-word's may appear together in the
+same header field. However, an 'encoded-word' that appears in a
+header field defined as '*text' MUST be separated from any adjacent
+'encoded-word' or 'text' by 'linear-white-space'."
+
+--==BREAK==
+Content-Type: text/html
+Content-Disposition: attachment; filename="file =?Windows-1251?B?1/LuIO3u4u7j7g==?= 2020.pdf";
+Content-Transfer-Encoding: base64
+
+PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh
+
+--==BREAK==--