diff --git a/src/mimeparser.rs b/src/mimeparser.rs index d91e6732a..0b77ddf6e 100644 --- a/src/mimeparser.rs +++ b/src/mimeparser.rs @@ -1276,61 +1276,68 @@ fn is_attachment_disposition(mail: &mailparse::ParsedMail<'_>) -> bool { /// not specified, filename is guessed. If Content-Disposition cannot /// be parsed, returns an error. fn get_attachment_filename(mail: &mailparse::ParsedMail) -> Result> { - // try to get file name from - // `Content-Disposition: ... filename*=...` - // or `Content-Disposition: ... filename*0*=... filename*1*=... filename*2*=...` - // or `Content-Disposition: ... filename=...` - let ct = mail.get_content_disposition(); - let desired_filename: Option = ct - .params - .iter() - .filter(|(key, _value)| key.starts_with("filename")) - .fold(None, |acc, (_key, value)| { - if let Some(acc) = acc { - Some(acc + value) - } else { - Some(value.to_string()) - } - }); + // try to get file name as "encoded-words" from + // `Content-Disposition: ... filename=...` + let mut desired_filename = ct.params.get("filename").map(|s| s.to_string()); - let desired_filename = - desired_filename.or_else(|| ct.params.get("name").map(|s| s.to_string())); + // try to get file name from + // `Content-Disposition: ... filename*0*=... filename*1*=... filename*2*=...` + // encoded as CHARSET'LANG'test%2E%70%64%66 (key ends with `*`) + // or as "encoded-words" (key does not end with `*`) + if desired_filename.is_none() { + let mut apostrophe_encoded = false; + desired_filename = ct + .params + .iter() + .filter(|(key, _value)| key.starts_with("filename")) + .fold(None, |acc, (key, value)| { + if key.ends_with('*') { + apostrophe_encoded = true; + } + if let Some(acc) = acc { + Some(acc + value) + } else { + Some(value.to_string()) + } + }); + if apostrophe_encoded { + // we're currently always assuming utf-8, this might need adaption, however, should not break things. 
+ if let Some(name) = desired_filename { + desired_filename = if let Some(name) = name.splitn(3, '\'').last() { + Some(percent_decode_str(&name).decode_utf8_lossy().to_string()) + } else { + None + } + } + } + } + + // if no filename is set, try `Content-Disposition: ... name=...` + if desired_filename.is_none() { + desired_filename = ct.params.get("name").map(|s| s.to_string()); + } // MS Outlook is known to specify filename in the "name" attribute of // Content-Type and omit Content-Disposition. - let desired_filename = - desired_filename.or_else(|| mail.ctype.params.get("name").map(|s| s.to_string())); - - // decode filename as CHARSET'LANG'test%2E%70%64%66. - // we're always assuming utf-8, - // however, due to lossy decoding, the extension is preserved - // and things should not be totally bad for other encodings. - // we can tweak that when we see sth. else really used in the wild nowadays. - let desired_filename = if let Some(name) = desired_filename { - if let Some(name) = name.splitn(3, '\'').last() { - Some(percent_decode_str(&name).decode_utf8_lossy().to_string()) - } else { - None - } - } else { - None - }; + if desired_filename.is_none() { + desired_filename = mail.ctype.params.get("name").map(|s| s.to_string()); + } // If there is no filename, but part is an attachment, guess filename - if ct.disposition == DispositionType::Attachment && desired_filename.is_none() { + if desired_filename.is_none() && ct.disposition == DispositionType::Attachment { if let Some(subtype) = mail.ctype.mimetype.split('/').nth(1) { - Ok(Some(format!("file.{}", subtype,))) + desired_filename = Some(format!("file.{}", subtype,)); } else { bail!( "could not determine attachment filename: {:?}", ct.disposition ); - } - } else { - Ok(desired_filename) + }; } + + Ok(desired_filename) } /// Returned addresses are normalized and lowercased. 
@@ -1466,6 +1473,43 @@ mod tests { assert_eq!(filename, Some("test.html".to_string())) } + #[test] + fn test_get_attachment_filename_encoded_words() { + let raw = include_bytes!("../test-data/message/html_attach_encoded_words.eml"); + let mail = mailparse::parse_mail(raw).unwrap(); + assert!(get_attachment_filename(&mail).unwrap().is_none()); + assert!(get_attachment_filename(&mail.subparts[0]) + .unwrap() + .is_none()); + let filename = get_attachment_filename(&mail.subparts[1]).unwrap(); + assert_eq!(filename, Some("Maßnahmen Okt. 2020.html".to_string())) + } + + #[test] + fn test_get_attachment_filename_encoded_words_cont() { + // test continued encoded-words and also test apostrophes work that way + let raw = include_bytes!("../test-data/message/html_attach_encoded_words_cont.eml"); + let mail = mailparse::parse_mail(raw).unwrap(); + assert!(get_attachment_filename(&mail).unwrap().is_none()); + assert!(get_attachment_filename(&mail.subparts[0]) + .unwrap() + .is_none()); + let filename = get_attachment_filename(&mail.subparts[1]).unwrap(); + assert_eq!(filename, Some("Maßn'ah'men Okt. 2020.html".to_string())) + } + + #[test] + fn test_get_attachment_filename_combined() { + let raw = include_bytes!("../test-data/message/html_attach_combined.eml"); + let mail = mailparse::parse_mail(raw).unwrap(); + assert!(get_attachment_filename(&mail).unwrap().is_none()); + assert!(get_attachment_filename(&mail.subparts[0]) + .unwrap() + .is_none()); + let filename = get_attachment_filename(&mail.subparts[1]).unwrap(); + assert_eq!(filename, Some("Maßnahmen Okt. 
2020.html".to_string())) + } + #[test] fn test_mailparse_content_type() { let ctype = diff --git a/test-data/message/html_attach_combined.eml b/test-data/message/html_attach_combined.eml new file mode 100644 index 000000000..0ad450a58 --- /dev/null +++ b/test-data/message/html_attach_combined.eml @@ -0,0 +1,28 @@ +Chat-Disposition-Notification-To: tmp_6272287793210918@testrun.org +Subject: =?utf-8?q?Chat=3A_File_=E2=80=93_test=2Ehtml?= +Message-ID: 12345@testrun.org +Date: Sat, 07 Dec 2019 19:00:27 +0000 +X-Mailer: Kopano 8.7.16 +To: +From: "=?utf-8?q??=" +Content-Type: multipart/mixed; boundary="mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z" + + +--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z +Content-Type: text/plain; charset=utf-8 + +yip, filename may be given twice, +seen this way in Kopano + +-- +Sent with my Delta Chat Messenger: https://delta.chat + +--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z +Content-Type: text/html +Content-Disposition: attachment; filename="=?utf-8?Q?Ma=C3=9Fnahmen_Okt=2E_2020=2Ehtml?="; + filename*=utf-8''Ma%C3%9Fnahmen%20Okt.%202020.html +Content-Transfer-Encoding: base64 + +PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh + +--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z-- diff --git a/test-data/message/html_attach_encoded_words.eml b/test-data/message/html_attach_encoded_words.eml new file mode 100644 index 000000000..a64c738d6 --- /dev/null +++ b/test-data/message/html_attach_encoded_words.eml @@ -0,0 +1,25 @@ +Chat-Disposition-Notification-To: tmp_6272287793210918@testrun.org +Subject: =?utf-8?q?Chat=3A_File_=E2=80=93_test=2Ehtml?= +Message-ID: Mr.XA6y3og8-az.WGbH9_dNcQx@testrun.org +Date: Sat, 07 Dec 2019 19:00:27 +0000 +X-Mailer: Delta Chat Core 1.0.0-beta.12/DcFFI +Chat-Version: 1.0 +To: +From: "=?utf-8?q??=" +Content-Type: multipart/mixed; boundary="mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z" + + +--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z +Content-Type: text/plain; charset=utf-8 + +-- +Sent with my Delta Chat Messenger: https://delta.chat + +--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z +Content-Type: text/html 
+Content-Disposition: attachment; filename="=?utf-8?Q?Ma=C3=9Fnahmen_Okt=2E_2020=2Ehtml?="; +Content-Transfer-Encoding: base64 + +PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh + +--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z-- diff --git a/test-data/message/html_attach_encoded_words_cont.eml b/test-data/message/html_attach_encoded_words_cont.eml new file mode 100644 index 000000000..339a2ab4b --- /dev/null +++ b/test-data/message/html_attach_encoded_words_cont.eml @@ -0,0 +1,26 @@ +Chat-Disposition-Notification-To: tmp_6272287793210918@testrun.org +Subject: =?utf-8?q?Chat=3A_File_=E2=80=93_test=2Ehtml?= +Message-ID: Mr.XA6y3og8-az.WGbH9_dNcQx@testrun.org +Date: Sat, 07 Dec 2019 19:00:27 +0000 +X-Mailer: Delta Chat Core 1.0.0-beta.12/DcFFI +Chat-Version: 1.0 +To: +From: "=?utf-8?q??=" +Content-Type: multipart/mixed; boundary="mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z" + + +--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z +Content-Type: text/plain; charset=utf-8 + +-- +Sent with my Delta Chat Messenger: https://delta.chat + +--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z +Content-Type: text/html +Content-Disposition: attachment; filename*0="=?utf-8?Q?Ma=C3=9Fn'ah'men_?="; + filename*1="=?utf-8?Q?Okt=2E_2020=2Ehtml?="; +Content-Transfer-Encoding: base64 + +PGh0bWw+PGJvZHk+dGV4dDwvYm9keT5kYXRh + +--mwkNRwaJw1M5n2xcr2ODfAqvTjcj9Z--