From aec8332544e3a91312c4ad81045465b8b4bcbe4b Mon Sep 17 00:00:00 2001 From: link2xt Date: Sat, 17 Jul 2021 00:22:57 +0300 Subject: [PATCH] mimeparser: use mailparse to parse RFC 2231 filenames mailparse supports RFC 2231 since version 0.13.5, so there is no need for our own code to support it. --- Cargo.lock | 5 ++-- Cargo.toml | 3 +-- src/mimeparser.rs | 63 +++++------------------------------------------ 3 files changed, 9 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bf292c54c..d50133340 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1123,7 +1123,6 @@ dependencies = [ "base64 0.13.0", "bitflags", "byteorder", - "charset", "chrono", "criterion", "deltachat_derive", @@ -2185,9 +2184,9 @@ dependencies = [ [[package]] name = "mailparse" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62db73ff1a42b0e3a8858cf0d5c183bdfc23491f7294ae4a8200c83577457386" +checksum = "c06f526fc13a50f46a3689a6f438cb833c59817c898bb40a3954f341ddf74ce1" dependencies = [ "base64 0.13.0", "charset", diff --git a/Cargo.toml b/Cargo.toml index ef99ce8e4..c2dd7a59f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,6 @@ backtrace = "0.3.59" base64 = "0.13" bitflags = "1.1.0" byteorder = "1.3.1" -charset = "0.1" chrono = "0.4.6" dirs = { version = "3.0.2", optional=true } email = { git = "https://github.com/deltachat/rust-email", branch = "master" } @@ -42,7 +41,7 @@ kamadak-exif = "0.5" lettre_email = { git = "https://github.com/deltachat/lettre", branch = "master" } libc = "0.2.97" log = {version = "0.4.8", optional = true } -mailparse = "0.13.4" +mailparse = "0.13.5" native-tls = "0.2.3" num_cpus = "1.13.0" num-derive = "0.3.0" diff --git a/src/mimeparser.rs b/src/mimeparser.rs index 16472257b..c697f91bf 100644 --- a/src/mimeparser.rs +++ b/src/mimeparser.rs @@ -3,12 +3,10 @@ use std::future::Future; use std::pin::Pin; use anyhow::{bail, Result}; -use charset::Charset; use 
deltachat_derive::{FromSql, ToSql}; use lettre_email::mime::{self, Mime}; use mailparse::{addrparse_header, DispositionType, MailHeader, MailHeaderMap, SingleInfo}; use once_cell::sync::Lazy; -use percent_encoding::percent_decode_str; use crate::aheader::Aheader; use crate::blob::BlobObject; @@ -1510,55 +1508,13 @@ fn get_attachment_filename( // `Content-Disposition: ... filename=...` let mut desired_filename = ct.params.get("filename").map(|s| s.to_string()); - // try to get file name from - // `Content-Disposition: ... filename*0*=... filename*1*=... filename*2*=...` - // encoded as CHARSET'LANG'test%2E%70%64%66 (key ends with `*`) - // or as "encoded-words" (key does not end with `*`) if desired_filename.is_none() { - let mut apostrophe_encoded = false; - desired_filename = ct - .params - .iter() - .filter(|(key, _value)| key.starts_with("filename*")) - .fold(None, |acc, (key, value)| { - if key.ends_with('*') { - apostrophe_encoded = true; - } - if let Some(acc) = acc { - Some(acc + value) - } else { - Some(value.to_string()) - } - }); - if apostrophe_encoded { - if let Some(name) = desired_filename { - let mut parts = name.splitn(3, '\''); - desired_filename = - if let (Some(charset), Some(value)) = (parts.next(), parts.last()) { - let decoded_bytes = percent_decode_str(value); - if charset.to_lowercase() == "utf-8" { - Some(decoded_bytes.decode_utf8_lossy().to_string()) - } else { - // encoded_words crate say, latin-1 is not reported; moreover, latin1 is a good default - if let Some(charset) = Charset::for_label(charset.as_bytes()) - .or_else(|| Charset::for_label(b"latin1")) - { - let decoded_bytes = decoded_bytes.collect::<Vec<u8>>(); - let (utf8_str, _, _) = charset.decode(&*decoded_bytes); - Some(utf8_str.into()) - } else { - warn!(context, "latin1 encoding does not exist"); - None - } - } - } else { - warn!(context, "apostrophed encoding invalid: {}", name); - // be graceful and just use the original name. 
- // some MUA, including Delta Chat up to core1.50, - // use `filename*` mistakenly for simple encoded-words without following rfc2231 - Some(name) - } - } + if let Some(name) = ct.params.get("filename*").map(|s| s.to_string()) { + // be graceful and just use the original name. + // some MUA, including Delta Chat up to core1.50, + // use `filename*` mistakenly for simple encoded-words without following rfc2231 + warn!(context, "apostrophed encoding invalid: {}", name); + desired_filename = Some(name); } } @@ -1920,13 +1876,6 @@ mod tests { assert_eq!(filename, Some("Maßnahmen Okt. 2020.html".to_string())) } - #[test] - fn test_charset_latin1() { - // make sure, latin1 exists under this name - // as we're using it as default in get_attachment_filename() for non-utf-8 - assert!(Charset::for_label(b"latin1").is_some()); - } - #[test] fn test_mailparse_content_type() { let ctype =