Store mime_headers as BLOBs

Raw MIME messages may contain bytes that are not valid UTF-8, e.g.
8bit bodies in legacy charsets such as windows-1252. Storing them as
TEXT via String::from_utf8_lossy replaces every invalid byte sequence
with U+FFFD "REPLACEMENT CHARACTER", so the original bytes are lost and
the content is decoded incorrectly when `mime_headers` is later parsed
into HTML.
This commit is contained in:
link2xt
2021-06-19 01:22:08 +03:00
parent c08df8d3da
commit a47c0486ae
6 changed files with 87 additions and 21 deletions

View File

@@ -170,15 +170,20 @@ pub(crate) trait Strdup {
unsafe fn strdup(&self) -> *mut libc::c_char;
}
impl<T: AsRef<str>> Strdup for T {
impl Strdup for str {
unsafe fn strdup(&self) -> *mut libc::c_char {
let tmp = CString::new_lossy(self.as_ref());
let tmp = CString::new_lossy(self);
dc_strdup(tmp.as_ptr())
}
}
// We cannot implement for AsRef<OsStr> because we already implement
// AsRef<str> and this conflicts. So implement for Path directly.
impl Strdup for String {
    /// Duplicates this string as a C string by delegating to the `str`
    /// implementation of `Strdup`.
    unsafe fn strdup(&self) -> *mut libc::c_char {
        self.as_str().strdup()
    }
}
impl Strdup for std::path::Path {
unsafe fn strdup(&self) -> *mut libc::c_char {
let tmp = self.to_c_string().unwrap_or_else(|_| CString::default());
@@ -186,6 +191,13 @@ impl Strdup for std::path::Path {
}
}
impl Strdup for [u8] {
    /// Converts the bytes with `CString::new_lossy` and duplicates the
    /// result with `dc_strdup`.
    unsafe fn strdup(&self) -> *mut libc::c_char {
        // Bind the CString to a local so it stays alive while its pointer
        // is handed to `dc_strdup`.
        let c_string = CString::new_lossy(self);
        let ptr = c_string.as_ptr();
        dc_strdup(ptr)
    }
}
/// Convenience methods to turn optional strings into C strings.
///
/// This is the same as the [Strdup] trait but a different trait name

View File

@@ -940,12 +940,12 @@ async fn add_parts(
let mime_headers = if save_mime_headers || save_mime_modified {
if mime_parser.was_encrypted() && !mime_parser.decoded_data.is_empty() {
String::from_utf8_lossy(&mime_parser.decoded_data).to_string()
mime_parser.decoded_data.clone()
} else {
String::from_utf8_lossy(imf_raw).to_string()
imf_raw.to_vec()
}
} else {
"".into()
Vec::new()
};
let sent_timestamp = *sent_timestamp;
@@ -1048,9 +1048,9 @@ INSERT INTO msgs
part.bytes as isize,
is_hidden,
if (save_mime_headers || mime_modified) && !trash {
mime_headers.to_string()
mime_headers.clone()
} else {
"".to_string()
Vec::new()
},
mime_in_reply_to,
mime_references,
@@ -3659,8 +3659,9 @@ YEAAAAAA!.
assert_eq!(msg.get_text(), Some("hi!".to_string()));
assert!(!msg.get_showpadlock());
let mime = message::get_mime_headers(&bob, msg.id).await?;
assert!(mime.contains("Received:"));
assert!(mime.contains("From:"));
let mime_str = String::from_utf8_lossy(&mime);
assert!(mime_str.contains("Received:"));
assert!(mime_str.contains("From:"));
// another one, from bob to alice, that gets encrypted
let chat_bob = bob.create_chat(&alice).await;
@@ -3670,8 +3671,9 @@ YEAAAAAA!.
assert_eq!(msg.get_text(), Some("ho!".to_string()));
assert!(msg.get_showpadlock());
let mime = message::get_mime_headers(&alice, msg.id).await?;
assert!(mime.contains("Received:"));
assert!(mime.contains("From:"));
let mime_str = String::from_utf8_lossy(&mime);
assert!(mime_str.contains("Received:"));
assert!(mime_str.contains("From:"));
Ok(())
}

View File

@@ -248,7 +248,7 @@ impl MsgId {
let rawmime = message::get_mime_headers(context, self).await?;
if !rawmime.is_empty() {
match HtmlMsgParser::from_bytes(context, rawmime.as_bytes()).await {
match HtmlMsgParser::from_bytes(context, &rawmime).await {
Err(err) => {
warn!(context, "get_html: parser error: {}", err);
Ok(None)
@@ -424,10 +424,10 @@ test some special html-characters as &lt; &gt; and &amp; but also &quot; and &#x
}
#[async_std::test]
async fn test_get_html_empty() {
async fn test_get_html_invalid_msgid() {
let t = TestContext::new().await;
let msg_id = MsgId::new(100);
assert!(msg_id.get_html(&t).await.unwrap().is_none())
assert!(msg_id.get_html(&t).await.is_err())
}
#[async_std::test]
@@ -550,4 +550,26 @@ test some special html-characters as &lt; &gt; and &amp; but also &quot; and &#x
let html = msg.get_id().get_html(&bob).await.unwrap().unwrap();
assert!(html.contains("<b>html</b> text"));
}
/// Receives the `cp1252-html.eml` fixture and checks that the expected
/// non-ASCII text shows up both in the message text and in the HTML
/// returned by `get_html()`.
#[async_std::test]
async fn test_cp1252_html() -> Result<()> {
    let expected = "foo bar ä ö ü ß";

    let ctx = TestContext::new_alice().await;
    ctx.set_config(Config::ShowEmails, Some("2")).await?;
    dc_receive_imf(
        &ctx,
        include_bytes!("../test-data/message/cp1252-html.eml"),
        "INBOX",
        0,
        false,
    )
    .await?;

    let msg = ctx.get_last_msg().await;
    assert_eq!(msg.viewtype, Viewtype::Text);
    assert!(msg.text.as_ref().unwrap().contains(expected));
    assert!(msg.has_html());

    let html = msg.get_id().get_html(&ctx).await?.unwrap();
    println!("{}", html);
    assert!(html.contains(expected));
    Ok(())
}
}

View File

@@ -7,6 +7,7 @@ use anyhow::{ensure, format_err, Result};
use async_std::path::{Path, PathBuf};
use deltachat_derive::{FromSql, ToSql};
use itertools::Itertools;
use rusqlite::types::ValueRef;
use serde::{Deserialize, Serialize};
use crate::chat::{self, Chat, ChatId};
@@ -1447,18 +1448,25 @@ pub fn guess_msgtype_from_suffix(path: &Path) -> Option<(Viewtype, &str)> {
/// only if `dc_set_config(context, "save_mime_headers", "1")`
/// was called before.
///
/// Returns an empty string if there are no headers saved for the given message,
/// Returns an empty vector if there are no headers saved for the given message,
/// e.g. because `save_mime_headers` is not set
/// or the message is not incoming.
pub async fn get_mime_headers(context: &Context, msg_id: MsgId) -> Result<String> {
pub async fn get_mime_headers(context: &Context, msg_id: MsgId) -> Result<Vec<u8>> {
let headers = context
.sql
.query_get_value(
.query_row(
"SELECT mime_headers FROM msgs WHERE id=?;",
paramsv![msg_id],
|row| {
row.get(0).or_else(|err| match row.get_ref(0)? {
ValueRef::Null => Ok(Vec::new()),
ValueRef::Text(text) => Ok(text.to_vec()),
ValueRef::Blob(blob) => Ok(blob.to_vec()),
ValueRef::Integer(_) | ValueRef::Real(_) => Err(err),
})
},
)
.await?
.unwrap_or_default();
.await?;
Ok(headers)
}

View File

@@ -72,6 +72,10 @@ CREATE TABLE msgs (
timestamp_sent INTEGER DEFAULT 0,
timestamp_rcvd INTEGER DEFAULT 0,
hidden INTEGER DEFAULT 0,
-- mime_headers column actually contains BLOBs, i.e. it may
-- contain non-UTF8 MIME messages. TEXT was a bad choice, but
-- thanks to SQLite 3 being dynamically typed, there is no need to
-- change column type.
mime_headers TEXT,
mime_in_reply_to TEXT,
mime_references TEXT,

View File

@@ -0,0 +1,18 @@
Subject: test non-utf8 and dc_get_msg_html()
To: tunis4 <tunis4@testrun.org>
From: "B. Petersen" <bpetersen@b44t.com>
Message-ID: <00007126-1efa-290b-2120-200251f50f23@b44t.com>
Date: Thu, 27 May 2021 17:33:16 +0200
MIME-Version: 1.0
Content-Type: text/plain; charset=windows-1252; format=flowed
Content-Language: en-US
Content-Transfer-Encoding: 8bit
foo bar ä ö ü ß
-------- Forwarded Message --------
Subject: Foo
Date: Fri, 21 May 2021 08:42:20 +0000
just to force a "more button"