diff --git a/src/dc_dehtml.rs b/src/dc_dehtml.rs
index 049aaeda0..cdf66e824 100644
--- a/src/dc_dehtml.rs
+++ b/src/dc_dehtml.rs
@@ -2,9 +2,6 @@ use lazy_static::lazy_static;
use quick_xml;
use quick_xml::events::{BytesEnd, BytesStart, BytesText};
-use crate::dc_tools::*;
-use crate::x::*;
-
lazy_static! {
static ref LINE_RE: regex::Regex = regex::Regex::new(r"(\r?\n)+").unwrap();
}
@@ -24,19 +21,20 @@ enum AddText {
// dc_dehtml() returns way too many lineends; however, an optimisation on this issue is not needed as
// the lineends are typically removed in further processing by the caller
-pub unsafe fn dc_dehtml(buf_terminated: *mut libc::c_char) -> *mut libc::c_char {
- dc_trim(buf_terminated);
- if *buf_terminated.offset(0isize) as libc::c_int == 0i32 {
- return dc_strdup(b"\x00" as *const u8 as *const libc::c_char);
+pub fn dc_dehtml(buf_terminated: &str) -> String {
+ let buf_terminated = buf_terminated.trim();
+
+ if buf_terminated.is_empty() {
+ return "".into();
}
let mut dehtml = Dehtml {
- strbuilder: String::with_capacity(strlen(buf_terminated)),
+ strbuilder: String::with_capacity(buf_terminated.len()),
add_text: AddText::YesRemoveLineEnds,
last_href: None,
};
- let mut reader = quick_xml::Reader::from_str(as_str(buf_terminated));
+ let mut reader = quick_xml::Reader::from_str(buf_terminated);
let mut buf = Vec::new();
@@ -61,7 +59,7 @@ pub unsafe fn dc_dehtml(buf_terminated: *mut libc::c_char) -> *mut libc::c_char
buf.clear();
}
- dehtml.strbuilder.strdup()
+ dehtml.strbuilder
}
fn dehtml_text_cb(event: &BytesText, dehtml: &mut Dehtml) {
diff --git a/src/dc_mimeparser.rs b/src/dc_mimeparser.rs
index dd97633aa..118c61258 100644
--- a/src/dc_mimeparser.rs
+++ b/src/dc_mimeparser.rs
@@ -1154,28 +1154,30 @@ unsafe fn dc_mimeparser_add_single_part_if_known(
if ok_to_continue {
/* check header directly as is_send_by_messenger is not yet set up */
let is_msgrmsg =
- (!dc_mimeparser_lookup_optional_field(&mimeparser, "Chat-Version")
- .is_null()) as libc::c_int;
+ !dc_mimeparser_lookup_optional_field(&mimeparser, "Chat-Version")
+ .is_null();
- let simplified_txt = simplifier.unwrap().simplify(
- decoded_data,
- decoded_data_bytes as libc::c_int,
- mime_type == 70i32,
- is_msgrmsg,
- );
- if !simplified_txt.is_null()
- && 0 != *simplified_txt.offset(0isize) as libc::c_int
- {
+ let simplified_txt =
+ if decoded_data_bytes <= 0 || decoded_data.is_null() {
+ "".into()
+ } else {
+ let input_c = strndup(decoded_data, decoded_data_bytes as _);
+ let input = to_string_lossy(input_c);
+ let is_html = mime_type == 70;
+ free(input_c as *mut _);
+
+ simplifier.unwrap().simplify(&input, is_html, is_msgrmsg)
+ };
+ if !simplified_txt.is_empty() {
let mut part = dc_mimepart_new();
part.type_0 = 10i32;
part.int_mimetype = mime_type;
- part.msg = simplified_txt;
+ part.msg = simplified_txt.strdup();
part.msg_raw =
strndup(decoded_data, decoded_data_bytes as libc::c_ulong);
do_add_single_part(mimeparser, part);
- } else {
- free(simplified_txt as *mut libc::c_void);
}
+
if simplifier.unwrap().is_forwarded {
mimeparser.is_forwarded = 1i32
}
diff --git a/src/dc_simplify.rs b/src/dc_simplify.rs
index ea7e17691..8d8fe4ed7 100644
--- a/src/dc_simplify.rs
+++ b/src/dc_simplify.rs
@@ -1,64 +1,49 @@
use crate::dc_dehtml::*;
-use crate::dc_tools::*;
-use crate::x::*;
#[derive(Copy, Clone)]
pub struct Simplify {
pub is_forwarded: bool, // set to true by simplify_plain_text() when the text starts with a "Forwarded message" header block
- pub is_cut_at_begin: bool,
- pub is_cut_at_end: bool,
+}
+
+/// Locate the footer separator in a message that was split into lines.
+///
+/// Returns `(index, non_standard)`: `index` is the position of the first
+/// footer line, or `lines.len()` if there is none; `non_standard` is `true`
+/// only when that line is `--`, `---` or `----` rather than the standard
+/// `-- ` separator (rfc3676, §4.3).
+fn find_message_footer(lines: &[&str]) -> (usize, bool) {
+    for (ix, &line) in lines.iter().enumerate() {
+        match line {
+            // Standard separator; quoted-printable may encode `-- ` as `-- =20`,
+            // which decodes to `--` followed by two spaces.
+            "-- " | "--  " => return (ix, false),
+            "--" | "---" | "----" => return (ix, true),
+            _ => (),
+        }
+    }
+    (lines.len(), false)
}
impl Simplify {
pub fn new() -> Self {
Simplify {
is_forwarded: false,
- is_cut_at_begin: false,
- is_cut_at_end: false,
}
}
/// Simplify and normalise text: Remove quotes, signatures, unnecessary
/// lineends etc.
/// The data returned from simplify() is an owned `String`; unlike the former C-string variant it needs no free().
- pub unsafe fn simplify(
- &mut self,
- in_unterminated: *const libc::c_char,
- in_bytes: libc::c_int,
- is_html: bool,
- is_msgrmsg: libc::c_int,
- ) -> *mut libc::c_char {
- if in_bytes <= 0 {
- return "".strdup();
- }
+ pub fn simplify(&mut self, input: &str, is_html: bool, is_msgrmsg: bool) -> String {
+ let mut out = if is_html {
+ dc_dehtml(input)
+ } else {
+ input.to_string()
+ };
- /* create a copy of the given buffer */
- let mut out: *mut libc::c_char;
- let mut temp: *mut libc::c_char;
- self.is_forwarded = false;
- self.is_cut_at_begin = false;
- self.is_cut_at_end = false;
- out = strndup(
- in_unterminated as *mut libc::c_char,
- in_bytes as libc::c_ulong,
- );
- if out.is_null() {
- return dc_strdup(b"\x00" as *const u8 as *const libc::c_char);
- }
- if is_html {
- temp = dc_dehtml(out);
- if !temp.is_null() {
- free(out as *mut libc::c_void);
- out = temp
- }
- }
- dc_remove_cr_chars(out);
- temp = self.simplify_plain_text(out, is_msgrmsg);
- if !temp.is_null() {
- free(out as *mut libc::c_void);
- out = temp
- }
- dc_remove_cr_chars(out);
+ out.retain(|c| c != '\r');
+ out = self.simplify_plain_text(&out, is_msgrmsg);
+ out.retain(|c| c != '\r');
out
}
@@ -67,75 +52,48 @@ impl Simplify {
* Simplify Plain Text
*/
#[allow(non_snake_case)]
- unsafe fn simplify_plain_text(
- &mut self,
- buf_terminated: *const libc::c_char,
- is_msgrmsg: libc::c_int,
- ) -> *mut libc::c_char {
+ fn simplify_plain_text(&mut self, buf_terminated: &str, is_msgrmsg: bool) -> String {
/* This function ...
... removes all text after the line `-- ` (footer mark)
... removes full quotes at the beginning and at the end of the text -
these are all lines starting with the character `>`
... removes a non-empty line before the removed quote (contains something like "On 2.9.2016, Bjoern wrote:" in different formats and languages) */
/* split the given buffer into lines */
- let lines = dc_split_into_lines(buf_terminated);
+ let lines: Vec<_> = buf_terminated.split('\n').collect();
let mut l_first: usize = 0;
- let mut l_last = lines.len();
- let mut line: *mut libc::c_char;
- let mut footer_mark: libc::c_int = 0i32;
- for l in l_first..l_last {
- line = lines[l];
- if strcmp(line, b"-- \x00" as *const u8 as *const libc::c_char) == 0i32
- || strcmp(line, b"-- \x00" as *const u8 as *const libc::c_char) == 0i32
- {
- footer_mark = 1i32
- }
- if strcmp(line, b"--\x00" as *const u8 as *const libc::c_char) == 0i32
- || strcmp(line, b"---\x00" as *const u8 as *const libc::c_char) == 0i32
- || strcmp(line, b"----\x00" as *const u8 as *const libc::c_char) == 0i32
- {
- footer_mark = 1i32;
- self.is_cut_at_end = true
- }
- if 0 != footer_mark {
- l_last = l;
- /* done */
- break;
- }
- }
+ let mut is_cut_at_begin = false;
+ let (mut l_last, mut is_cut_at_end) = find_message_footer(&lines);
+
if l_last > l_first + 2 {
- let line0: *mut libc::c_char = lines[l_first];
- let line1: *mut libc::c_char = lines[l_first + 1];
- let line2: *mut libc::c_char = lines[l_first + 2];
- if strcmp(
- line0,
- b"---------- Forwarded message ----------\x00" as *const u8 as *const libc::c_char,
- ) == 0i32
- && strncmp(line1, b"From: \x00" as *const u8 as *const libc::c_char, 6) == 0i32
- && *line2.offset(0isize) as libc::c_int == 0i32
+ let line0 = lines[l_first];
+ let line1 = lines[l_first + 1];
+ let line2 = lines[l_first + 2];
+ if line0 == "---------- Forwarded message ----------"
+ && line1.starts_with("From: ")
+ && line2.is_empty()
{
self.is_forwarded = true;
l_first += 3
}
}
for l in l_first..l_last {
- line = lines[l];
- if strncmp(line, b"-----\x00" as *const u8 as *const libc::c_char, 5) == 0i32
- || strncmp(line, b"_____\x00" as *const u8 as *const libc::c_char, 5) == 0i32
- || strncmp(line, b"=====\x00" as *const u8 as *const libc::c_char, 5) == 0i32
- || strncmp(line, b"*****\x00" as *const u8 as *const libc::c_char, 5) == 0i32
- || strncmp(line, b"~~~~~\x00" as *const u8 as *const libc::c_char, 5) == 0i32
+ let line = lines[l];
+ if line == "-----"
+ || line == "_____"
+ || line == "====="
+ || line == "*****"
+ || line == "~~~~~"
{
l_last = l;
- self.is_cut_at_end = true;
+ is_cut_at_end = true;
/* done */
break;
}
}
- if 0 == is_msgrmsg {
+ if !is_msgrmsg {
let mut l_lastQuotedLine = None;
for l in (l_first..l_last).rev() {
- line = lines[l];
+ let line = lines[l];
if is_plain_quote(line) {
l_lastQuotedLine = Some(l)
} else if !is_empty_line(line) {
@@ -144,25 +102,25 @@ impl Simplify {
}
if l_lastQuotedLine.is_some() {
l_last = l_lastQuotedLine.unwrap();
- self.is_cut_at_end = true;
+ is_cut_at_end = true;
if l_last > 1 {
if is_empty_line(lines[l_last - 1]) {
l_last -= 1
}
}
if l_last > 1 {
- line = lines[l_last - 1];
+ let line = lines[l_last - 1];
if is_quoted_headline(line) {
l_last -= 1
}
}
}
}
- if 0 == is_msgrmsg {
+ if !is_msgrmsg {
let mut l_lastQuotedLine_0 = None;
let mut hasQuotedHeadline = 0;
for l in l_first..l_last {
- line = lines[l];
+ let line = lines[l];
if is_plain_quote(line) {
l_lastQuotedLine_0 = Some(l)
} else if !is_empty_line(line) {
@@ -179,19 +137,19 @@ impl Simplify {
}
if l_lastQuotedLine_0.is_some() {
l_first = l_lastQuotedLine_0.unwrap() + 1;
- self.is_cut_at_begin = true
+ is_cut_at_begin = true
}
}
/* re-create buffer from the remaining lines */
let mut ret = String::new();
- if self.is_cut_at_begin {
+ if is_cut_at_begin {
ret += "[...]";
}
/* we write empty lines only in case a non-empty line follows */
let mut pending_linebreaks: libc::c_int = 0i32;
let mut content_lines_added: libc::c_int = 0i32;
for l in l_first..l_last {
- line = lines[l];
+ let line = lines[l];
if is_empty_line(line) {
pending_linebreaks += 1
} else {
@@ -205,142 +163,105 @@ impl Simplify {
}
}
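// the incoming message might contain invalid UTF8 (NOTE(review): this explained the removed to_string_lossy() call; `line` is now a &str and is appended as-is)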
- ret += &to_string_lossy(line);
+ ret += line;
content_lines_added += 1;
pending_linebreaks = 1i32
}
}
- if self.is_cut_at_end && (!self.is_cut_at_begin || 0 != content_lines_added) {
+ if is_cut_at_end && (!is_cut_at_begin || 0 != content_lines_added) {
ret += " [...]";
}
- dc_free_splitted_lines(lines);
- ret.strdup()
+ ret
}
}
/**
* Tools
*/
-unsafe fn is_empty_line(buf: *const libc::c_char) -> bool {
- /* force unsigned - otherwise the `> ' '` comparison will fail */
- let mut p1: *const libc::c_uchar = buf as *const libc::c_uchar;
- while 0 != *p1 {
- if *p1 as libc::c_int > ' ' as i32 {
+fn is_empty_line(buf: &str) -> bool {
+ // Empty means: no character greater than ' ' (zero-length, spaces, ASCII control chars).
+ //
+ // XXX: can it be simplified to buf.chars().all(|c| c.is_whitespace())?
+ // Strictly speaking, it is not equivalent (^A is not whitespace, but less than ' '),
+ // but control sequences in an email body seem unlikely.
+ // See discussion at: https://github.com/deltachat/deltachat-core-rust/pull/402#discussion_r317062392
+ for c in buf.chars() {
+ if c > ' ' {
return false;
}
- p1 = p1.offset(1isize)
}
true
}
-unsafe fn is_quoted_headline(buf: *const libc::c_char) -> bool {
+fn is_quoted_headline(buf: &str) -> bool {
/* This function may be called for the line _directly_ before a quote.
The function checks if the line contains sth. like "On 01.02.2016, xy@z wrote:" in various languages.
- Currently, we simply check if the last character is a ':'.
- Checking for the existence of an email address may fail (headlines may show the user's name instead of the address) */
- let buf_len: libc::c_int = strlen(buf) as libc::c_int;
- if buf_len > 80i32 {
- return false;
- }
- if buf_len > 0i32 && *buf.offset((buf_len - 1i32) as isize) as libc::c_int == ':' as i32 {
- return true;
- }
- false
+ buf.len() <= 80 && buf.ends_with(':') // headline: at most 80 bytes long and ending in ':'
}
-unsafe fn is_plain_quote(buf: *const libc::c_char) -> bool {
- if *buf.offset(0isize) as libc::c_int == '>' as i32 {
- return true;
- }
-
- false
+fn is_plain_quote(buf: &str) -> bool {
+ buf.starts_with(">") // '>' must be the very first character; leading whitespace is not skipped
}
#[cfg(test)]
mod tests {
use super::*;
- use std::ffi::CStr;
#[test]
fn test_simplify_trim() {
- unsafe {
- let mut simplify = Simplify::new();
- let html: *const libc::c_char =
- b"\r\r\nline1
\r\n\r\n\r\rline2\n\r\x00" as *const u8 as *const libc::c_char;
- let plain: *mut libc::c_char =
- simplify.simplify(html, strlen(html) as libc::c_int, true, 0);
+ let mut simplify = Simplify::new();
+ let html = "\r\r\nline1
\r\n\r\n\r\rline2\n\r";
+ let plain = simplify.simplify(html, true, false);
- assert_eq!(
- CStr::from_ptr(plain as *const libc::c_char)
- .to_str()
- .unwrap(),
- "line1\nline2",
- );
-
- free(plain as *mut libc::c_void);
- }
+ assert_eq!(plain, "line1\nline2");
}
#[test]
fn test_simplify_parse_href() {
- unsafe {
- let mut simplify = Simplify::new();
- let html: *const libc::c_char =
- b"texttext]>text bold]]>\x00"
- as *const u8 as *const libc::c_char;
- let plain = simplify.simplify(html, strlen(html) as libc::c_int, true, 0);
+ let mut simplify = Simplify::new();
+ let html = "]>text bold]]>";
+ let plain = simplify.simplify(html, true, false);
- assert_eq!(
- CStr::from_ptr(plain as *const libc::c_char)
- .to_str()
- .unwrap(),
- "text *bold*<>",
- );
-
- free(plain as *mut libc::c_void);
- }
+ assert_eq!(plain, "text *bold*<>");
}
#[test]
fn test_simplify_html_encoded() {
- unsafe {
- let mut simplify = Simplify::new();
- let html =
- b"<>"'& äÄöÖüÜß fooÆçÇ ♦&noent;\x00"
- as *const u8 as *const libc::c_char;
- let plain = simplify.simplify(html, strlen(html) as libc::c_int, true, 0);
+ let mut simplify = Simplify::new();
+ let html =
+ "<>"'& äÄöÖüÜß fooÆçÇ ♦&noent;";
- assert_eq!(
- CStr::from_ptr(plain as *const libc::c_char)
- .to_str()
- .unwrap(),
- "<>\"\'& äÄöÖüÜß fooÆçÇ \u{2666}\u{200e}\u{200f}\u{200c}&noent;\u{200d}"
- );
+ let plain = simplify.simplify(html, true, false);
- free(plain as *mut libc::c_void);
- }
+ assert_eq!(
+ plain,
+ "<>\"\'& äÄöÖüÜß fooÆçÇ \u{2666}\u{200e}\u{200f}\u{200c}&noent;\u{200d}"
+ );
+ }
+
+ #[test]
+ fn test_simplify_utilities() {
+ assert!(is_empty_line(" \t"));
+ assert!(is_empty_line(""));
+ assert!(is_empty_line(" \r"));
+ assert!(!is_empty_line(" x"));
+ assert!(is_plain_quote("> hello world"));
+ assert!(is_plain_quote(">>"));
+ assert!(!is_plain_quote("Life is pain"));
+ assert!(!is_plain_quote(""));
}
}
diff --git a/src/dc_tools.rs b/src/dc_tools.rs
index b500fcc55..0ef03eb20 100644
--- a/src/dc_tools.rs
+++ b/src/dc_tools.rs
@@ -355,33 +355,6 @@ unsafe fn dc_utf8_strnlen(s: *const libc::c_char, n: size_t) -> size_t {
j
}
-/* split string into lines*/
-pub unsafe fn dc_split_into_lines(buf_terminated: *const libc::c_char) -> Vec<*mut libc::c_char> {
- let mut lines = Vec::new();
- let mut line_chars = 0;
- let mut p1: *const libc::c_char = buf_terminated;
- let mut line_start: *const libc::c_char = p1;
- while 0 != *p1 {
- if *p1 as libc::c_int == '\n' as i32 {
- lines.push(strndup(line_start, line_chars));
- p1 = p1.offset(1isize);
- line_start = p1;
- line_chars = 0;
- } else {
- p1 = p1.offset(1isize);
- line_chars += 1;
- }
- }
- lines.push(strndup(line_start, line_chars));
- lines
-}
-
-pub unsafe fn dc_free_splitted_lines(lines: Vec<*mut libc::c_char>) {
- for s in lines {
- free(s as *mut libc::c_void);
- }
-}
-
pub unsafe fn dc_str_from_clist(
list: *const clist,
delimiter: *const libc::c_char,