mirror of
https://github.com/chatmail/core.git
synced 2026-05-08 01:16:31 +03:00
refactor: make dc_dehtml() function safe
* Make dc_dehtml() function safe
* Change type of is_msgrmsg parameter to bool
* Narrow type of local variable in simplify_plain_text()
* Export fewer fields of `Simplify' record
* Demote is_cut_* from fields of `Simplify' to local variables
* Refactor part of simplify_plain_text()
Refactor footer ("-- " and similar) code into separate function,
and re-implement it with standard Rust string methods.
It simplifies code and allows removing one mutable local variable.
* Replace dc_split_into_lines with String.split()
* src/dc_simplify.rs(find_message_footer): adjust type signature to accept
slice of &str, not slice of pointers
* src/dc_simplify.rs(simplify_plain_text): adjust code to use '==' operator
instead of strcmp(3).
* src/dc_simplify.rs(is_empty_line, is_quoted_headline, is_plain_quote):
+ adjust type signatures to accept &str, not 'const char *'
+ remove no longer needed 'unsafe' qualifier
* src/dc_tools(dc_split_into_lines, dc_free_splitted_lines): remove no longer
used functions.
Besides improving type safety, this change reduces the number of
allocations: String.split returns an iterator of &str.
* Make simplify_plain_text() safe
* Make Simplify.simplify return String, not pointer
* Refactor Simplify.simplify to use String methods, not pointers
* Make Simplify.simplify() safe
* Avoid needless allocation in Simplify.simplify when input is html
* Add tests for simplify utilities
* Document discussion about is_empty_line()
This commit is contained in:
committed by
Friedel Ziegelmayer
parent
8a73f84003
commit
d7d7147549
@@ -2,9 +2,6 @@ use lazy_static::lazy_static;
|
|||||||
use quick_xml;
|
use quick_xml;
|
||||||
use quick_xml::events::{BytesEnd, BytesStart, BytesText};
|
use quick_xml::events::{BytesEnd, BytesStart, BytesText};
|
||||||
|
|
||||||
use crate::dc_tools::*;
|
|
||||||
use crate::x::*;
|
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref LINE_RE: regex::Regex = regex::Regex::new(r"(\r?\n)+").unwrap();
|
static ref LINE_RE: regex::Regex = regex::Regex::new(r"(\r?\n)+").unwrap();
|
||||||
}
|
}
|
||||||
@@ -24,19 +21,20 @@ enum AddText {
|
|||||||
|
|
||||||
// dc_dehtml() returns way too many lineends; however, an optimisation on this issue is not needed as
|
// dc_dehtml() returns way too many lineends; however, an optimisation on this issue is not needed as
|
||||||
// the lineends are typically remove in further processing by the caller
|
// the lineends are typically remove in further processing by the caller
|
||||||
pub unsafe fn dc_dehtml(buf_terminated: *mut libc::c_char) -> *mut libc::c_char {
|
pub fn dc_dehtml(buf_terminated: &str) -> String {
|
||||||
dc_trim(buf_terminated);
|
let buf_terminated = buf_terminated.trim();
|
||||||
if *buf_terminated.offset(0isize) as libc::c_int == 0i32 {
|
|
||||||
return dc_strdup(b"\x00" as *const u8 as *const libc::c_char);
|
if buf_terminated.is_empty() {
|
||||||
|
return "".into();
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut dehtml = Dehtml {
|
let mut dehtml = Dehtml {
|
||||||
strbuilder: String::with_capacity(strlen(buf_terminated)),
|
strbuilder: String::with_capacity(buf_terminated.len()),
|
||||||
add_text: AddText::YesRemoveLineEnds,
|
add_text: AddText::YesRemoveLineEnds,
|
||||||
last_href: None,
|
last_href: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut reader = quick_xml::Reader::from_str(as_str(buf_terminated));
|
let mut reader = quick_xml::Reader::from_str(buf_terminated);
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut buf = Vec::new();
|
||||||
|
|
||||||
@@ -61,7 +59,7 @@ pub unsafe fn dc_dehtml(buf_terminated: *mut libc::c_char) -> *mut libc::c_char
|
|||||||
buf.clear();
|
buf.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
dehtml.strbuilder.strdup()
|
dehtml.strbuilder
|
||||||
}
|
}
|
||||||
|
|
||||||
fn dehtml_text_cb(event: &BytesText, dehtml: &mut Dehtml) {
|
fn dehtml_text_cb(event: &BytesText, dehtml: &mut Dehtml) {
|
||||||
|
|||||||
@@ -1154,28 +1154,30 @@ unsafe fn dc_mimeparser_add_single_part_if_known(
|
|||||||
if ok_to_continue {
|
if ok_to_continue {
|
||||||
/* check header directly as is_send_by_messenger is not yet set up */
|
/* check header directly as is_send_by_messenger is not yet set up */
|
||||||
let is_msgrmsg =
|
let is_msgrmsg =
|
||||||
(!dc_mimeparser_lookup_optional_field(&mimeparser, "Chat-Version")
|
!dc_mimeparser_lookup_optional_field(&mimeparser, "Chat-Version")
|
||||||
.is_null()) as libc::c_int;
|
.is_null();
|
||||||
|
|
||||||
let simplified_txt = simplifier.unwrap().simplify(
|
let simplified_txt =
|
||||||
decoded_data,
|
if decoded_data_bytes <= 0 || decoded_data.is_null() {
|
||||||
decoded_data_bytes as libc::c_int,
|
"".into()
|
||||||
mime_type == 70i32,
|
} else {
|
||||||
is_msgrmsg,
|
let input_c = strndup(decoded_data, decoded_data_bytes as _);
|
||||||
);
|
let input = to_string_lossy(input_c);
|
||||||
if !simplified_txt.is_null()
|
let is_html = mime_type == 70;
|
||||||
&& 0 != *simplified_txt.offset(0isize) as libc::c_int
|
free(input_c as *mut _);
|
||||||
{
|
|
||||||
|
simplifier.unwrap().simplify(&input, is_html, is_msgrmsg)
|
||||||
|
};
|
||||||
|
if !simplified_txt.is_empty() {
|
||||||
let mut part = dc_mimepart_new();
|
let mut part = dc_mimepart_new();
|
||||||
part.type_0 = 10i32;
|
part.type_0 = 10i32;
|
||||||
part.int_mimetype = mime_type;
|
part.int_mimetype = mime_type;
|
||||||
part.msg = simplified_txt;
|
part.msg = simplified_txt.strdup();
|
||||||
part.msg_raw =
|
part.msg_raw =
|
||||||
strndup(decoded_data, decoded_data_bytes as libc::c_ulong);
|
strndup(decoded_data, decoded_data_bytes as libc::c_ulong);
|
||||||
do_add_single_part(mimeparser, part);
|
do_add_single_part(mimeparser, part);
|
||||||
} else {
|
|
||||||
free(simplified_txt as *mut libc::c_void);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if simplifier.unwrap().is_forwarded {
|
if simplifier.unwrap().is_forwarded {
|
||||||
mimeparser.is_forwarded = 1i32
|
mimeparser.is_forwarded = 1i32
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,64 +1,49 @@
|
|||||||
use crate::dc_dehtml::*;
|
use crate::dc_dehtml::*;
|
||||||
use crate::dc_tools::*;
|
|
||||||
use crate::x::*;
|
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
pub struct Simplify {
|
pub struct Simplify {
|
||||||
pub is_forwarded: bool,
|
pub is_forwarded: bool,
|
||||||
pub is_cut_at_begin: bool,
|
}
|
||||||
pub is_cut_at_end: bool,
|
|
||||||
|
/// Return index of footer line in vector of message lines, or vector length if
|
||||||
|
/// no footer is found.
|
||||||
|
///
|
||||||
|
/// Also return whether not-standard (rfc3676, §4.3) footer is found.
|
||||||
|
fn find_message_footer(lines: &[&str]) -> (usize, bool) {
|
||||||
|
for ix in 0..lines.len() {
|
||||||
|
let line = lines[ix];
|
||||||
|
|
||||||
|
// quoted-printable may encode `-- ` to `-- =20` which is converted
|
||||||
|
// back to `-- `
|
||||||
|
match line.as_ref() {
|
||||||
|
"-- " | "-- " => return (ix, false),
|
||||||
|
"--" | "---" | "----" => return (ix, true),
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return (lines.len(), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Simplify {
|
impl Simplify {
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Simplify {
|
Simplify {
|
||||||
is_forwarded: false,
|
is_forwarded: false,
|
||||||
is_cut_at_begin: false,
|
|
||||||
is_cut_at_end: false,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Simplify and normalise text: Remove quotes, signatures, unnecessary
|
/// Simplify and normalise text: Remove quotes, signatures, unnecessary
|
||||||
/// lineends etc.
|
/// lineends etc.
|
||||||
/// The data returned from simplify() must be free()'d when no longer used.
|
/// The data returned from simplify() must be free()'d when no longer used.
|
||||||
pub unsafe fn simplify(
|
pub fn simplify(&mut self, input: &str, is_html: bool, is_msgrmsg: bool) -> String {
|
||||||
&mut self,
|
let mut out = if is_html {
|
||||||
in_unterminated: *const libc::c_char,
|
dc_dehtml(input)
|
||||||
in_bytes: libc::c_int,
|
} else {
|
||||||
is_html: bool,
|
input.to_string()
|
||||||
is_msgrmsg: libc::c_int,
|
};
|
||||||
) -> *mut libc::c_char {
|
|
||||||
if in_bytes <= 0 {
|
|
||||||
return "".strdup();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* create a copy of the given buffer */
|
out.retain(|c| c != '\r');
|
||||||
let mut out: *mut libc::c_char;
|
out = self.simplify_plain_text(&out, is_msgrmsg);
|
||||||
let mut temp: *mut libc::c_char;
|
out.retain(|c| c != '\r');
|
||||||
self.is_forwarded = false;
|
|
||||||
self.is_cut_at_begin = false;
|
|
||||||
self.is_cut_at_end = false;
|
|
||||||
out = strndup(
|
|
||||||
in_unterminated as *mut libc::c_char,
|
|
||||||
in_bytes as libc::c_ulong,
|
|
||||||
);
|
|
||||||
if out.is_null() {
|
|
||||||
return dc_strdup(b"\x00" as *const u8 as *const libc::c_char);
|
|
||||||
}
|
|
||||||
if is_html {
|
|
||||||
temp = dc_dehtml(out);
|
|
||||||
if !temp.is_null() {
|
|
||||||
free(out as *mut libc::c_void);
|
|
||||||
out = temp
|
|
||||||
}
|
|
||||||
}
|
|
||||||
dc_remove_cr_chars(out);
|
|
||||||
temp = self.simplify_plain_text(out, is_msgrmsg);
|
|
||||||
if !temp.is_null() {
|
|
||||||
free(out as *mut libc::c_void);
|
|
||||||
out = temp
|
|
||||||
}
|
|
||||||
dc_remove_cr_chars(out);
|
|
||||||
|
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
@@ -67,75 +52,48 @@ impl Simplify {
|
|||||||
* Simplify Plain Text
|
* Simplify Plain Text
|
||||||
*/
|
*/
|
||||||
#[allow(non_snake_case)]
|
#[allow(non_snake_case)]
|
||||||
unsafe fn simplify_plain_text(
|
fn simplify_plain_text(&mut self, buf_terminated: &str, is_msgrmsg: bool) -> String {
|
||||||
&mut self,
|
|
||||||
buf_terminated: *const libc::c_char,
|
|
||||||
is_msgrmsg: libc::c_int,
|
|
||||||
) -> *mut libc::c_char {
|
|
||||||
/* This function ...
|
/* This function ...
|
||||||
... removes all text after the line `-- ` (footer mark)
|
... removes all text after the line `-- ` (footer mark)
|
||||||
... removes full quotes at the beginning and at the end of the text -
|
... removes full quotes at the beginning and at the end of the text -
|
||||||
these are all lines starting with the character `>`
|
these are all lines starting with the character `>`
|
||||||
... remove a non-empty line before the removed quote (contains sth. like "On 2.9.2016, Bjoern wrote:" in different formats and lanugages) */
|
... remove a non-empty line before the removed quote (contains sth. like "On 2.9.2016, Bjoern wrote:" in different formats and lanugages) */
|
||||||
/* split the given buffer into lines */
|
/* split the given buffer into lines */
|
||||||
let lines = dc_split_into_lines(buf_terminated);
|
let lines: Vec<_> = buf_terminated.split('\n').collect();
|
||||||
let mut l_first: usize = 0;
|
let mut l_first: usize = 0;
|
||||||
let mut l_last = lines.len();
|
let mut is_cut_at_begin = false;
|
||||||
let mut line: *mut libc::c_char;
|
let (mut l_last, mut is_cut_at_end) = find_message_footer(&lines);
|
||||||
let mut footer_mark: libc::c_int = 0i32;
|
|
||||||
for l in l_first..l_last {
|
|
||||||
line = lines[l];
|
|
||||||
if strcmp(line, b"-- \x00" as *const u8 as *const libc::c_char) == 0i32
|
|
||||||
|| strcmp(line, b"-- \x00" as *const u8 as *const libc::c_char) == 0i32
|
|
||||||
{
|
|
||||||
footer_mark = 1i32
|
|
||||||
}
|
|
||||||
if strcmp(line, b"--\x00" as *const u8 as *const libc::c_char) == 0i32
|
|
||||||
|| strcmp(line, b"---\x00" as *const u8 as *const libc::c_char) == 0i32
|
|
||||||
|| strcmp(line, b"----\x00" as *const u8 as *const libc::c_char) == 0i32
|
|
||||||
{
|
|
||||||
footer_mark = 1i32;
|
|
||||||
self.is_cut_at_end = true
|
|
||||||
}
|
|
||||||
if 0 != footer_mark {
|
|
||||||
l_last = l;
|
|
||||||
/* done */
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if l_last > l_first + 2 {
|
if l_last > l_first + 2 {
|
||||||
let line0: *mut libc::c_char = lines[l_first];
|
let line0 = lines[l_first];
|
||||||
let line1: *mut libc::c_char = lines[l_first + 1];
|
let line1 = lines[l_first + 1];
|
||||||
let line2: *mut libc::c_char = lines[l_first + 2];
|
let line2 = lines[l_first + 2];
|
||||||
if strcmp(
|
if line0 == "---------- Forwarded message ----------"
|
||||||
line0,
|
&& line1.starts_with("From: ")
|
||||||
b"---------- Forwarded message ----------\x00" as *const u8 as *const libc::c_char,
|
&& line2.is_empty()
|
||||||
) == 0i32
|
|
||||||
&& strncmp(line1, b"From: \x00" as *const u8 as *const libc::c_char, 6) == 0i32
|
|
||||||
&& *line2.offset(0isize) as libc::c_int == 0i32
|
|
||||||
{
|
{
|
||||||
self.is_forwarded = true;
|
self.is_forwarded = true;
|
||||||
l_first += 3
|
l_first += 3
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for l in l_first..l_last {
|
for l in l_first..l_last {
|
||||||
line = lines[l];
|
let line = lines[l];
|
||||||
if strncmp(line, b"-----\x00" as *const u8 as *const libc::c_char, 5) == 0i32
|
if line == "-----"
|
||||||
|| strncmp(line, b"_____\x00" as *const u8 as *const libc::c_char, 5) == 0i32
|
|| line == "_____"
|
||||||
|| strncmp(line, b"=====\x00" as *const u8 as *const libc::c_char, 5) == 0i32
|
|| line == "====="
|
||||||
|| strncmp(line, b"*****\x00" as *const u8 as *const libc::c_char, 5) == 0i32
|
|| line == "*****"
|
||||||
|| strncmp(line, b"~~~~~\x00" as *const u8 as *const libc::c_char, 5) == 0i32
|
|| line == "~~~~~"
|
||||||
{
|
{
|
||||||
l_last = l;
|
l_last = l;
|
||||||
self.is_cut_at_end = true;
|
is_cut_at_end = true;
|
||||||
/* done */
|
/* done */
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if 0 == is_msgrmsg {
|
if !is_msgrmsg {
|
||||||
let mut l_lastQuotedLine = None;
|
let mut l_lastQuotedLine = None;
|
||||||
for l in (l_first..l_last).rev() {
|
for l in (l_first..l_last).rev() {
|
||||||
line = lines[l];
|
let line = lines[l];
|
||||||
if is_plain_quote(line) {
|
if is_plain_quote(line) {
|
||||||
l_lastQuotedLine = Some(l)
|
l_lastQuotedLine = Some(l)
|
||||||
} else if !is_empty_line(line) {
|
} else if !is_empty_line(line) {
|
||||||
@@ -144,25 +102,25 @@ impl Simplify {
|
|||||||
}
|
}
|
||||||
if l_lastQuotedLine.is_some() {
|
if l_lastQuotedLine.is_some() {
|
||||||
l_last = l_lastQuotedLine.unwrap();
|
l_last = l_lastQuotedLine.unwrap();
|
||||||
self.is_cut_at_end = true;
|
is_cut_at_end = true;
|
||||||
if l_last > 1 {
|
if l_last > 1 {
|
||||||
if is_empty_line(lines[l_last - 1]) {
|
if is_empty_line(lines[l_last - 1]) {
|
||||||
l_last -= 1
|
l_last -= 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if l_last > 1 {
|
if l_last > 1 {
|
||||||
line = lines[l_last - 1];
|
let line = lines[l_last - 1];
|
||||||
if is_quoted_headline(line) {
|
if is_quoted_headline(line) {
|
||||||
l_last -= 1
|
l_last -= 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if 0 == is_msgrmsg {
|
if !is_msgrmsg {
|
||||||
let mut l_lastQuotedLine_0 = None;
|
let mut l_lastQuotedLine_0 = None;
|
||||||
let mut hasQuotedHeadline = 0;
|
let mut hasQuotedHeadline = 0;
|
||||||
for l in l_first..l_last {
|
for l in l_first..l_last {
|
||||||
line = lines[l];
|
let line = lines[l];
|
||||||
if is_plain_quote(line) {
|
if is_plain_quote(line) {
|
||||||
l_lastQuotedLine_0 = Some(l)
|
l_lastQuotedLine_0 = Some(l)
|
||||||
} else if !is_empty_line(line) {
|
} else if !is_empty_line(line) {
|
||||||
@@ -179,19 +137,19 @@ impl Simplify {
|
|||||||
}
|
}
|
||||||
if l_lastQuotedLine_0.is_some() {
|
if l_lastQuotedLine_0.is_some() {
|
||||||
l_first = l_lastQuotedLine_0.unwrap() + 1;
|
l_first = l_lastQuotedLine_0.unwrap() + 1;
|
||||||
self.is_cut_at_begin = true
|
is_cut_at_begin = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* re-create buffer from the remaining lines */
|
/* re-create buffer from the remaining lines */
|
||||||
let mut ret = String::new();
|
let mut ret = String::new();
|
||||||
if self.is_cut_at_begin {
|
if is_cut_at_begin {
|
||||||
ret += "[...]";
|
ret += "[...]";
|
||||||
}
|
}
|
||||||
/* we write empty lines only in case and non-empty line follows */
|
/* we write empty lines only in case and non-empty line follows */
|
||||||
let mut pending_linebreaks: libc::c_int = 0i32;
|
let mut pending_linebreaks: libc::c_int = 0i32;
|
||||||
let mut content_lines_added: libc::c_int = 0i32;
|
let mut content_lines_added: libc::c_int = 0i32;
|
||||||
for l in l_first..l_last {
|
for l in l_first..l_last {
|
||||||
line = lines[l];
|
let line = lines[l];
|
||||||
if is_empty_line(line) {
|
if is_empty_line(line) {
|
||||||
pending_linebreaks += 1
|
pending_linebreaks += 1
|
||||||
} else {
|
} else {
|
||||||
@@ -205,142 +163,105 @@ impl Simplify {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// the incoming message might contain invalid UTF8
|
// the incoming message might contain invalid UTF8
|
||||||
ret += &to_string_lossy(line);
|
ret += line;
|
||||||
content_lines_added += 1;
|
content_lines_added += 1;
|
||||||
pending_linebreaks = 1i32
|
pending_linebreaks = 1i32
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if self.is_cut_at_end && (!self.is_cut_at_begin || 0 != content_lines_added) {
|
if is_cut_at_end && (!is_cut_at_begin || 0 != content_lines_added) {
|
||||||
ret += " [...]";
|
ret += " [...]";
|
||||||
}
|
}
|
||||||
dc_free_splitted_lines(lines);
|
|
||||||
|
|
||||||
ret.strdup()
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tools
|
* Tools
|
||||||
*/
|
*/
|
||||||
unsafe fn is_empty_line(buf: *const libc::c_char) -> bool {
|
fn is_empty_line(buf: &str) -> bool {
|
||||||
/* force unsigned - otherwise the `> ' '` comparison will fail */
|
// XXX: can it be simplified to buf.chars().all(|c| c.is_whitespace())?
|
||||||
let mut p1: *const libc::c_uchar = buf as *const libc::c_uchar;
|
//
|
||||||
while 0 != *p1 {
|
// Strictly speaking, it is not equivalent (^A is not whitespace, but less than ' '),
|
||||||
if *p1 as libc::c_int > ' ' as i32 {
|
// but having control sequences in email body?!
|
||||||
|
//
|
||||||
|
// See discussion at: https://github.com/deltachat/deltachat-core-rust/pull/402#discussion_r317062392
|
||||||
|
for c in buf.chars() {
|
||||||
|
if c > ' ' {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
p1 = p1.offset(1isize)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn is_quoted_headline(buf: *const libc::c_char) -> bool {
|
fn is_quoted_headline(buf: &str) -> bool {
|
||||||
/* This function may be called for the line _directly_ before a quote.
|
/* This function may be called for the line _directly_ before a quote.
|
||||||
The function checks if the line contains sth. like "On 01.02.2016, xy@z wrote:" in various languages.
|
The function checks if the line contains sth. like "On 01.02.2016, xy@z wrote:" in various languages.
|
||||||
- Currently, we simply check if the last character is a ':'.
|
- Currently, we simply check if the last character is a ':'.
|
||||||
- Checking for the existence of an email address may fail (headlines may show the user's name instead of the address) */
|
- Checking for the existence of an email address may fail (headlines may show the user's name instead of the address) */
|
||||||
let buf_len: libc::c_int = strlen(buf) as libc::c_int;
|
|
||||||
if buf_len > 80i32 {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if buf_len > 0i32 && *buf.offset((buf_len - 1i32) as isize) as libc::c_int == ':' as i32 {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
false
|
buf.len() <= 80 && buf.ends_with(':')
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn is_plain_quote(buf: *const libc::c_char) -> bool {
|
fn is_plain_quote(buf: &str) -> bool {
|
||||||
if *buf.offset(0isize) as libc::c_int == '>' as i32 {
|
buf.starts_with(">")
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use std::ffi::CStr;
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_simplify_trim() {
|
fn test_simplify_trim() {
|
||||||
unsafe {
|
let mut simplify = Simplify::new();
|
||||||
let mut simplify = Simplify::new();
|
let html = "\r\r\nline1<br>\r\n\r\n\r\rline2\n\r";
|
||||||
let html: *const libc::c_char =
|
let plain = simplify.simplify(html, true, false);
|
||||||
b"\r\r\nline1<br>\r\n\r\n\r\rline2\n\r\x00" as *const u8 as *const libc::c_char;
|
|
||||||
let plain: *mut libc::c_char =
|
|
||||||
simplify.simplify(html, strlen(html) as libc::c_int, true, 0);
|
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(plain, "line1\nline2");
|
||||||
CStr::from_ptr(plain as *const libc::c_char)
|
|
||||||
.to_str()
|
|
||||||
.unwrap(),
|
|
||||||
"line1\nline2",
|
|
||||||
);
|
|
||||||
|
|
||||||
free(plain as *mut libc::c_void);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_simplify_parse_href() {
|
fn test_simplify_parse_href() {
|
||||||
unsafe {
|
let mut simplify = Simplify::new();
|
||||||
let mut simplify = Simplify::new();
|
let html = "<a href=url>text</a";
|
||||||
let html: *const libc::c_char =
|
let plain = simplify.simplify(html, true, false);
|
||||||
b"<a href=url>text</a\x00" as *const u8 as *const libc::c_char;
|
|
||||||
let plain: *mut libc::c_char =
|
|
||||||
simplify.simplify(html, strlen(html) as libc::c_int, true, 0);
|
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(plain, "[text](url)");
|
||||||
CStr::from_ptr(plain as *const libc::c_char)
|
|
||||||
.to_str()
|
|
||||||
.unwrap(),
|
|
||||||
"[text](url)",
|
|
||||||
);
|
|
||||||
|
|
||||||
free(plain as *mut libc::c_void);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_simplify_bold_text() {
|
fn test_simplify_bold_text() {
|
||||||
unsafe {
|
let mut simplify = Simplify::new();
|
||||||
let mut simplify = Simplify::new();
|
let html = "<!DOCTYPE name [<!DOCTYPE ...>]><!-- comment -->text <b><?php echo ... ?>bold</b><![CDATA[<>]]>";
|
||||||
let html: *const libc::c_char =
|
let plain = simplify.simplify(html, true, false);
|
||||||
b"<!DOCTYPE name [<!DOCTYPE ...>]><!-- comment -->text <b><?php echo ... ?>bold</b><![CDATA[<>]]>\x00"
|
|
||||||
as *const u8 as *const libc::c_char;
|
|
||||||
let plain = simplify.simplify(html, strlen(html) as libc::c_int, true, 0);
|
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(plain, "text *bold*<>");
|
||||||
CStr::from_ptr(plain as *const libc::c_char)
|
|
||||||
.to_str()
|
|
||||||
.unwrap(),
|
|
||||||
"text *bold*<>",
|
|
||||||
);
|
|
||||||
|
|
||||||
free(plain as *mut libc::c_void);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_simplify_html_encoded() {
|
fn test_simplify_html_encoded() {
|
||||||
unsafe {
|
let mut simplify = Simplify::new();
|
||||||
let mut simplify = Simplify::new();
|
let html =
|
||||||
let html =
|
"<>"'& äÄöÖüÜß fooÆçÇ ♦‎‏‌&noent;‍";
|
||||||
b"<>"'& äÄöÖüÜß fooÆçÇ ♦‎‏‌&noent;‍\x00"
|
|
||||||
as *const u8 as *const libc::c_char;
|
|
||||||
let plain = simplify.simplify(html, strlen(html) as libc::c_int, true, 0);
|
|
||||||
|
|
||||||
assert_eq!(
|
let plain = simplify.simplify(html, true, false);
|
||||||
CStr::from_ptr(plain as *const libc::c_char)
|
|
||||||
.to_str()
|
|
||||||
.unwrap(),
|
|
||||||
"<>\"\'& äÄöÖüÜß fooÆçÇ \u{2666}\u{200e}\u{200f}\u{200c}&noent;\u{200d}"
|
|
||||||
);
|
|
||||||
|
|
||||||
free(plain as *mut libc::c_void);
|
assert_eq!(
|
||||||
}
|
plain,
|
||||||
|
"<>\"\'& äÄöÖüÜß fooÆçÇ \u{2666}\u{200e}\u{200f}\u{200c}&noent;\u{200d}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_simplify_utilities() {
|
||||||
|
assert!(is_empty_line(" \t"));
|
||||||
|
assert!(is_empty_line(""));
|
||||||
|
assert!(is_empty_line(" \r"));
|
||||||
|
assert!(!is_empty_line(" x"));
|
||||||
|
assert!(is_plain_quote("> hello world"));
|
||||||
|
assert!(is_plain_quote(">>"));
|
||||||
|
assert!(!is_plain_quote("Life is pain"));
|
||||||
|
assert!(!is_plain_quote(""));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -355,33 +355,6 @@ unsafe fn dc_utf8_strnlen(s: *const libc::c_char, n: size_t) -> size_t {
|
|||||||
j
|
j
|
||||||
}
|
}
|
||||||
|
|
||||||
/* split string into lines*/
|
|
||||||
pub unsafe fn dc_split_into_lines(buf_terminated: *const libc::c_char) -> Vec<*mut libc::c_char> {
|
|
||||||
let mut lines = Vec::new();
|
|
||||||
let mut line_chars = 0;
|
|
||||||
let mut p1: *const libc::c_char = buf_terminated;
|
|
||||||
let mut line_start: *const libc::c_char = p1;
|
|
||||||
while 0 != *p1 {
|
|
||||||
if *p1 as libc::c_int == '\n' as i32 {
|
|
||||||
lines.push(strndup(line_start, line_chars));
|
|
||||||
p1 = p1.offset(1isize);
|
|
||||||
line_start = p1;
|
|
||||||
line_chars = 0;
|
|
||||||
} else {
|
|
||||||
p1 = p1.offset(1isize);
|
|
||||||
line_chars += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
lines.push(strndup(line_start, line_chars));
|
|
||||||
lines
|
|
||||||
}
|
|
||||||
|
|
||||||
pub unsafe fn dc_free_splitted_lines(lines: Vec<*mut libc::c_char>) {
|
|
||||||
for s in lines {
|
|
||||||
free(s as *mut libc::c_void);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub unsafe fn dc_str_from_clist(
|
pub unsafe fn dc_str_from_clist(
|
||||||
list: *const clist,
|
list: *const clist,
|
||||||
delimiter: *const libc::c_char,
|
delimiter: *const libc::c_char,
|
||||||
|
|||||||
Reference in New Issue
Block a user