From b2a6876a5032a65b0dd22e8f89464783b8305aed Mon Sep 17 00:00:00 2001 From: dignifiedquire Date: Fri, 16 Aug 2019 12:41:13 +0200 Subject: [PATCH] refactor(location): switch to rust based xml parsing --- Cargo.lock | 26 +++++ Cargo.toml | 1 + src/dc_location.rs | 277 +++++++++++++++++++++++++-------------------- tests/stress.rs | 38 ------- 4 files changed, 184 insertions(+), 158 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d85e25edc..b3a84924f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -487,6 +487,7 @@ dependencies = [ "pkg-config 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", "pretty_env_logger 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "quick-xml 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)", "r2d2 0.8.5 (registry+https://github.com/rust-lang/crates.io-index)", "r2d2_sqlite 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", @@ -545,6 +546,17 @@ dependencies = [ "syn 0.15.44 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "derive_more" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.44 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "des" version = "0.3.0" @@ -1622,6 +1634,18 @@ name = "quick-error" version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "quick-xml" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "derive_more 0.14.1 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)", + "failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "quote" version = "0.6.13" @@ -2770,6 +2794,7 @@ dependencies = [ "checksum darling_macro 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c6d8dac1c6f1d29a41c4712b4400f878cb4fcc4c7628f298dd75038e024998d1" "checksum derive_builder 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3ac53fa6a3cda160df823a9346442525dcaf1e171999a1cf23e67067e4fd64d4" "checksum derive_builder_core 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0288a23da9333c246bb18c143426074a6ae96747995c5819d2947b64cd942b37" +"checksum derive_more 0.14.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6d944ac6003ed268757ef1ee686753b57efc5fcf0ebe7b64c9fc81e7e32ff839" "checksum des 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "74ba5f1b5aee9772379c2670ba81306e65a93c0ee3caade7a1d22b188d88a3af" "checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" "checksum digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5" @@ -2887,6 +2912,7 @@ dependencies = [ "checksum publicsuffix 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5afecba86dcf1e4fd610246f89899d1924fe12e1e89f555eb7c7f710f3c5ad1d" "checksum pulldown-cmark 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "eef52fac62d0ea7b9b4dc7da092aa64ea7ec3d90af6679422d3d7e0e14b6ee15" "checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0" +"checksum quick-xml 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c2b074258da4f2ccb1c450380c5bd8e77db2508de2161f574c70930d8e880482" "checksum quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" "checksum quote 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7ab938ebe6f1c82426b5fb82eaf10c3e3028c53deaa3fbe38f5904b37cf4d767" "checksum r2d2 0.8.5 (registry+https://github.com/rust-lang/crates.io-index)" = "bc42ce75d9f4447fb2a04bbe1ed5d18dd949104572850ec19b164e274919f81b" diff --git a/Cargo.toml b/Cargo.toml index 41a3e4f87..a85a940e4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,7 @@ backtrace = "0.3.33" byteorder = "1.3.1" itertools = "0.8.0" image-meta = "0.1.0" +quick-xml = "0.15.0" [dev-dependencies] tempfile = "3.0" diff --git a/src/dc_location.rs b/src/dc_location.rs index 581ba6301..d67c94b6e 100644 --- a/src/dc_location.rs +++ b/src/dc_location.rs @@ -1,12 +1,14 @@ use std::ffi::CString; +use quick_xml; +use quick_xml::events::{BytesEnd, BytesStart, BytesText}; + use crate::constants::Event; use crate::constants::*; use crate::context::*; use crate::dc_chat::*; use crate::dc_job::*; use crate::dc_msg::*; -use crate::dc_saxparser::*; use crate::dc_tools::*; use crate::param::*; use crate::sql; @@ -478,161 +480,153 @@ pub unsafe fn dc_kml_parse( content_bytes: size_t, ) -> dc_kml_t { let mut kml = dc_kml_t::new(); - let mut content_nullterminated: *mut libc::c_char = 0 as *mut libc::c_char; - let mut saxparser: dc_saxparser_t = dc_saxparser_t { - starttag_cb: None, - endtag_cb: None, - text_cb: None, - userdata: 0 as *mut libc::c_void, - }; if content_bytes > (1 * 1024 * 1024) { warn!( context, 0, "A kml-files with {} bytes is larger than reasonably expected.", content_bytes, ); - } else { - content_nullterminated = dc_null_terminate(content, content_bytes as libc::c_int); - if !content_nullterminated.is_null() { - kml.locations = Some(Vec::with_capacity(100)); - dc_saxparser_init( - &mut saxparser, - &mut kml as *mut dc_kml_t as *mut libc::c_void, - ); - dc_saxparser_set_tag_handler( - &mut saxparser, - Some(kml_starttag_cb), - Some(kml_endtag_cb), - ); - dc_saxparser_set_text_handler(&mut saxparser, Some(kml_text_cb)); - dc_saxparser_parse(&mut saxparser, content_nullterminated); + return kml; + } + + let content_null = dc_null_terminate(content, content_bytes as libc::c_int); + if !content_null.is_null() { + let mut reader = quick_xml::Reader::from_str(as_str(content_null)); + reader.trim_text(true); + + kml.locations = Some(Vec::with_capacity(100)); + + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf) { + Ok(quick_xml::events::Event::Start(ref e)) => kml_starttag_cb(e, &mut kml, &reader), + Ok(quick_xml::events::Event::End(ref e)) => kml_endtag_cb(e, &mut kml), + Ok(quick_xml::events::Event::Text(ref e)) => kml_text_cb(e, &mut kml, &reader), + Err(e) => { + panic!( + "Location parsing: Error at position {}: {:?}", + reader.buffer_position(), + e + ); + } + Ok(quick_xml::events::Event::Eof) => break, + _ => (), + } + buf.clear(); } } - free(content_nullterminated as *mut libc::c_void); + free(content_null.cast()); kml } -unsafe fn kml_text_cb(userdata: *mut libc::c_void, text: *const libc::c_char, _len: libc::c_int) { - let mut kml: *mut dc_kml_t = userdata as *mut dc_kml_t; - if 0 != (*kml).tag & (0x4 | 0x10) { - let mut val: *mut libc::c_char = dc_strdup(text); - dc_str_replace( - &mut val, - b"\n\x00" as *const u8 as *const libc::c_char, - b"\x00" as *const u8 as *const libc::c_char, - ); - dc_str_replace( - &mut val, - b"\r\x00" as *const u8 as *const libc::c_char, - b"\x00" as *const u8 as *const libc::c_char, - ); - dc_str_replace( - &mut val, - b"\t\x00" as *const u8 as *const libc::c_char, - b"\x00" as *const u8 as *const libc::c_char, - ); - dc_str_replace( - &mut val, - b" \x00" as *const u8 as *const libc::c_char, - b"\x00" as *const u8 as *const libc::c_char, - ); - if 0 != (*kml).tag & 0x4 && strlen(val) >= 19 { +fn kml_text_cb( + event: &BytesText, + kml: &mut dc_kml_t, + reader: &quick_xml::Reader, +) { + if 0 != kml.tag & (0x4 | 0x10) { + let val = event.unescape_and_decode(reader).unwrap_or_default(); + + let val = val + .replace("\n", "") + .replace("\r", "") + .replace("\t", "") + .replace(" ", ""); + + if 0 != kml.tag & 0x4 && val.len() >= 19 { // YYYY-MM-DDTHH:MM:SSZ // 0 4 7 10 13 16 19 - let val_r = as_str(val); - match chrono::NaiveDateTime::parse_from_str(val_r, "%Y-%m-%dT%H:%M:%SZ") { + match chrono::NaiveDateTime::parse_from_str(&val, "%Y-%m-%dT%H:%M:%SZ") { Ok(res) => { - (*kml).curr.timestamp = res.timestamp(); - if (*kml).curr.timestamp > time() { - (*kml).curr.timestamp = time(); + kml.curr.timestamp = res.timestamp(); + if kml.curr.timestamp > time() { + kml.curr.timestamp = time(); } } Err(_err) => { - (*kml).curr.timestamp = time(); + kml.curr.timestamp = time(); } } - } else if 0 != (*kml).tag & 0x10 { - let mut comma: *mut libc::c_char = strchr(val, ',' as i32); - if !comma.is_null() { - let longitude: *mut libc::c_char = val; - let latitude: *mut libc::c_char = comma.offset(1isize); - *comma = 0 as libc::c_char; - comma = strchr(latitude, ',' as i32); - if !comma.is_null() { - *comma = 0 as libc::c_char - } - (*kml).curr.latitude = dc_atof(latitude); - (*kml).curr.longitude = dc_atof(longitude) + } else if 0 != kml.tag & 0x10 { + let parts = val.splitn(2, ',').collect::>(); + if parts.len() == 2 { + kml.curr.longitude = parts[0].parse().unwrap_or_default(); + kml.curr.latitude = parts[1].parse().unwrap_or_default(); } } - free(val as *mut libc::c_void); - }; + } } -unsafe fn kml_endtag_cb(userdata: *mut libc::c_void, tag: *const libc::c_char) { - let mut kml: *mut dc_kml_t = userdata as *mut dc_kml_t; - if strcmp(tag, b"placemark\x00" as *const u8 as *const libc::c_char) == 0 { - if 0 != (*kml).tag & 0x1 - && 0 != (*kml).curr.timestamp - && 0. != (*kml).curr.latitude - && 0. != (*kml).curr.longitude +fn kml_endtag_cb(event: &BytesEnd, kml: &mut dc_kml_t) { + let tag = String::from_utf8_lossy(event.name()).trim().to_lowercase(); + + if tag == "placemark" { + if 0 != kml.tag & 0x1 + && 0 != kml.curr.timestamp + && 0. != kml.curr.latitude + && 0. != kml.curr.longitude { - let location = (*kml).curr.clone(); - ((*kml).locations.as_mut().unwrap()).push(location); + if let Some(ref mut locations) = kml.locations { + locations.push(std::mem::replace(&mut kml.curr, dc_location::new())); + } } - (*kml).tag = 0 + kml.tag = 0 }; } -/******************************************************************************* - * parse kml-files - ******************************************************************************/ -unsafe fn kml_starttag_cb( - userdata: *mut libc::c_void, - tag: *const libc::c_char, - attr: *mut *mut libc::c_char, +fn kml_starttag_cb( + event: &BytesStart, + kml: &mut dc_kml_t, + reader: &quick_xml::Reader, ) { - let mut kml: *mut dc_kml_t = userdata as *mut dc_kml_t; - if strcmp(tag, b"document\x00" as *const u8 as *const libc::c_char) == 0 { - let addr: *const libc::c_char = - dc_attr_find(attr, b"addr\x00" as *const u8 as *const libc::c_char); - if !addr.is_null() { - (*kml).addr = dc_strdup(addr) + let tag = String::from_utf8_lossy(event.name()).trim().to_lowercase(); + if tag == "document" { + if let Some(addr) = event.attributes().find(|attr| { + attr.as_ref() + .map(|a| String::from_utf8_lossy(a.key).trim().to_lowercase() == "addr") + .unwrap_or_default() + }) { + kml.addr = unsafe { + addr.unwrap() + .unescape_and_decode_value(reader) + .unwrap_or_default() + .strdup() + }; } - } else if strcmp(tag, b"placemark\x00" as *const u8 as *const libc::c_char) == 0 { - (*kml).tag = 0x1; - (*kml).curr.timestamp = 0; - (*kml).curr.latitude = 0 as libc::c_double; - (*kml).curr.longitude = 0.0f64; - (*kml).curr.accuracy = 0.0f64 - } else if strcmp(tag, b"timestamp\x00" as *const u8 as *const libc::c_char) == 0 - && 0 != (*kml).tag & 0x1 - { - (*kml).tag = 0x1 | 0x2 - } else if strcmp(tag, b"when\x00" as *const u8 as *const libc::c_char) == 0 - && 0 != (*kml).tag & 0x2 - { - (*kml).tag = 0x1 | 0x2 | 0x4 - } else if strcmp(tag, b"point\x00" as *const u8 as *const libc::c_char) == 0 - && 0 != (*kml).tag & 0x1 - { - (*kml).tag = 0x1 | 0x8 - } else if strcmp(tag, b"coordinates\x00" as *const u8 as *const libc::c_char) == 0 - && 0 != (*kml).tag & 0x8 - { - (*kml).tag = 0x1 | 0x8 | 0x10; - let accuracy: *const libc::c_char = - dc_attr_find(attr, b"accuracy\x00" as *const u8 as *const libc::c_char); - if !accuracy.is_null() { - (*kml).curr.accuracy = dc_atof(accuracy) + } else if tag == "placemark" { + kml.tag = 0x1; + kml.curr.timestamp = 0; + kml.curr.latitude = 0 as libc::c_double; + kml.curr.longitude = 0.0f64; + kml.curr.accuracy = 0.0f64 + } else if tag == "timestamp" && 0 != kml.tag & 0x1 { + kml.tag = 0x1 | 0x2 + } else if tag == "when" && 0 != kml.tag & 0x2 { + kml.tag = 0x1 | 0x2 | 0x4 + } else if tag == "point" && 0 != kml.tag & 0x1 { + kml.tag = 0x1 | 0x8 + } else if tag == "coordinates" && 0 != kml.tag & 0x8 { + kml.tag = 0x1 | 0x8 | 0x10; + if let Some(acc) = event.attributes().find(|attr| { + attr.as_ref() + .map(|a| String::from_utf8_lossy(a.key).trim().to_lowercase() == "accuracy") + .unwrap_or_default() + }) { + let v = acc + .unwrap() + .unescape_and_decode_value(reader) + .unwrap_or_default(); + + kml.curr.accuracy = v.trim().parse().unwrap_or_default(); } - }; + } } pub unsafe fn dc_kml_unref(kml: &mut dc_kml_t) { - free((*kml).addr as *mut libc::c_void); + free(kml.addr as *mut libc::c_void); } #[allow(non_snake_case)] @@ -751,3 +745,46 @@ pub unsafe fn dc_job_do_DC_JOB_MAYBE_SEND_LOC_ENDED(context: &Context, job: &mut } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_utils::dummy_context; + + #[test] + fn test_dc_kml_parse() { + unsafe { + let context = dummy_context(); + + let xml = + b"\n\n\n2019-03-06T21:09:57Z9.423110,53.790302\n\n \n\t2018-12-13T22:11:12Z\t 19.423110 \t , \n 63.790302\n \n\n\x00" + as *const u8 as *const libc::c_char; + + let mut kml = dc_kml_parse(&context.ctx, xml, strlen(xml)); + + assert!(!kml.addr.is_null()); + assert_eq!(as_str(kml.addr as *const libc::c_char), "user@example.org",); + + let locations_ref = &kml.locations.as_ref().unwrap(); + assert_eq!(locations_ref.len(), 2); + + assert!(locations_ref[0].latitude > 53.6f64); + assert!(locations_ref[0].latitude < 53.8f64); + assert!(locations_ref[0].longitude > 9.3f64); + assert!(locations_ref[0].longitude < 9.5f64); + assert!(locations_ref[0].accuracy > 31.9f64); + assert!(locations_ref[0].accuracy < 32.1f64); + assert_eq!(locations_ref[0].timestamp, 1551906597); + + assert!(locations_ref[1].latitude > 63.6f64); + assert!(locations_ref[1].latitude < 63.8f64); + assert!(locations_ref[1].longitude > 19.3f64); + assert!(locations_ref[1].longitude < 19.5f64); + assert!(locations_ref[1].accuracy > 2.4f64); + assert!(locations_ref[1].accuracy < 2.6f64); + assert_eq!(locations_ref[1].timestamp, 1544739072); + + dc_kml_unref(&mut kml); + } + } +} diff --git a/tests/stress.rs b/tests/stress.rs index 72192732a..a3a83c51b 100644 --- a/tests/stress.rs +++ b/tests/stress.rs @@ -13,7 +13,6 @@ use deltachat::context::*; use deltachat::dc_chat::*; use deltachat::dc_configure::*; use deltachat::dc_imex::*; -use deltachat::dc_location::*; use deltachat::dc_lot::*; use deltachat::dc_mimeparser::*; use deltachat::dc_qr::*; @@ -689,43 +688,6 @@ unsafe fn create_test_context() -> TestContext { TestContext { ctx: ctx, dir: dir } } -#[test] -fn test_dc_kml_parse() { - unsafe { - let context = create_test_context(); - - let xml: *const libc::c_char = - b"\n\n\n2019-03-06T21:09:57Z9.423110,53.790302\n\n \n\t2018-12-13T22:11:12Z\t 19.423110 \t , \n 63.790302\n \n\n\x00" - as *const u8 as *const libc::c_char; - - let mut kml = dc_kml_parse(&context.ctx, xml, strlen(xml)); - - assert!(!kml.addr.is_null()); - assert_eq!(as_str(kml.addr as *const libc::c_char), "user@example.org",); - - let locations_ref = &kml.locations.as_ref().unwrap(); - assert_eq!(locations_ref.len(), 2); - - assert!(locations_ref[0].latitude > 53.6f64); - assert!(locations_ref[0].latitude < 53.8f64); - assert!(locations_ref[0].longitude > 9.3f64); - assert!(locations_ref[0].longitude < 9.5f64); - assert!(locations_ref[0].accuracy > 31.9f64); - assert!(locations_ref[0].accuracy < 32.1f64); - assert_eq!(locations_ref[0].timestamp, 1551906597); - - assert!(locations_ref[1].latitude > 63.6f64); - assert!(locations_ref[1].latitude < 63.8f64); - assert!(locations_ref[1].longitude > 19.3f64); - assert!(locations_ref[1].longitude < 19.5f64); - assert!(locations_ref[1].accuracy > 2.4f64); - assert!(locations_ref[1].accuracy < 2.6f64); - assert_eq!(locations_ref[1].timestamp, 1544739072); - - dc_kml_unref(&mut kml); - } -} - #[test] fn test_dc_mimeparser_with_context() { unsafe {