From 026f678452082c3df4c41ad9ae0f123a01e6ff97 Mon Sep 17 00:00:00 2001 From: Hocuri Date: Sat, 4 May 2024 03:44:09 +0200 Subject: [PATCH] feat: Parsing vCards for contacts sharing (#5482) Co-authored-by: iequidoo --- Cargo.lock | 3 +- Cargo.toml | 5 +- deltachat-contact-tools/Cargo.toml | 6 +- deltachat-contact-tools/src/lib.rs | 274 ++++++++++++++++++++++++++++- 4 files changed, 275 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 623520447..f2d2be291 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1239,9 +1239,10 @@ dependencies = [ [[package]] name = "deltachat-contact-tools" -version = "0.1.0" +version = "0.0.0" dependencies = [ "anyhow", + "chrono", "once_cell", "regex", "rusqlite", diff --git a/Cargo.toml b/Cargo.toml index 3f7ce01e5..16badaa40 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,7 +48,7 @@ async_zip = { version = "0.0.12", default-features = false, features = ["deflate backtrace = "0.3" base64 = "0.22" brotli = { version = "5", default-features=false, features = ["std"] } -chrono = { version = "0.4.37", default-features=false, features = ["clock", "std"] } +chrono = { workspace = true } email = { git = "https://github.com/deltachat/rust-email", branch = "master" } encoded-words = { git = "https://github.com/async-email/encoded-words", branch = "master" } escaper = "0.1" @@ -168,7 +168,8 @@ harness = false anyhow = "1" once_cell = "1.18.0" regex = "1.10" -rusqlite = { version = "0.31" } +rusqlite = "0.31" +chrono = { version = "0.4.37", default-features=false, features = ["clock", "std"] } [features] default = ["vendored"] diff --git a/deltachat-contact-tools/Cargo.toml b/deltachat-contact-tools/Cargo.toml index 98ea3e724..50ef5f7ba 100644 --- a/deltachat-contact-tools/Cargo.toml +++ b/deltachat-contact-tools/Cargo.toml @@ -1,10 +1,9 @@ [package] name = "deltachat-contact-tools" -version = "0.1.0" +version = "0.0.0" # No semver-stable versioning edition = "2021" -description = "Contact-related tools, like parsing vcards and sanitizing name and address" +description = "Contact-related tools, like parsing vcards and sanitizing name and address. Meant for internal use in the deltachat crate." license = "MPL-2.0" -# TODO maybe it should be called "deltachat-text-utils" or similar? # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -13,6 +12,7 @@ anyhow = { workspace = true } once_cell = { workspace = true } regex = { workspace = true } rusqlite = { workspace = true } # Needed in order to `impl rusqlite::types::ToSql for EmailAddress`. Could easily be put behind a feature. +chrono = { workspace = true } [dev-dependencies] anyhow = { workspace = true, features = ["backtrace"] } # Enable `backtrace` feature in tests. diff --git a/deltachat-contact-tools/src/lib.rs b/deltachat-contact-tools/src/lib.rs index 385a18a77..3930268eb 100644 --- a/deltachat-contact-tools/src/lib.rs +++ b/deltachat-contact-tools/src/lib.rs @@ -29,10 +29,142 @@ use std::fmt; use std::ops::Deref; use anyhow::bail; +use anyhow::Context as _; use anyhow::Result; +use chrono::{DateTime, NaiveDateTime}; use once_cell::sync::Lazy; use regex::Regex; +// TODOs to clean up: +// - Check if sanitizing is done correctly everywhere +// - Apply lints everywhere (https://doc.rust-lang.org/cargo/reference/workspaces.html#the-lints-table) + +#[derive(Debug)] +/// A Contact, as represented in a VCard. +pub struct VcardContact { + /// The email address, vcard property `email` + pub addr: String, + /// The contact's display name, vcard property `fn` + pub display_name: String, + /// The contact's public PGP key in Base64, vcard property `key` + pub key: Option, + /// The contact's profile image (=avatar) in Base64, vcard property `photo` + pub profile_image: Option, + /// The timestamp when the vcard was created / last updated, vcard property `rev` + pub timestamp: Result, +} + +/// Parses `VcardContact`s from a given `&str`. +pub fn parse_vcard(vcard: &str) -> Result> { + fn remove_prefix<'a>(s: &'a str, prefix: &str) -> Option<&'a str> { + let start_of_s = s.get(..prefix.len())?; + + if start_of_s.eq_ignore_ascii_case(prefix) { + s.get(prefix.len()..) + } else { + None + } + } + fn vcard_property<'a>(s: &'a str, property: &str) -> Option<&'a str> { + let remainder = remove_prefix(s, property)?; + // If `s` is `EMAIL;TYPE=work:alice@example.com` and `property` is `EMAIL`, + // then `remainder` is now `;TYPE=work:alice@example.com` + + // TODO this doesn't handle the case where there are quotes around a colon + let (params, value) = remainder.split_once(':')?; + // In the example from above, `params` is now `;TYPE=work` + // and `value` is now `alice@example.com` + + if params + .chars() + .next() + .filter(|c| !c.is_ascii_punctuation() || *c == '_') + .is_some() + { + // `s` started with `property`, but the next character after it was not punctuation, + // so this line's property is actually something else + return None; + } + Some(value) + } + fn parse_datetime(datetime: &str) -> Result { + // According to https://www.rfc-editor.org/rfc/rfc6350#section-4.3.5, the timestamp + // is in ISO.8601.2004 format. DateTime::parse_from_rfc3339() apparently parses + // ISO.8601, but fails to parse any of the examples given. + // So, instead just parse using a format string. + + // Parses 19961022T140000Z, 19961022T140000-05, or 19961022T140000-0500. + let timestamp = match DateTime::parse_from_str(datetime, "%Y%m%dT%H%M%S%#z") { + Ok(datetime) => datetime.timestamp(), + // Parses 19961022T140000. + Err(e) => match NaiveDateTime::parse_from_str(datetime, "%Y%m%dT%H%M%S") { + Ok(datetime) => datetime + .and_local_timezone(chrono::offset::Local) + .single() + .context("Could not apply local timezone to parsed date and time")? + .timestamp(), + Err(_) => return Err(e.into()), + }, + }; + Ok(timestamp.try_into()?) + } + + let mut lines = vcard.lines().peekable(); + let mut contacts = Vec::new(); + + while lines.peek().is_some() { + // Skip to the start of the vcard: + for line in lines.by_ref() { + if line.eq_ignore_ascii_case("BEGIN:VCARD") { + break; + } + } + + let mut display_name = None; + let mut addr = None; + let mut key = None; + let mut photo = None; + let mut datetime = None; + + for line in lines.by_ref() { + if let Some(email) = vcard_property(line, "email") { + addr.get_or_insert(email); + } else if let Some(name) = vcard_property(line, "fn") { + display_name.get_or_insert(name); + } else if let Some(k) = remove_prefix(line, "KEY;PGP;ENCODING=BASE64:") + .or_else(|| remove_prefix(line, "KEY;TYPE=PGP;ENCODING=b:")) + .or_else(|| remove_prefix(line, "KEY:data:application/pgp-keys;base64,")) + { + key.get_or_insert(k); + } else if let Some(p) = remove_prefix(line, "PHOTO;JPEG;ENCODING=BASE64:") + .or_else(|| remove_prefix(line, "PHOTO;TYPE=JPEG;ENCODING=b:")) + .or_else(|| remove_prefix(line, "PHOTO;ENCODING=BASE64;TYPE=JPEG:")) + { + photo.get_or_insert(p); + } else if let Some(rev) = vcard_property(line, "rev") { + datetime.get_or_insert(rev); + } else if line.eq_ignore_ascii_case("END:VCARD") { + break; + } + } + + let (display_name, addr) = + sanitize_name_and_addr(display_name.unwrap_or(""), addr.unwrap_or("")); + + contacts.push(VcardContact { + display_name, + addr, + key: key.map(|s| s.to_string()), + profile_image: photo.map(|s| s.to_string()), + timestamp: datetime + .context("No timestamp in vcard") + .and_then(parse_datetime), + }); + } + + Ok(contacts) +} + /// Valid contact address. #[derive(Debug, Clone)] pub struct ContactAddress(String); @@ -81,14 +213,10 @@ impl rusqlite::types::ToSql for ContactAddress { /// Make the name and address pub fn sanitize_name_and_addr(name: &str, addr: &str) -> (String, String) { static ADDR_WITH_NAME_REGEX: Lazy = Lazy::new(|| Regex::new("(.*)<(.*)>").unwrap()); - if let Some(captures) = ADDR_WITH_NAME_REGEX.captures(addr.as_ref()) { + let (name, addr) = if let Some(captures) = ADDR_WITH_NAME_REGEX.captures(addr.as_ref()) { ( if name.is_empty() { - strip_rtlo_characters( - &captures - .get(1) - .map_or("".to_string(), |m| normalize_name(m.as_str())), - ) + strip_rtlo_characters(captures.get(1).map_or("", |m| m.as_str())) } else { strip_rtlo_characters(name) }, @@ -97,8 +225,21 @@ pub fn sanitize_name_and_addr(name: &str, addr: &str) -> (String, String) { .map_or("".to_string(), |m| m.as_str().to_string()), ) } else { - (strip_rtlo_characters(name), addr.to_string()) + ( + strip_rtlo_characters(&normalize_name(name)), + addr.to_string(), + ) + }; + let mut name = normalize_name(&name); + + // If the 'display name' is just the address, remove it: + // Otherwise, the contact would sometimes be shown as "alice@example.com (alice@example.com)" (see `get_name_n_addr()`). + // If the display name is empty, DC will just show the address when it needs a display name. + if name == addr { + name = "".to_string(); } + + (name, addr) } /// Normalize a name. @@ -230,8 +371,69 @@ impl rusqlite::types::ToSql for EmailAddress { #[cfg(test)] mod tests { + use chrono::TimeZone; + use super::*; + #[test] + fn test_vcard_thunderbird() { + let contacts = parse_vcard( + "BEGIN:VCARD +VERSION:4.0 +FN:'Alice Mueller' +EMAIL;PREF=1:alice.mueller@posteo.de +UID:a8083264-ca47-4be7-98a8-8ec3db1447ca +END:VCARD +BEGIN:VCARD +VERSION:4.0 +FN:'bobzzz@freenet.de' +EMAIL;PREF=1:bobzzz@freenet.de +UID:cac4fef4-6351-4854-bbe4-9b6df857eaed +END:VCARD +", + ) + .unwrap(); + + assert_eq!(contacts[0].addr, "alice.mueller@posteo.de".to_string()); + assert_eq!(contacts[0].display_name, "Alice Mueller".to_string()); + assert_eq!(contacts[0].key, None); + assert_eq!(contacts[0].profile_image, None); + assert!(contacts[0].timestamp.is_err()); + + assert_eq!(contacts[1].addr, "bobzzz@freenet.de".to_string()); + assert_eq!(contacts[1].display_name, "".to_string()); + assert_eq!(contacts[1].key, None); + assert_eq!(contacts[1].profile_image, None); + assert!(contacts[1].timestamp.is_err()); + + assert_eq!(contacts.len(), 2); + } + + #[test] + fn test_vcard_simple_example() { + let contacts = parse_vcard( + "BEGIN:VCARD +VERSION:4.0 +FN:Alice Wonderland +N:Wonderland;Alice;;;Ms. +GENDER:W +EMAIL;TYPE=work:alice@example.com +KEY;TYPE=PGP;ENCODING=b:[base64-data] +REV:20240418T184242Z + +END:VCARD", + ) + .unwrap(); + + assert_eq!(contacts[0].addr, "alice@example.com".to_string()); + assert_eq!(contacts[0].display_name, "Alice Wonderland".to_string()); + assert_eq!(contacts[0].key, Some("[base64-data]".to_string())); + assert_eq!(contacts[0].profile_image, None); + assert_eq!(*contacts[0].timestamp.as_ref().unwrap(), 1713465762); + + assert_eq!(contacts.len(), 1); + } + #[test] fn test_contact_address() -> Result<()> { let alice_addr = "alice@example.org"; @@ -277,4 +479,62 @@ mod tests { assert!(EmailAddress::new("u@tt").is_ok()); assert_eq!(EmailAddress::new("@d.tt").is_ok(), false); } + + #[test] + fn test_vcard_android() { + let contacts = parse_vcard( + "BEGIN:VCARD +VERSION:2.1 +N:;Bob;;; +FN:Bob +TEL;CELL:+1-234-567-890 +EMAIL;HOME:bob@example.org +END:VCARD +BEGIN:VCARD +VERSION:2.1 +N:;Alice;;; +FN:Alice +EMAIL;HOME:alice@example.org +END:VCARD +", + ) + .unwrap(); + + assert_eq!(contacts[0].addr, "bob@example.org".to_string()); + assert_eq!(contacts[0].display_name, "Bob".to_string()); + assert_eq!(contacts[0].key, None); + assert_eq!(contacts[0].profile_image, None); + + assert_eq!(contacts[1].addr, "alice@example.org".to_string()); + assert_eq!(contacts[1].display_name, "Alice".to_string()); + assert_eq!(contacts[1].key, None); + assert_eq!(contacts[1].profile_image, None); + + assert_eq!(contacts.len(), 2); + } + + #[test] + fn test_vcard_local_datetime() { + let contacts = parse_vcard( + "BEGIN:VCARD\n\ + VERSION:4.0\n\ + FN:Alice Wonderland\n\ + EMAIL;TYPE=work:alice@example.org\n\ + REV:20240418T184242\n\ + END:VCARD", + ) + .unwrap(); + assert_eq!(contacts.len(), 1); + assert_eq!(contacts[0].addr, "alice@example.org".to_string()); + assert_eq!(contacts[0].display_name, "Alice Wonderland".to_string()); + assert_eq!( + *contacts[0].timestamp.as_ref().unwrap(), + chrono::offset::Local + .with_ymd_and_hms(2024, 4, 18, 18, 42, 42) + .unwrap() + .timestamp() + .try_into() + .unwrap() + ); + } }