Add an experimental "similar chats" API.

For a given chat, other chats are ranked by the Jaccard similarity of their
member sets. The result is exposed through the C FFI
(dc_get_similar_chatlist), JSON-RPC (get_similar_chatlist_entries) and the
REPL `chatinfo` command.

Review notes:
* Line breaks and generic type arguments were restored; the patch text had
  been collapsed and had lost every `<...>` argument list.
* get_similar_chat_ids: ties in the similarity metric are now broken by chat
  ID, and the metric cut-off is checked before querying the chat timestamp.
* Chatlist::from_chat_ids: the result vector is preallocated.
* The blob hashes on the `index` lines of src/chat.rs and src/chatlist.rs
  predate these review changes.

diff --git a/deltachat-ffi/deltachat.h b/deltachat-ffi/deltachat.h
index ed90de3d1..ff76c3d7c 100644
--- a/deltachat-ffi/deltachat.h
+++ b/deltachat-ffi/deltachat.h
@@ -1334,6 +1334,20 @@ int             dc_get_msg_cnt               (dc_context_t* context, uint32_t ch
 int             dc_get_fresh_msg_cnt         (dc_context_t* context, uint32_t chat_id);
 
 
+/**
+ * Returns a list of similar chats.
+ *
+ * @warning This is an experimental API which may change or be removed in the future.
+ *
+ * @memberof dc_context_t
+ * @param context The context object as returned from dc_context_new().
+ * @param chat_id The ID of the chat for which to find similar chats.
+ * @return The list of similar chats.
+ *     On errors, NULL is returned.
+ *     Must be freed using dc_chatlist_unref() when no longer used.
+ */
+dc_chatlist_t*  dc_get_similar_chatlist      (dc_context_t* context, uint32_t chat_id);
+
 
 /**
  * Estimate the number of messages that will be deleted
diff --git a/deltachat-ffi/src/lib.rs b/deltachat-ffi/src/lib.rs
index 64ee55c41..7548b8a1c 100644
--- a/deltachat-ffi/src/lib.rs
+++ b/deltachat-ffi/src/lib.rs
@@ -1260,6 +1260,30 @@ pub unsafe extern "C" fn dc_get_fresh_msg_cnt(
     })
 }
 
+#[no_mangle]
+pub unsafe extern "C" fn dc_get_similar_chatlist(
+    context: *mut dc_context_t,
+    chat_id: u32,
+) -> *mut dc_chatlist_t {
+    if context.is_null() {
+        eprintln!("ignoring careless call to dc_get_similar_chatlist()");
+        return ptr::null_mut();
+    }
+    let ctx = &*context;
+
+    let chat_id = ChatId::new(chat_id);
+    match block_on(chat_id.get_similar_chatlist(ctx))
+        .context("failed to get similar chatlist")
+        .log_err(ctx)
+    {
+        Ok(list) => {
+            let ffi_list = ChatlistWrapper { context, list };
+            Box::into_raw(Box::new(ffi_list))
+        }
+        Err(_) => ptr::null_mut(),
+    }
+}
+
 #[no_mangle]
 pub unsafe extern "C" fn dc_estimate_deletion_cnt(
     context: *mut dc_context_t,
diff --git a/deltachat-jsonrpc/src/api/mod.rs b/deltachat-jsonrpc/src/api/mod.rs
index d9b6db8aa..beb5a3c7e 100644
--- a/deltachat-jsonrpc/src/api/mod.rs
+++ b/deltachat-jsonrpc/src/api/mod.rs
@@ -39,6 +39,7 @@ pub mod types;
 use num_traits::FromPrimitive;
 use types::account::Account;
 use types::chat::FullChat;
+use types::chat_list::ChatListEntry;
 use types::contact::ContactObject;
 use types::events::Event;
 use types::http::HttpResponse;
@@ -566,6 +567,25 @@ impl CommandApi {
         Ok(l)
     }
 
+    /// Returns chats similar to the given one.
+    async fn get_similar_chatlist_entries(
+        &self,
+        account_id: u32,
+        chat_id: u32,
+    ) -> Result<Vec<ChatListEntry>> {
+        let ctx = self.get_context(account_id).await?;
+        let chat_id = ChatId::new(chat_id);
+        let list = chat_id.get_similar_chatlist(&ctx).await?;
+        let mut l: Vec<ChatListEntry> = Vec::with_capacity(list.len());
+        for i in 0..list.len() {
+            l.push(ChatListEntry(
+                list.get_chat_id(i)?.to_u32(),
+                list.get_msg_id(i)?.unwrap_or_default().to_u32(),
+            ));
+        }
+        Ok(l)
+    }
+
     async fn get_chatlist_items_by_entries(
         &self,
         account_id: u32,
diff --git a/deltachat-repl/src/cmdline.rs b/deltachat-repl/src/cmdline.rs
index c3e473dbe..9e479685b 100644
--- a/deltachat-repl/src/cmdline.rs
+++ b/deltachat-repl/src/cmdline.rs
@@ -805,15 +805,30 @@ pub async fn cmdline(context: Context, line: &str, chat_id: &mut ChatId) -> Resu
         }
         "chatinfo" => {
             ensure!(sel_chat.is_some(), "No chat selected.");
+            let sel_chat_id = sel_chat.as_ref().unwrap().get_id();
 
-            let contacts =
-                chat::get_chat_contacts(&context, sel_chat.as_ref().unwrap().get_id()).await?;
+            let contacts = chat::get_chat_contacts(&context, sel_chat_id).await?;
             println!("Memberlist:");
 
             log_contactlist(&context, &contacts).await?;
+            println!("{} contacts", contacts.len());
+
+            let similar_chats = sel_chat_id.get_similar_chat_ids(&context).await?;
+            if !similar_chats.is_empty() {
+                println!("Similar chats: ");
+                for (similar_chat_id, metric) in similar_chats {
+                    let similar_chat = Chat::load_from_db(&context, similar_chat_id).await?;
+                    println!(
+                        "{} (#{}) {:.1}",
+                        similar_chat.name,
+                        similar_chat_id,
+                        100.0 * metric
+                    );
+                }
+            }
+
             println!(
-                "{} contacts\nLocation streaming: {}",
-                contacts.len(),
+                "Location streaming: {}",
                 location::is_sending_locations_to_chat(
                     &context,
                     Some(sel_chat.as_ref().unwrap().get_id())
diff --git a/src/chat.rs b/src/chat.rs
index a1e3b9f86..be163f421 100644
--- a/src/chat.rs
+++ b/src/chat.rs
@@ -13,6 +13,7 @@ use serde::{Deserialize, Serialize};
 
 use crate::aheader::EncryptPreference;
 use crate::blob::BlobObject;
+use crate::chatlist::Chatlist;
 use crate::color::str_to_color;
 use crate::config::Config;
 use crate::constants::{
@@ -880,6 +881,129 @@ impl ChatId {
         Ok(timestamp)
     }
 
+    /// Returns a list of active similar chat IDs sorted by similarity metric.
+    ///
+    /// Jaccard similarity coefficient is used to estimate similarity of chat member sets.
+    ///
+    /// Chat is considered active if something was posted there within the last 42 days.
+    ///
+    /// At most five chats are returned, most similar first. Only non-archived group chats
+    /// with a similarity of at least 0.1 are included.
+    pub async fn get_similar_chat_ids(self, context: &Context) -> Result<Vec<(ChatId, f64)>> {
+        // Count number of common members in this and other chats.
+        let intersection: Vec<(ChatId, f64)> = context
+            .sql
+            .query_map(
+                "SELECT y.chat_id, SUM(x.contact_id = y.contact_id)
+                 FROM chats_contacts as x
+                 JOIN chats_contacts as y
+                 WHERE x.contact_id > 9
+                   AND y.contact_id > 9
+                   AND x.chat_id=?
+                   AND y.chat_id<>x.chat_id
+                 GROUP BY y.chat_id",
+                (self,),
+                |row| {
+                    let chat_id: ChatId = row.get(0)?;
+                    let intersection: f64 = row.get(1)?;
+                    Ok((chat_id, intersection))
+                },
+                |rows| {
+                    rows.collect::<std::result::Result<Vec<_>, _>>()
+                        .map_err(Into::into)
+                },
+            )
+            .await
+            .context("failed to calculate member set intersections")?;
+
+        let chat_size: HashMap<ChatId, f64> = context
+            .sql
+            .query_map(
+                "SELECT chat_id, count(*) AS n
+                 FROM chats_contacts where contact_id > 9
+                 GROUP BY chat_id",
+                (),
+                |row| {
+                    let chat_id: ChatId = row.get(0)?;
+                    let size: f64 = row.get(1)?;
+                    Ok((chat_id, size))
+                },
+                |rows| {
+                    rows.collect::<std::result::Result<HashMap<ChatId, f64>, _>>()
+                        .map_err(Into::into)
+                },
+            )
+            .await
+            .context("failed to count chat member sizes")?;
+
+        let our_chat_size = chat_size.get(&self).copied().unwrap_or_default();
+        let mut chats_with_metrics = Vec::new();
+        for (chat_id, intersection_size) in intersection {
+            if intersection_size > 0.0 {
+                let other_chat_size = chat_size.get(&chat_id).copied().unwrap_or_default();
+                let union_size = our_chat_size + other_chat_size - intersection_size;
+                let metric = intersection_size / union_size;
+                chats_with_metrics.push((chat_id, metric))
+            }
+        }
+        // Sort by descending metric; break ties by chat ID so the order is deterministic.
+        chats_with_metrics.sort_unstable_by(|(chat_id1, metric1), (chat_id2, metric2)| {
+            metric2
+                .total_cmp(metric1)
+                .then_with(|| chat_id2.cmp(chat_id1))
+        });
+
+        // Select up to five similar active chats.
+        let mut res = Vec::new();
+        let now = time();
+        for (chat_id, metric) in chats_with_metrics {
+            if metric < 0.1 {
+                // Chats are sorted by descending metric,
+                // so this one and all remaining chats are unrelated.
+                break;
+            }
+
+            if let Some(chat_timestamp) = chat_id.get_timestamp(context).await? {
+                if now > chat_timestamp + 42 * 24 * 3600 {
+                    // Chat was inactive for 42 days, skip.
+                    continue;
+                }
+            }
+
+            let chat = Chat::load_from_db(context, chat_id).await?;
+            if chat.typ != Chattype::Group {
+                continue;
+            }
+
+            match chat.visibility {
+                ChatVisibility::Normal | ChatVisibility::Pinned => {}
+                ChatVisibility::Archived => continue,
+            }
+
+            res.push((chat_id, metric));
+            if res.len() >= 5 {
+                break;
+            }
+        }
+
+        Ok(res)
+    }
+
+    /// Returns similar chats as a [`Chatlist`].
+    ///
+    /// [`Chatlist`]: crate::chatlist::Chatlist
+    pub async fn get_similar_chatlist(self, context: &Context) -> Result<Chatlist> {
+        let chat_ids: Vec<ChatId> = self
+            .get_similar_chat_ids(context)
+            .await
+            .context("failed to get similar chat IDs")?
+            .into_iter()
+            .map(|(chat_id, _metric)| chat_id)
+            .collect();
+        let chatlist = Chatlist::from_chat_ids(context, &chat_ids).await?;
+        Ok(chatlist)
+    }
+
     pub(crate) async fn get_param(self, context: &Context) -> Result<Params> {
         let res: Option<String> = context
             .sql
diff --git a/src/chatlist.rs b/src/chatlist.rs
index c7a2e4b8a..2cb14c012 100644
--- a/src/chatlist.rs
+++ b/src/chatlist.rs
@@ -296,6 +296,30 @@ impl Chatlist {
         Ok(Chatlist { ids })
     }
 
+    /// Converts list of chat IDs to a chatlist.
+    ///
+    /// Each chat is paired with the ID of its newest message that is either
+    /// not hidden or a draft, if there is one. The order of `chat_ids` is preserved.
+    pub(crate) async fn from_chat_ids(context: &Context, chat_ids: &[ChatId]) -> Result<Self> {
+        let mut ids = Vec::with_capacity(chat_ids.len());
+        for &chat_id in chat_ids {
+            let msg_id: Option<MsgId> = context
+                .sql
+                .query_get_value(
+                    "SELECT id
+                     FROM msgs
+                     WHERE chat_id=?1
+                       AND (hidden=0 OR state=?2)
+                     ORDER BY timestamp DESC, id DESC LIMIT 1",
+                    (chat_id, MessageState::OutDraft),
+                )
+                .await
+                .with_context(|| format!("failed to get msg ID for chat {}", chat_id))?;
+            ids.push((chat_id, msg_id));
+        }
+        Ok(Chatlist { ids })
+    }
+
     /// Find out the number of chats.
     pub fn len(&self) -> usize {
         self.ids.len()