feat: add API to get similar chats

This commit is contained in:
link2xt
2023-01-07 13:35:31 +00:00
parent 1d9702e9e7
commit 30024abb6c
6 changed files with 217 additions and 4 deletions

View File

@@ -1334,6 +1334,20 @@ int dc_get_msg_cnt (dc_context_t* context, uint32_t ch
int dc_get_fresh_msg_cnt (dc_context_t* context, uint32_t chat_id);
/**
* Returns a list of similar chats.
*
* @warning This is an experimental API which may change or be removed in the future.
*
* @memberof dc_context_t
* @param context The context object as returned from dc_context_new().
* @param chat_id The ID of the chat for which to find similar chats.
* @return The list of similar chats.
* On errors, NULL is returned.
* Must be freed using dc_chatlist_unref() when no longer used.
*/
dc_chatlist_t* dc_get_similar_chatlist (dc_context_t* context, uint32_t chat_id);
/**
* Estimate the number of messages that will be deleted

View File

@@ -1260,6 +1260,30 @@ pub unsafe extern "C" fn dc_get_fresh_msg_cnt(
})
}
#[no_mangle]
pub unsafe extern "C" fn dc_get_similar_chatlist(
context: *mut dc_context_t,
chat_id: u32,
) -> *mut dc_chatlist_t {
if context.is_null() {
eprintln!("ignoring careless call to dc_get_similar_chatlist()");
return ptr::null_mut();
}
let ctx = &*context;
let chat_id = ChatId::new(chat_id);
match block_on(chat_id.get_similar_chatlist(ctx))
.context("failed to get similar chatlist")
.log_err(ctx)
{
Ok(list) => {
let ffi_list = ChatlistWrapper { context, list };
Box::into_raw(Box::new(ffi_list))
}
Err(_) => ptr::null_mut(),
}
}
#[no_mangle]
pub unsafe extern "C" fn dc_estimate_deletion_cnt(
context: *mut dc_context_t,

View File

@@ -39,6 +39,7 @@ pub mod types;
use num_traits::FromPrimitive;
use types::account::Account;
use types::chat::FullChat;
use types::chat_list::ChatListEntry;
use types::contact::ContactObject;
use types::events::Event;
use types::http::HttpResponse;
@@ -566,6 +567,25 @@ impl CommandApi {
Ok(l)
}
/// Returns chats similar to the given one.
async fn get_similar_chatlist_entries(
&self,
account_id: u32,
chat_id: u32,
) -> Result<Vec<ChatListEntry>> {
let ctx = self.get_context(account_id).await?;
let chat_id = ChatId::new(chat_id);
let list = chat_id.get_similar_chatlist(&ctx).await?;
let mut l: Vec<ChatListEntry> = Vec::with_capacity(list.len());
for i in 0..list.len() {
l.push(ChatListEntry(
list.get_chat_id(i)?.to_u32(),
list.get_msg_id(i)?.unwrap_or_default().to_u32(),
));
}
Ok(l)
}
async fn get_chatlist_items_by_entries(
&self,
account_id: u32,

View File

@@ -805,15 +805,30 @@ pub async fn cmdline(context: Context, line: &str, chat_id: &mut ChatId) -> Resu
}
"chatinfo" => {
ensure!(sel_chat.is_some(), "No chat selected.");
let sel_chat_id = sel_chat.as_ref().unwrap().get_id();
let contacts =
chat::get_chat_contacts(&context, sel_chat.as_ref().unwrap().get_id()).await?;
let contacts = chat::get_chat_contacts(&context, sel_chat_id).await?;
println!("Memberlist:");
log_contactlist(&context, &contacts).await?;
println!("{} contacts", contacts.len());
let similar_chats = sel_chat_id.get_similar_chat_ids(&context).await?;
if !similar_chats.is_empty() {
println!("Similar chats: ");
for (similar_chat_id, metric) in similar_chats {
let similar_chat = Chat::load_from_db(&context, similar_chat_id).await?;
println!(
"{} (#{}) {:.1}",
similar_chat.name,
similar_chat_id,
100.0 * metric
);
}
}
println!(
"{} contacts\nLocation streaming: {}",
contacts.len(),
"Location streaming: {}",
location::is_sending_locations_to_chat(
&context,
Some(sel_chat.as_ref().unwrap().get_id())

View File

@@ -13,6 +13,7 @@ use serde::{Deserialize, Serialize};
use crate::aheader::EncryptPreference;
use crate::blob::BlobObject;
use crate::chatlist::Chatlist;
use crate::color::str_to_color;
use crate::config::Config;
use crate::constants::{
@@ -880,6 +881,124 @@ impl ChatId {
Ok(timestamp)
}
/// Returns a list of active similar chat IDs sorted by similarity metric.
///
/// Jaccard similarity coefficient is used to estimate similarity of chat member sets.
///
/// Chat is considered active if something was posted there within the last 42 days.
pub async fn get_similar_chat_ids(self, context: &Context) -> Result<Vec<(ChatId, f64)>> {
// Count number of common members in this and other chats.
let intersection: Vec<(ChatId, f64)> = context
.sql
.query_map(
"SELECT y.chat_id, SUM(x.contact_id = y.contact_id)
FROM chats_contacts as x
JOIN chats_contacts as y
WHERE x.contact_id > 9
AND y.contact_id > 9
AND x.chat_id=?
AND y.chat_id<>x.chat_id
GROUP BY y.chat_id",
(self,),
|row| {
let chat_id: ChatId = row.get(0)?;
let intersection: f64 = row.get(1)?;
Ok((chat_id, intersection))
},
|rows| {
rows.collect::<std::result::Result<Vec<_>, _>>()
.map_err(Into::into)
},
)
.await
.context("failed to calculate member set intersections")?;
let chat_size: HashMap<ChatId, f64> = context
.sql
.query_map(
"SELECT chat_id, count(*) AS n
FROM chats_contacts where contact_id > 9
GROUP BY chat_id",
(),
|row| {
let chat_id: ChatId = row.get(0)?;
let size: f64 = row.get(1)?;
Ok((chat_id, size))
},
|rows| {
rows.collect::<std::result::Result<HashMap<ChatId, f64>, _>>()
.map_err(Into::into)
},
)
.await
.context("failed to count chat member sizes")?;
let our_chat_size = chat_size.get(&self).copied().unwrap_or_default();
let mut chats_with_metrics = Vec::new();
for (chat_id, intersection_size) in intersection {
if intersection_size > 0.0 {
let other_chat_size = chat_size.get(&chat_id).copied().unwrap_or_default();
let union_size = our_chat_size + other_chat_size - intersection_size;
let metric = intersection_size / union_size;
chats_with_metrics.push((chat_id, metric))
}
}
chats_with_metrics.sort_unstable_by(|(chat_id1, metric1), (chat_id2, metric2)| {
metric2
.partial_cmp(metric1)
.unwrap_or(chat_id2.cmp(chat_id1))
});
// Select up to five similar active chats.
let mut res = Vec::new();
let now = time();
for (chat_id, metric) in chats_with_metrics {
if let Some(chat_timestamp) = chat_id.get_timestamp(context).await? {
if now > chat_timestamp + 42 * 24 * 3600 {
// Chat was inactive for 42 days, skip.
continue;
}
}
if metric < 0.1 {
// Chat is unrelated.
break;
}
let chat = Chat::load_from_db(context, chat_id).await?;
if chat.typ != Chattype::Group {
continue;
}
match chat.visibility {
ChatVisibility::Normal | ChatVisibility::Pinned => {}
ChatVisibility::Archived => continue,
}
res.push((chat_id, metric));
if res.len() >= 5 {
break;
}
}
Ok(res)
}
/// Returns similar chats as a [`Chatlist`].
///
/// [`Chatlist`]: crate::chatlist::Chatlist
pub async fn get_similar_chatlist(self, context: &Context) -> Result<Chatlist> {
let chat_ids: Vec<ChatId> = self
.get_similar_chat_ids(context)
.await
.context("failed to get similar chat IDs")?
.into_iter()
.map(|(chat_id, _metric)| chat_id)
.collect();
let chatlist = Chatlist::from_chat_ids(context, &chat_ids).await?;
Ok(chatlist)
}
pub(crate) async fn get_param(self, context: &Context) -> Result<Params> {
let res: Option<String> = context
.sql

View File

@@ -296,6 +296,27 @@ impl Chatlist {
Ok(Chatlist { ids })
}
/// Converts list of chat IDs to a chatlist.
pub(crate) async fn from_chat_ids(context: &Context, chat_ids: &[ChatId]) -> Result<Self> {
let mut ids = Vec::new();
for &chat_id in chat_ids {
let msg_id: Option<MsgId> = context
.sql
.query_get_value(
"SELECT id
FROM msgs
WHERE chat_id=?1
AND (hidden=0 OR state=?2)
ORDER BY timestamp DESC, id DESC LIMIT 1",
(chat_id, MessageState::OutDraft),
)
.await
.with_context(|| format!("failed to get msg ID for chat {}", chat_id))?;
ids.push((chat_id, msg_id));
}
Ok(Chatlist { ids })
}
/// Find out the number of chats.
pub fn len(&self) -> usize {
self.ids.len()