feat: Case-insensitive search for non-ASCII chat and contact names (#7477)

This makes `Contact::get_all()` and `Chatlist::try_load()` case-insensitive for non-ASCII chat and
contact names as well. The same approach as in f6f4ccc6ea "feat:
Case-insensitive search for non-ASCII messages (#5052)" is used: `chats.name_normalized` and
`contacts.name_normalized` columns are added which store lowercased/normalized names (for a contact,
if the name is unset, it's a normalized authname). If a normalized name is the same as the
chat/contact name, it's not stored to reduce the db size. A db migration is added for 10000 random
chats and the same number of the most recently seen contacts. For users it will probably migrate all
chats/contacts; for bots, which may have more data, it's not important.
This commit is contained in:
iequidoo
2025-11-30 21:16:26 -03:00
committed by iequidoo
parent aa5ee19340
commit dea1b414db
10 changed files with 212 additions and 88 deletions

View File

@@ -10,11 +10,11 @@ use crate::context::{Context, WeakContext};
use crate::events::EventType;
use crate::headerdef::HeaderDef;
use crate::log::warn;
use crate::message::{self, Message, MsgId, Viewtype};
use crate::message::{Message, MsgId, Viewtype};
use crate::mimeparser::{MimeMessage, SystemMessage};
use crate::net::dns::lookup_host_with_cache;
use crate::param::Param;
use crate::tools::time;
use crate::tools::{normalize_text, time};
use anyhow::{Context as _, Result, ensure};
use sdp::SessionDescription;
use serde::Serialize;
@@ -86,7 +86,7 @@ impl CallInfo {
.sql
.execute(
"UPDATE msgs SET txt=?, txt_normalized=? WHERE id=?",
(text, message::normalize_text(text), self.msg.id),
(text, normalize_text(text), self.msg.id),
)
.await?;
Ok(())

View File

@@ -45,7 +45,7 @@ use crate::sync::{self, Sync::*, SyncData};
use crate::tools::{
IsNoneOrEmpty, SystemTime, buf_compress, create_broadcast_secret, create_id,
create_outgoing_rfc724_mid, create_smeared_timestamp, create_smeared_timestamps, get_abs_path,
gm2local_offset, smeared_time, time, truncate_msg_text,
gm2local_offset, normalize_text, smeared_time, time, truncate_msg_text,
};
use crate::webxdc::StatusUpdateSerial;
use crate::{chatlist_events, imap};
@@ -286,10 +286,11 @@ impl ChatId {
let timestamp = cmp::min(timestamp, smeared_time(context));
let row_id =
context.sql.insert(
"INSERT INTO chats (type, name, grpid, blocked, created_timestamp, protected, param) VALUES(?, ?, ?, ?, ?, 0, ?);",
"INSERT INTO chats (type, name, name_normalized, grpid, blocked, created_timestamp, protected, param) VALUES(?, ?, ?, ?, ?, ?, 0, ?)",
(
chattype,
&grpname,
normalize_text(&grpname),
grpid,
create_blocked,
timestamp,
@@ -782,7 +783,7 @@ impl ChatId {
time(),
msg.viewtype,
&msg.text,
message::normalize_text(&msg.text),
normalize_text(&msg.text),
msg.param.to_string(),
msg.in_reply_to.as_deref().unwrap_or_default(),
msg.id,
@@ -823,7 +824,7 @@ impl ChatId {
msg.viewtype,
MessageState::OutDraft,
&msg.text,
message::normalize_text(&msg.text),
normalize_text(&msg.text),
msg.param.to_string(),
1,
msg.in_reply_to.as_deref().unwrap_or_default(),
@@ -1919,7 +1920,7 @@ impl Chat {
msg.viewtype,
msg.state,
msg_text,
message::normalize_text(&msg_text),
normalize_text(&msg_text),
&msg.subject,
msg.param.to_string(),
msg.hidden,
@@ -1970,7 +1971,7 @@ impl Chat {
msg.viewtype,
msg.state,
msg_text,
message::normalize_text(&msg_text),
normalize_text(&msg_text),
&msg.subject,
msg.param.to_string(),
msg.hidden,
@@ -2274,8 +2275,8 @@ async fn update_special_chat_name(
context
.sql
.execute(
"UPDATE chats SET name=? WHERE id=? AND name!=?",
(&name, chat_id, &name),
"UPDATE chats SET name=?, name_normalized=? WHERE id=? AND name!=?",
(&name, normalize_text(&name), chat_id, &name),
)
.await?;
}
@@ -2388,11 +2389,12 @@ impl ChatIdBlocked {
.transaction(move |transaction| {
transaction.execute(
"INSERT INTO chats
(type, name, param, blocked, created_timestamp)
VALUES(?, ?, ?, ?, ?)",
(type, name, name_normalized, param, blocked, created_timestamp)
VALUES(?, ?, ?, ?, ?, ?)",
(
Chattype::Single,
chat_name,
&chat_name,
normalize_text(&chat_name),
params.to_string(),
create_blocked as u8,
smeared_time,
@@ -2944,7 +2946,7 @@ pub(crate) async fn save_text_edit_to_db(
"UPDATE msgs SET txt=?, txt_normalized=?, param=? WHERE id=?",
(
new_text,
message::normalize_text(new_text),
normalize_text(new_text),
original_msg.param.to_string(),
original_msg.id,
),
@@ -3433,9 +3435,15 @@ pub(crate) async fn create_group_ex(
.sql
.insert(
"INSERT INTO chats
(type, name, grpid, param, created_timestamp)
VALUES(?, ?, ?, \'U=1\', ?);",
(Chattype::Group, &chat_name, &grpid, timestamp),
(type, name, name_normalized, grpid, param, created_timestamp)
VALUES(?, ?, ?, ?, \'U=1\', ?)",
(
Chattype::Group,
&chat_name,
normalize_text(&chat_name),
&grpid,
timestamp,
),
)
.await?;
@@ -3519,9 +3527,15 @@ pub(crate) async fn create_out_broadcast_ex(
t.execute(
"INSERT INTO chats
(type, name, grpid, created_timestamp)
VALUES(?, ?, ?, ?);",
(Chattype::OutBroadcast, &chat_name, &grpid, timestamp),
(type, name, name_normalized, grpid, created_timestamp)
VALUES(?, ?, ?, ?, ?)",
(
Chattype::OutBroadcast,
&chat_name,
normalize_text(&chat_name),
&grpid,
timestamp,
),
)?;
let chat_id = ChatId::new(t.last_insert_rowid().try_into()?);
@@ -4094,8 +4108,8 @@ async fn rename_ex(
context
.sql
.execute(
"UPDATE chats SET name=? WHERE id=?;",
(new_name.to_string(), chat_id),
"UPDATE chats SET name=?, name_normalized=? WHERE id=?",
(&new_name, normalize_text(&new_name), chat_id),
)
.await?;
if chat.is_promoted()
@@ -4529,7 +4543,7 @@ pub async fn add_device_msg_with_importance(
msg.viewtype,
state,
&msg.text,
message::normalize_text(&msg.text),
normalize_text(&msg.text),
msg.param.to_string(),
rfc724_mid,
),
@@ -4668,7 +4682,7 @@ pub(crate) async fn add_info_msg_with_cmd(
Viewtype::Text,
MessageState::InNoticed,
text,
message::normalize_text(text),
normalize_text(text),
rfc724_mid,
ephemeral_timer,
param.to_string(),
@@ -4710,7 +4724,7 @@ pub(crate) async fn update_msg_text_and_timestamp(
.sql
.execute(
"UPDATE msgs SET txt=?, txt_normalized=?, timestamp=? WHERE id=?;",
(text, message::normalize_text(text), timestamp, msg_id),
(text, normalize_text(text), timestamp, msg_id),
)
.await?;
context.emit_msgs_changed(chat_id, msg_id);

View File

@@ -185,7 +185,7 @@ impl Chatlist {
warn!(context, "Cannot update special chat names: {err:#}.")
}
let str_like_cmd = format!("%{query}%");
let str_like_cmd = format!("%{}%", query.to_lowercase());
context
.sql
.query_map_vec(
@@ -201,7 +201,7 @@ impl Chatlist {
ORDER BY timestamp DESC, id DESC LIMIT 1)
WHERE c.id>9 AND c.id!=?2
AND c.blocked!=1
AND c.name LIKE ?3
AND IFNULL(c.name_normalized,c.name) LIKE ?3
AND (NOT ?4 OR EXISTS (SELECT 1 FROM msgs m WHERE m.chat_id = c.id AND m.state == ?5 AND hidden=0))
GROUP BY c.id
ORDER BY IFNULL(m.timestamp,c.created_timestamp) DESC, m.id DESC;",
@@ -472,7 +472,7 @@ mod tests {
use crate::chat::save_msgs;
use crate::chat::{
add_contact_to_chat, create_group, get_chat_contacts, remove_contact_from_chat,
send_text_msg,
send_text_msg, set_chat_name,
};
use crate::receive_imf::receive_imf;
use crate::stock_str::StockMessage;
@@ -482,7 +482,7 @@ mod tests {
use std::time::Duration;
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn test_try_load() {
async fn test_try_load() -> Result<()> {
let mut tcm = TestContextManager::new();
let bob = &tcm.bob().await;
let chat_id1 = create_group(bob, "a chat").await.unwrap();
@@ -552,6 +552,15 @@ mod tests {
.await
.unwrap();
assert_eq!(chats.len(), 1);
let chat_id = create_group(bob, "Δ-chat").await.unwrap();
let chats = Chatlist::try_load(bob, 0, Some("δ"), None).await?;
assert_eq!(chats.len(), 1);
assert_eq!(chats.ids[0].0, chat_id);
set_chat_name(bob, chat_id, "abcδe").await?;
let chats = Chatlist::try_load(bob, 0, Some("Δ"), None).await?;
assert_eq!(chats.len(), 1);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]

View File

@@ -36,7 +36,7 @@ use crate::message::MessageState;
use crate::mimeparser::AvatarAction;
use crate::param::{Param, Params};
use crate::sync::{self, Sync::*};
use crate::tools::{SystemTime, duration_to_str, get_abs_path, time, to_lowercase};
use crate::tools::{SystemTime, duration_to_str, get_abs_path, normalize_text, time, to_lowercase};
use crate::{chat, chatlist_events, ensure_and_debug_assert_ne, stock_str};
/// Time during which a contact is considered as seen recently.
@@ -115,9 +115,23 @@ impl ContactId {
let row = context
.sql
.transaction(|transaction| {
let authname;
let name_or_authname = if !name.is_empty() {
name
} else {
authname = transaction.query_row(
"SELECT authname FROM contacts WHERE id=?",
(self,),
|row| {
let authname: String = row.get(0)?;
Ok(authname)
},
)?;
&authname
};
let is_changed = transaction.execute(
"UPDATE contacts SET name=?1 WHERE id=?2 AND name!=?1",
(name, self),
"UPDATE contacts SET name=?1, name_normalized=?2 WHERE id=?3 AND name!=?1",
(name, normalize_text(name_or_authname), self),
)? > 0;
if is_changed {
update_chat_names(context, transaction, self)?;
@@ -967,11 +981,22 @@ impl Contact {
} else {
row_name
};
let new_authname = if update_authname {
name.to_string()
} else {
row_authname
};
transaction.execute(
"UPDATE contacts SET name=?, addr=?, origin=?, authname=? WHERE id=?;",
"UPDATE contacts SET name=?, name_normalized=?, addr=?, origin=?, authname=? WHERE id=?",
(
new_name,
&new_name,
normalize_text(
if !new_name.is_empty() {
&new_name
} else {
&new_authname
}),
if update_addr {
addr.to_string()
} else {
@@ -982,11 +1007,7 @@ impl Contact {
} else {
row_origin
},
if update_authname {
name.to_string()
} else {
row_authname
},
&new_authname,
row_id,
),
)?;
@@ -998,18 +1019,18 @@ impl Contact {
sth_modified = Modifier::Modified;
}
} else {
let update_name = manual;
let update_authname = !manual;
transaction.execute(
"INSERT INTO contacts (name, addr, fingerprint, origin, authname)
VALUES (?, ?, ?, ?, ?);",
"
INSERT INTO contacts (name, name_normalized, addr, fingerprint, origin, authname)
VALUES (?, ?, ?, ?, ?, ?)
",
(
if update_name { &name } else { "" },
if manual { &name } else { "" },
normalize_text(&name),
&addr,
fingerprint,
origin,
if update_authname { &name } else { "" },
if manual { "" } else { &name },
),
)?;
@@ -1112,17 +1133,19 @@ impl Contact {
Origin::IncomingReplyTo
};
if query.is_some() {
let s3str_like_cmd = format!("%{}%", query.unwrap_or(""));
let s3str_like_cmd = format!("%{}%", query.unwrap_or("").to_lowercase());
context
.sql
.query_map(
"SELECT c.id, c.addr FROM contacts c
WHERE c.id>?
AND (c.fingerprint='')=?
AND c.origin>=? \
AND c.blocked=0 \
AND (iif(c.name='',c.authname,c.name) LIKE ? OR c.addr LIKE ?) \
ORDER BY c.last_seen DESC, c.id DESC;",
"
SELECT c.id, c.addr FROM contacts c
WHERE c.id>?
AND (c.fingerprint='')=?
AND c.origin>=?
AND c.blocked=0
AND (IFNULL(c.name_normalized,IIF(c.name='',c.authname,c.name)) LIKE ? OR c.addr LIKE ?)
ORDER BY c.last_seen DESC, c.id DESC
",
(
ContactId::LAST_SPECIAL,
flag_address,
@@ -1249,8 +1272,18 @@ impl Contact {
};
// Always do an update in case the blocking is reset or name is changed.
transaction.execute(
"UPDATE contacts SET name=?, origin=?, blocked=1, fingerprint=? WHERE addr=?",
(&name, Origin::MailinglistAddress, fingerprint, &grpid),
"
UPDATE contacts
SET name=?, name_normalized=IIF(?1='',name_normalized,?), origin=?, blocked=1, fingerprint=?
WHERE addr=?
",
(
&name,
normalize_text(&name),
Origin::MailinglistAddress,
fingerprint,
&grpid,
),
)?;
}
Ok(())
@@ -1725,8 +1758,8 @@ fn update_chat_names(
};
let count = transaction.execute(
"UPDATE chats SET name=?1 WHERE id=?2 AND name!=?1",
(chat_name, chat_id),
"UPDATE chats SET name=?1, name_normalized=?2 WHERE id=?3 AND name!=?1",
(&chat_name, normalize_text(&chat_name), chat_id),
)?;
if count > 0 {

View File

@@ -60,16 +60,16 @@ async fn test_get_contacts() -> Result<()> {
let context = tcm.bob().await;
let alice = tcm.alice().await;
alice
.set_config(Config::Displayname, Some("MyName"))
.set_config(Config::Displayname, Some("MyNameIsΔ"))
.await?;
// Alice is not in the contacts yet.
let contacts = Contact::get_all(&context.ctx, 0, Some("Alice")).await?;
assert_eq!(contacts.len(), 0);
let contacts = Contact::get_all(&context.ctx, 0, Some("MyName")).await?;
let contacts = Contact::get_all(&context.ctx, 0, Some("MyNameIsΔ")).await?;
assert_eq!(contacts.len(), 0);
let claire_id = Contact::create(&context, "someone", "claire@example.org").await?;
let claire_id = Contact::create(&context, "Δ-someone", "claire@example.org").await?;
let dave_id = Contact::create(&context, "", "dave@example.org").await?;
let id = context.add_or_lookup_contact_id(&alice).await;
@@ -77,8 +77,8 @@ async fn test_get_contacts() -> Result<()> {
let contact = Contact::get_by_id(&context, id).await.unwrap();
assert_eq!(contact.get_name(), "");
assert_eq!(contact.get_authname(), "MyName");
assert_eq!(contact.get_display_name(), "MyName");
assert_eq!(contact.get_authname(), "MyNameIsΔ");
assert_eq!(contact.get_display_name(), "MyNameIsΔ");
// Search by name.
let contacts = Contact::get_all(&context, 0, Some("myname")).await?;
@@ -93,12 +93,12 @@ async fn test_get_contacts() -> Result<()> {
let contacts = Contact::get_all(&context, 0, Some("Foobar")).await?;
assert_eq!(contacts.len(), 0);
// Set Alice name to "someone" manually.
id.set_name(&context, "someone").await?;
// Set Alice name manually.
id.set_name(&context, "Δ-someone").await?;
let contact = Contact::get_by_id(&context.ctx, id).await.unwrap();
assert_eq!(contact.get_name(), "someone");
assert_eq!(contact.get_authname(), "MyName");
assert_eq!(contact.get_display_name(), "someone");
assert_eq!(contact.get_name(), "Δ-someone");
assert_eq!(contact.get_authname(), "MyNameIsΔ");
assert_eq!(contact.get_display_name(), "Δ-someone");
// Not searchable by authname, because it is not displayed.
let contacts = Contact::get_all(&context, 0, Some("MyName")).await?;
@@ -108,7 +108,9 @@ async fn test_get_contacts() -> Result<()> {
info!(&context, "add_self={add_self}");
// Search key-contacts by display name (same as manually set name).
let contacts = Contact::get_all(&context.ctx, add_self, Some("someone")).await?;
let contacts = Contact::get_all(&context.ctx, add_self, Some("Δ-someone")).await?;
assert_eq!(contacts, vec![id]);
let contacts = Contact::get_all(&context.ctx, add_self, Some("δ-someon")).await?;
assert_eq!(contacts, vec![id]);
// Get all key-contacts.
@@ -120,7 +122,7 @@ async fn test_get_contacts() -> Result<()> {
}
// Search address-contacts by display name.
let contacts = Contact::get_all(&context, constants::DC_GCL_ADDRESS, Some("someone")).await?;
let contacts = Contact::get_all(&context, constants::DC_GCL_ADDRESS, Some("Δ-someone")).await?;
assert_eq!(contacts, vec![claire_id]);
// Get all address-contacts. Newer contacts go first.
@@ -134,6 +136,16 @@ async fn test_get_contacts() -> Result<()> {
.await?;
assert_eq!(contacts, vec![dave_id, claire_id, ContactId::SELF]);
// Reset the user-provided name for Alice.
id.set_name(&context, "").await?;
let contact = Contact::get_by_id(&context.ctx, id).await.unwrap();
assert_eq!(contact.get_name(), "");
assert_eq!(contact.get_authname(), "MyNameIsΔ");
assert_eq!(contact.get_display_name(), "MyNameIsΔ");
let contacts = Contact::get_all(&context, 0, Some("MyName")).await?;
assert_eq!(contacts.len(), 1);
let contacts = Contact::get_all(&context, 0, Some("δ")).await?;
assert_eq!(contacts.len(), 1);
Ok(())
}

View File

@@ -2248,14 +2248,5 @@ impl Viewtype {
}
}
/// Computes the value to store in the `msgs.txt_normalized` column, which makes
/// case-insensitive search possible for non-ASCII messages.
///
/// Yields `None` when `text` is pure ASCII, or when its lowercased form is identical to `text`.
pub(crate) fn normalize_text(text: &str) -> Option<String> {
    if text.is_ascii() {
        None
    } else {
        let lowered = text.to_lowercase();
        if lowered == text { None } else { Some(lowered) }
    }
}
#[cfg(test)]
mod message_tests;

View File

@@ -43,7 +43,9 @@ use crate::simplify;
use crate::stats::STATISTICS_BOT_EMAIL;
use crate::stock_str;
use crate::sync::Sync::*;
use crate::tools::{self, buf_compress, remove_subject_prefix, validate_broadcast_secret};
use crate::tools::{
self, buf_compress, normalize_text, remove_subject_prefix, validate_broadcast_secret,
};
use crate::{chatlist_events, ensure_and_debug_assert, ensure_and_debug_assert_eq, location};
/// This is the struct that is returned after receiving one email (aka MIME message).
@@ -2094,7 +2096,7 @@ RETURNING id
if trash { MessageState::Undefined } else { state },
if trash { MessengerMessage::No } else { is_dc_message },
if trash || hidden { "" } else { msg },
if trash || hidden { None } else { message::normalize_text(msg) },
if trash || hidden { None } else { normalize_text(msg) },
if trash || hidden { "" } else { &subject },
if trash {
"".to_string()
@@ -3091,7 +3093,10 @@ async fn apply_chat_name_and_avatar_changes(
info!(context, "Updating grpname for chat {}.", chat.id);
context
.sql
.execute("UPDATE chats SET name=? WHERE id=?;", (grpname, chat.id))
.execute(
"UPDATE chats SET name=?, name_normalized=? WHERE id=?",
(grpname, normalize_text(grpname), chat.id),
)
.await?;
*send_event_chat_modified = true;
}
@@ -3380,7 +3385,10 @@ async fn apply_mailinglist_changes(
info!(context, "Updating listname for chat {chat_id}.");
context
.sql
.execute("UPDATE chats SET name=? WHERE id=?;", (new_name, chat_id))
.execute(
"UPDATE chats SET name=?, name_normalized=? WHERE id=?",
(&new_name, normalize_text(&new_name), chat_id),
)
.await?;
context.emit_event(EventType::ChatModified(chat_id));
}

View File

@@ -19,7 +19,7 @@ use crate::log::warn;
use crate::message::MsgId;
use crate::provider::get_provider_info;
use crate::sql::Sql;
use crate::tools::{Time, inc_and_check, time_elapsed};
use crate::tools::{Time, inc_and_check, normalize_text, time_elapsed};
use crate::transport::ConfiguredLoginParam;
const DBVERSION: i32 = 68;
@@ -1454,6 +1454,56 @@ CREATE INDEX imap_sync_index ON imap_sync(transport_id, folder);
.await?;
}
inc_and_check(&mut migration_version, 143)?;
if dbversion < migration_version {
let trans_fn = |t: &mut rusqlite::Transaction| {
t.execute_batch(
"
ALTER TABLE chats ADD COLUMN name_normalized TEXT;
ALTER TABLE contacts ADD COLUMN name_normalized TEXT;
",
)?;
let mut stmt = t.prepare("UPDATE chats SET name_normalized=? WHERE id=?")?;
for res in t
.prepare("SELECT id, name FROM chats LIMIT 10000")?
.query_map((), |row| {
let id: u32 = row.get(0)?;
let name: String = row.get(1)?;
Ok((id, name))
})?
{
let (id, name) = res?;
if let Some(name_normalized) = normalize_text(&name) {
stmt.execute((name_normalized, id))?;
}
}
let mut stmt = t.prepare("UPDATE contacts SET name_normalized=? WHERE id=?")?;
for res in t
.prepare(
"
SELECT id, IIF(name='', authname, name) FROM contacts
ORDER BY last_seen DESC LIMIT 10000
",
)?
.query_map((), |row| {
let id: u32 = row.get(0)?;
let name: String = row.get(1)?;
Ok((id, name))
})?
{
let (id, name) = res?;
if let Some(name_normalized) = normalize_text(&name) {
stmt.execute((name_normalized, id))?;
}
}
Ok(())
};
sql.execute_migration_transaction(trans_fn, migration_version)
.await?;
}
let new_version = sql
.get_raw_config_int(VERSION_CFG)
.await?

View File

@@ -160,9 +160,7 @@ async fn test_key_contacts_migration_verified() -> Result<()> {
"#,
)?)).await?;
STOP_MIGRATIONS_AT
.scope(133, t.sql.run_migrations(&t))
.await?;
t.sql.run_migrations(&t).await?;
// Hidden address-contact can't be looked up.
assert!(

View File

@@ -779,6 +779,15 @@ pub(crate) fn to_lowercase(s: &str) -> Cow<'_, str> {
}
}
/// Produces the lowercased form of `text` for the special `*_normalized` db columns, which make
/// case-insensitive search possible for non-ASCII messages, chat names and contact names.
///
/// Returns `None` when `text` is pure ASCII or when lowercasing leaves it unchanged, so a
/// normalized value is only returned when it differs from `text`.
pub(crate) fn normalize_text(text: &str) -> Option<String> {
    if text.is_ascii() {
        return None;
    }
    let lowered = text.to_lowercase();
    (lowered != text).then_some(lowered)
}
/// Increments `*t` and checks that it equals to `expected` after that.
pub(crate) fn inc_and_check<T: PrimInt + AddAssign + std::fmt::Debug>(
t: &mut T,