mirror of
https://github.com/chatmail/core.git
synced 2026-04-17 21:46:35 +03:00
feat: Case-insensitive search for non-ASCII messages (#5052)
SQLite search with `LIKE` is case-insensitive only for ASCII chars. To make it case-insensitive for all messages, create a new column `msgs.txt_normalized` defaulting to `NULL` (so we do not bump up the database size in a migration) and storing lowercased/normalized text there when the row is created/updated. When doing a search, search over `IFNULL(txt_normalized, txt)`.
This commit is contained in:
27
src/chat.rs
27
src/chat.rs
@@ -927,12 +927,13 @@ impl ChatId {
|
||||
.sql
|
||||
.execute(
|
||||
"UPDATE msgs
|
||||
SET timestamp=?,type=?,txt=?, param=?,mime_in_reply_to=?
|
||||
SET timestamp=?,type=?,txt=?,txt_normalized=?,param=?,mime_in_reply_to=?
|
||||
WHERE id=?;",
|
||||
(
|
||||
time(),
|
||||
msg.viewtype,
|
||||
&msg.text,
|
||||
message::normalize_text(&msg.text),
|
||||
msg.param.to_string(),
|
||||
msg.in_reply_to.as_deref().unwrap_or_default(),
|
||||
msg.id,
|
||||
@@ -956,10 +957,11 @@ impl ChatId {
|
||||
type,
|
||||
state,
|
||||
txt,
|
||||
txt_normalized,
|
||||
param,
|
||||
hidden,
|
||||
mime_in_reply_to)
|
||||
VALUES (?,?,?, ?,?,?,?,?,?);",
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?);",
|
||||
(
|
||||
self,
|
||||
ContactId::SELF,
|
||||
@@ -967,6 +969,7 @@ impl ChatId {
|
||||
msg.viewtype,
|
||||
MessageState::OutDraft,
|
||||
&msg.text,
|
||||
message::normalize_text(&msg.text),
|
||||
msg.param.to_string(),
|
||||
1,
|
||||
msg.in_reply_to.as_deref().unwrap_or_default(),
|
||||
@@ -2075,7 +2078,7 @@ impl Chat {
|
||||
.execute(
|
||||
"UPDATE msgs
|
||||
SET rfc724_mid=?, chat_id=?, from_id=?, to_id=?, timestamp=?, type=?,
|
||||
state=?, txt=?, subject=?, param=?,
|
||||
state=?, txt=?, txt_normalized=?, subject=?, param=?,
|
||||
hidden=?, mime_in_reply_to=?, mime_references=?, mime_modified=?,
|
||||
mime_headers=?, mime_compressed=1, location_id=?, ephemeral_timer=?,
|
||||
ephemeral_timestamp=?
|
||||
@@ -2089,6 +2092,7 @@ impl Chat {
|
||||
msg.viewtype,
|
||||
msg.state,
|
||||
msg.text,
|
||||
message::normalize_text(&msg.text),
|
||||
&msg.subject,
|
||||
msg.param.to_string(),
|
||||
msg.hidden,
|
||||
@@ -2117,6 +2121,7 @@ impl Chat {
|
||||
type,
|
||||
state,
|
||||
txt,
|
||||
txt_normalized,
|
||||
subject,
|
||||
param,
|
||||
hidden,
|
||||
@@ -2128,7 +2133,7 @@ impl Chat {
|
||||
location_id,
|
||||
ephemeral_timer,
|
||||
ephemeral_timestamp)
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1,?,?,?);",
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1,?,?,?);",
|
||||
params_slice![
|
||||
msg.rfc724_mid,
|
||||
msg.chat_id,
|
||||
@@ -2138,6 +2143,7 @@ impl Chat {
|
||||
msg.viewtype,
|
||||
msg.state,
|
||||
msg.text,
|
||||
message::normalize_text(&msg.text),
|
||||
&msg.subject,
|
||||
msg.param.to_string(),
|
||||
msg.hidden,
|
||||
@@ -4370,9 +4376,10 @@ pub async fn add_device_msg_with_importance(
|
||||
timestamp_rcvd,
|
||||
type,state,
|
||||
txt,
|
||||
txt_normalized,
|
||||
param,
|
||||
rfc724_mid)
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?,?);",
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?,?,?);",
|
||||
(
|
||||
chat_id,
|
||||
ContactId::DEVICE,
|
||||
@@ -4383,6 +4390,7 @@ pub async fn add_device_msg_with_importance(
|
||||
msg.viewtype,
|
||||
state,
|
||||
&msg.text,
|
||||
message::normalize_text(&msg.text),
|
||||
msg.param.to_string(),
|
||||
rfc724_mid,
|
||||
),
|
||||
@@ -4486,8 +4494,8 @@ pub(crate) async fn add_info_msg_with_cmd(
|
||||
|
||||
let row_id =
|
||||
context.sql.insert(
|
||||
"INSERT INTO msgs (chat_id,from_id,to_id,timestamp,timestamp_sent,timestamp_rcvd,type,state,txt,rfc724_mid,ephemeral_timer, param,mime_in_reply_to)
|
||||
VALUES (?,?,?, ?,?,?,?,?, ?,?,?, ?,?);",
|
||||
"INSERT INTO msgs (chat_id,from_id,to_id,timestamp,timestamp_sent,timestamp_rcvd,type,state,txt,txt_normalized,rfc724_mid,ephemeral_timer,param,mime_in_reply_to)
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);",
|
||||
(
|
||||
chat_id,
|
||||
from_id.unwrap_or(ContactId::INFO),
|
||||
@@ -4498,6 +4506,7 @@ pub(crate) async fn add_info_msg_with_cmd(
|
||||
Viewtype::Text,
|
||||
MessageState::InNoticed,
|
||||
text,
|
||||
message::normalize_text(text),
|
||||
rfc724_mid,
|
||||
ephemeral_timer,
|
||||
param.to_string(),
|
||||
@@ -4542,8 +4551,8 @@ pub(crate) async fn update_msg_text_and_timestamp(
|
||||
context
|
||||
.sql
|
||||
.execute(
|
||||
"UPDATE msgs SET txt=?, timestamp=? WHERE id=?;",
|
||||
(text, timestamp, msg_id),
|
||||
"UPDATE msgs SET txt=?, txt_normalized=?, timestamp=? WHERE id=?;",
|
||||
(text, message::normalize_text(text), timestamp, msg_id),
|
||||
)
|
||||
.await?;
|
||||
context.emit_msgs_changed(chat_id, msg_id);
|
||||
|
||||
@@ -1259,12 +1259,12 @@ impl Context {
|
||||
Ok(list)
|
||||
}
|
||||
|
||||
/// Searches for messages containing the query string.
|
||||
/// Searches for messages containing the query string case-insensitively.
|
||||
///
|
||||
/// If `chat_id` is provided this searches only for messages in this chat, if `chat_id`
|
||||
/// is `None` this searches messages from all chats.
|
||||
pub async fn search_msgs(&self, chat_id: Option<ChatId>, query: &str) -> Result<Vec<MsgId>> {
|
||||
let real_query = query.trim();
|
||||
let real_query = query.trim().to_lowercase();
|
||||
if real_query.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
@@ -1280,7 +1280,7 @@ impl Context {
|
||||
WHERE m.chat_id=?
|
||||
AND m.hidden=0
|
||||
AND ct.blocked=0
|
||||
AND txt LIKE ?
|
||||
AND IFNULL(txt_normalized, txt) LIKE ?
|
||||
ORDER BY m.timestamp,m.id;",
|
||||
(chat_id, str_like_in_text),
|
||||
|row| row.get::<_, MsgId>("id"),
|
||||
@@ -1316,7 +1316,7 @@ impl Context {
|
||||
AND m.hidden=0
|
||||
AND c.blocked!=1
|
||||
AND ct.blocked=0
|
||||
AND m.txt LIKE ?
|
||||
AND IFNULL(txt_normalized, txt) LIKE ?
|
||||
ORDER BY m.id DESC LIMIT 1000",
|
||||
(str_like_in_text,),
|
||||
|row| row.get::<_, MsgId>("id"),
|
||||
@@ -1721,6 +1721,8 @@ mod tests {
|
||||
msg2.set_text("barbaz".to_string());
|
||||
send_msg(&alice, chat.id, &mut msg2).await?;
|
||||
|
||||
alice.send_text(chat.id, "Δ-Chat").await;
|
||||
|
||||
// Global search with a part of text finds the message.
|
||||
let res = alice.search_msgs(None, "ob").await?;
|
||||
assert_eq!(res.len(), 1);
|
||||
@@ -1733,6 +1735,12 @@ mod tests {
|
||||
assert_eq!(res.first(), Some(&msg2.id));
|
||||
assert_eq!(res.get(1), Some(&msg1.id));
|
||||
|
||||
// Search is case-insensitive.
|
||||
for chat_id in [None, Some(chat.id)] {
|
||||
let res = alice.search_msgs(chat_id, "δ-chat").await?;
|
||||
assert_eq!(res.len(), 1);
|
||||
}
|
||||
|
||||
// Global search with longer text does not find any message.
|
||||
let res = alice.search_msgs(None, "foobarbaz").await?;
|
||||
assert!(res.is_empty());
|
||||
|
||||
@@ -447,7 +447,7 @@ pub(crate) async fn delete_expired_messages(context: &Context, now: i64) -> Resu
|
||||
for (msg_id, chat_id, viewtype, location_id) in rows {
|
||||
transaction.execute(
|
||||
"UPDATE msgs
|
||||
SET chat_id=?, txt='', subject='', txt_raw='',
|
||||
SET chat_id=?, txt='', txt_normalized=NULL, subject='', txt_raw='',
|
||||
mime_headers='', from_id=0, to_id=0, param=''
|
||||
WHERE id=?",
|
||||
(DC_CHAT_ID_TRASH, msg_id),
|
||||
|
||||
@@ -113,7 +113,7 @@ impl MsgId {
|
||||
r#"
|
||||
UPDATE msgs
|
||||
SET
|
||||
chat_id=?, txt='',
|
||||
chat_id=?, txt='', txt_normalized=NULL,
|
||||
subject='', txt_raw='',
|
||||
mime_headers='',
|
||||
from_id=0, to_id=0,
|
||||
@@ -2072,6 +2072,15 @@ impl Viewtype {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns text for storing in the `msgs.txt_normalized` column (to make case-insensitive search
|
||||
/// possible for non-ASCII messages).
|
||||
pub(crate) fn normalize_text(text: &str) -> Option<String> {
|
||||
if text.is_ascii() {
|
||||
return None;
|
||||
};
|
||||
Some(text.to_lowercase()).filter(|t| t != text)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use num_traits::FromPrimitive;
|
||||
|
||||
@@ -1540,7 +1540,7 @@ INSERT INTO msgs
|
||||
rfc724_mid, chat_id,
|
||||
from_id, to_id, timestamp, timestamp_sent,
|
||||
timestamp_rcvd, type, state, msgrmsg,
|
||||
txt, subject, txt_raw, param, hidden,
|
||||
txt, txt_normalized, subject, txt_raw, param, hidden,
|
||||
bytes, mime_headers, mime_compressed, mime_in_reply_to,
|
||||
mime_references, mime_modified, error, ephemeral_timer,
|
||||
ephemeral_timestamp, download_state, hop_info
|
||||
@@ -1550,7 +1550,7 @@ INSERT INTO msgs
|
||||
?, ?, ?, ?,
|
||||
?, ?, ?, ?,
|
||||
?, ?, ?, ?, ?,
|
||||
?, ?, ?, ?, 1,
|
||||
?, ?, ?, ?, ?, 1,
|
||||
?, ?, ?, ?,
|
||||
?, ?, ?, ?
|
||||
)
|
||||
@@ -1558,7 +1558,8 @@ ON CONFLICT (id) DO UPDATE
|
||||
SET rfc724_mid=excluded.rfc724_mid, chat_id=excluded.chat_id,
|
||||
from_id=excluded.from_id, to_id=excluded.to_id, timestamp_sent=excluded.timestamp_sent,
|
||||
type=excluded.type, msgrmsg=excluded.msgrmsg,
|
||||
txt=excluded.txt, subject=excluded.subject, txt_raw=excluded.txt_raw, param=excluded.param,
|
||||
txt=excluded.txt, txt_normalized=excluded.txt_normalized, subject=excluded.subject,
|
||||
txt_raw=excluded.txt_raw, param=excluded.param,
|
||||
hidden=excluded.hidden,bytes=excluded.bytes, mime_headers=excluded.mime_headers,
|
||||
mime_compressed=excluded.mime_compressed, mime_in_reply_to=excluded.mime_in_reply_to,
|
||||
mime_references=excluded.mime_references, mime_modified=excluded.mime_modified, error=excluded.error, ephemeral_timer=excluded.ephemeral_timer,
|
||||
@@ -1578,6 +1579,7 @@ RETURNING id
|
||||
state,
|
||||
is_dc_message,
|
||||
if trash { "" } else { msg },
|
||||
if trash { None } else { message::normalize_text(msg) },
|
||||
if trash { "" } else { &subject },
|
||||
// txt_raw might contain invalid utf8
|
||||
if trash { "" } else { &txt_raw },
|
||||
|
||||
@@ -937,6 +937,11 @@ CREATE INDEX msgs_status_updates_index2 ON msgs_status_updates (uid);
|
||||
.await?;
|
||||
}
|
||||
|
||||
if dbversion < 115 {
|
||||
sql.execute_migration("ALTER TABLE msgs ADD COLUMN txt_normalized TEXT", 115)
|
||||
.await?;
|
||||
}
|
||||
|
||||
let new_version = sql
|
||||
.get_raw_config_int(VERSION_CFG)
|
||||
.await?
|
||||
|
||||
Reference in New Issue
Block a user