mirror of
https://github.com/chatmail/core.git
synced 2026-05-07 17:06:35 +03:00
feat: Case-insensitive search for non-ASCII messages (#5052)
SQLite search with `LIKE` is case-insensitive only for ASCII characters. To make search case-insensitive for all messages, create a new column `msgs.txt_normalized` defaulting to `NULL` (so that the migration does not increase the database size) and store the lowercased/normalized text there whenever a row is created or updated. When searching, match against `IFNULL(txt_normalized, txt)`.
This commit is contained in:
27
src/chat.rs
27
src/chat.rs
@@ -927,12 +927,13 @@ impl ChatId {
|
|||||||
.sql
|
.sql
|
||||||
.execute(
|
.execute(
|
||||||
"UPDATE msgs
|
"UPDATE msgs
|
||||||
SET timestamp=?,type=?,txt=?, param=?,mime_in_reply_to=?
|
SET timestamp=?,type=?,txt=?,txt_normalized=?,param=?,mime_in_reply_to=?
|
||||||
WHERE id=?;",
|
WHERE id=?;",
|
||||||
(
|
(
|
||||||
time(),
|
time(),
|
||||||
msg.viewtype,
|
msg.viewtype,
|
||||||
&msg.text,
|
&msg.text,
|
||||||
|
message::normalize_text(&msg.text),
|
||||||
msg.param.to_string(),
|
msg.param.to_string(),
|
||||||
msg.in_reply_to.as_deref().unwrap_or_default(),
|
msg.in_reply_to.as_deref().unwrap_or_default(),
|
||||||
msg.id,
|
msg.id,
|
||||||
@@ -956,10 +957,11 @@ impl ChatId {
|
|||||||
type,
|
type,
|
||||||
state,
|
state,
|
||||||
txt,
|
txt,
|
||||||
|
txt_normalized,
|
||||||
param,
|
param,
|
||||||
hidden,
|
hidden,
|
||||||
mime_in_reply_to)
|
mime_in_reply_to)
|
||||||
VALUES (?,?,?, ?,?,?,?,?,?);",
|
VALUES (?,?,?,?,?,?,?,?,?,?);",
|
||||||
(
|
(
|
||||||
self,
|
self,
|
||||||
ContactId::SELF,
|
ContactId::SELF,
|
||||||
@@ -967,6 +969,7 @@ impl ChatId {
|
|||||||
msg.viewtype,
|
msg.viewtype,
|
||||||
MessageState::OutDraft,
|
MessageState::OutDraft,
|
||||||
&msg.text,
|
&msg.text,
|
||||||
|
message::normalize_text(&msg.text),
|
||||||
msg.param.to_string(),
|
msg.param.to_string(),
|
||||||
1,
|
1,
|
||||||
msg.in_reply_to.as_deref().unwrap_or_default(),
|
msg.in_reply_to.as_deref().unwrap_or_default(),
|
||||||
@@ -2075,7 +2078,7 @@ impl Chat {
|
|||||||
.execute(
|
.execute(
|
||||||
"UPDATE msgs
|
"UPDATE msgs
|
||||||
SET rfc724_mid=?, chat_id=?, from_id=?, to_id=?, timestamp=?, type=?,
|
SET rfc724_mid=?, chat_id=?, from_id=?, to_id=?, timestamp=?, type=?,
|
||||||
state=?, txt=?, subject=?, param=?,
|
state=?, txt=?, txt_normalized=?, subject=?, param=?,
|
||||||
hidden=?, mime_in_reply_to=?, mime_references=?, mime_modified=?,
|
hidden=?, mime_in_reply_to=?, mime_references=?, mime_modified=?,
|
||||||
mime_headers=?, mime_compressed=1, location_id=?, ephemeral_timer=?,
|
mime_headers=?, mime_compressed=1, location_id=?, ephemeral_timer=?,
|
||||||
ephemeral_timestamp=?
|
ephemeral_timestamp=?
|
||||||
@@ -2089,6 +2092,7 @@ impl Chat {
|
|||||||
msg.viewtype,
|
msg.viewtype,
|
||||||
msg.state,
|
msg.state,
|
||||||
msg.text,
|
msg.text,
|
||||||
|
message::normalize_text(&msg.text),
|
||||||
&msg.subject,
|
&msg.subject,
|
||||||
msg.param.to_string(),
|
msg.param.to_string(),
|
||||||
msg.hidden,
|
msg.hidden,
|
||||||
@@ -2117,6 +2121,7 @@ impl Chat {
|
|||||||
type,
|
type,
|
||||||
state,
|
state,
|
||||||
txt,
|
txt,
|
||||||
|
txt_normalized,
|
||||||
subject,
|
subject,
|
||||||
param,
|
param,
|
||||||
hidden,
|
hidden,
|
||||||
@@ -2128,7 +2133,7 @@ impl Chat {
|
|||||||
location_id,
|
location_id,
|
||||||
ephemeral_timer,
|
ephemeral_timer,
|
||||||
ephemeral_timestamp)
|
ephemeral_timestamp)
|
||||||
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1,?,?,?);",
|
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1,?,?,?);",
|
||||||
params_slice![
|
params_slice![
|
||||||
msg.rfc724_mid,
|
msg.rfc724_mid,
|
||||||
msg.chat_id,
|
msg.chat_id,
|
||||||
@@ -2138,6 +2143,7 @@ impl Chat {
|
|||||||
msg.viewtype,
|
msg.viewtype,
|
||||||
msg.state,
|
msg.state,
|
||||||
msg.text,
|
msg.text,
|
||||||
|
message::normalize_text(&msg.text),
|
||||||
&msg.subject,
|
&msg.subject,
|
||||||
msg.param.to_string(),
|
msg.param.to_string(),
|
||||||
msg.hidden,
|
msg.hidden,
|
||||||
@@ -4370,9 +4376,10 @@ pub async fn add_device_msg_with_importance(
|
|||||||
timestamp_rcvd,
|
timestamp_rcvd,
|
||||||
type,state,
|
type,state,
|
||||||
txt,
|
txt,
|
||||||
|
txt_normalized,
|
||||||
param,
|
param,
|
||||||
rfc724_mid)
|
rfc724_mid)
|
||||||
VALUES (?,?,?,?,?,?,?,?,?,?,?);",
|
VALUES (?,?,?,?,?,?,?,?,?,?,?,?);",
|
||||||
(
|
(
|
||||||
chat_id,
|
chat_id,
|
||||||
ContactId::DEVICE,
|
ContactId::DEVICE,
|
||||||
@@ -4383,6 +4390,7 @@ pub async fn add_device_msg_with_importance(
|
|||||||
msg.viewtype,
|
msg.viewtype,
|
||||||
state,
|
state,
|
||||||
&msg.text,
|
&msg.text,
|
||||||
|
message::normalize_text(&msg.text),
|
||||||
msg.param.to_string(),
|
msg.param.to_string(),
|
||||||
rfc724_mid,
|
rfc724_mid,
|
||||||
),
|
),
|
||||||
@@ -4486,8 +4494,8 @@ pub(crate) async fn add_info_msg_with_cmd(
|
|||||||
|
|
||||||
let row_id =
|
let row_id =
|
||||||
context.sql.insert(
|
context.sql.insert(
|
||||||
"INSERT INTO msgs (chat_id,from_id,to_id,timestamp,timestamp_sent,timestamp_rcvd,type,state,txt,rfc724_mid,ephemeral_timer, param,mime_in_reply_to)
|
"INSERT INTO msgs (chat_id,from_id,to_id,timestamp,timestamp_sent,timestamp_rcvd,type,state,txt,txt_normalized,rfc724_mid,ephemeral_timer,param,mime_in_reply_to)
|
||||||
VALUES (?,?,?, ?,?,?,?,?, ?,?,?, ?,?);",
|
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);",
|
||||||
(
|
(
|
||||||
chat_id,
|
chat_id,
|
||||||
from_id.unwrap_or(ContactId::INFO),
|
from_id.unwrap_or(ContactId::INFO),
|
||||||
@@ -4498,6 +4506,7 @@ pub(crate) async fn add_info_msg_with_cmd(
|
|||||||
Viewtype::Text,
|
Viewtype::Text,
|
||||||
MessageState::InNoticed,
|
MessageState::InNoticed,
|
||||||
text,
|
text,
|
||||||
|
message::normalize_text(text),
|
||||||
rfc724_mid,
|
rfc724_mid,
|
||||||
ephemeral_timer,
|
ephemeral_timer,
|
||||||
param.to_string(),
|
param.to_string(),
|
||||||
@@ -4542,8 +4551,8 @@ pub(crate) async fn update_msg_text_and_timestamp(
|
|||||||
context
|
context
|
||||||
.sql
|
.sql
|
||||||
.execute(
|
.execute(
|
||||||
"UPDATE msgs SET txt=?, timestamp=? WHERE id=?;",
|
"UPDATE msgs SET txt=?, txt_normalized=?, timestamp=? WHERE id=?;",
|
||||||
(text, timestamp, msg_id),
|
(text, message::normalize_text(text), timestamp, msg_id),
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
context.emit_msgs_changed(chat_id, msg_id);
|
context.emit_msgs_changed(chat_id, msg_id);
|
||||||
|
|||||||
@@ -1259,12 +1259,12 @@ impl Context {
|
|||||||
Ok(list)
|
Ok(list)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Searches for messages containing the query string.
|
/// Searches for messages containing the query string case-insensitively.
|
||||||
///
|
///
|
||||||
/// If `chat_id` is provided this searches only for messages in this chat, if `chat_id`
|
/// If `chat_id` is provided this searches only for messages in this chat, if `chat_id`
|
||||||
/// is `None` this searches messages from all chats.
|
/// is `None` this searches messages from all chats.
|
||||||
pub async fn search_msgs(&self, chat_id: Option<ChatId>, query: &str) -> Result<Vec<MsgId>> {
|
pub async fn search_msgs(&self, chat_id: Option<ChatId>, query: &str) -> Result<Vec<MsgId>> {
|
||||||
let real_query = query.trim();
|
let real_query = query.trim().to_lowercase();
|
||||||
if real_query.is_empty() {
|
if real_query.is_empty() {
|
||||||
return Ok(Vec::new());
|
return Ok(Vec::new());
|
||||||
}
|
}
|
||||||
@@ -1280,7 +1280,7 @@ impl Context {
|
|||||||
WHERE m.chat_id=?
|
WHERE m.chat_id=?
|
||||||
AND m.hidden=0
|
AND m.hidden=0
|
||||||
AND ct.blocked=0
|
AND ct.blocked=0
|
||||||
AND txt LIKE ?
|
AND IFNULL(txt_normalized, txt) LIKE ?
|
||||||
ORDER BY m.timestamp,m.id;",
|
ORDER BY m.timestamp,m.id;",
|
||||||
(chat_id, str_like_in_text),
|
(chat_id, str_like_in_text),
|
||||||
|row| row.get::<_, MsgId>("id"),
|
|row| row.get::<_, MsgId>("id"),
|
||||||
@@ -1316,7 +1316,7 @@ impl Context {
|
|||||||
AND m.hidden=0
|
AND m.hidden=0
|
||||||
AND c.blocked!=1
|
AND c.blocked!=1
|
||||||
AND ct.blocked=0
|
AND ct.blocked=0
|
||||||
AND m.txt LIKE ?
|
AND IFNULL(txt_normalized, txt) LIKE ?
|
||||||
ORDER BY m.id DESC LIMIT 1000",
|
ORDER BY m.id DESC LIMIT 1000",
|
||||||
(str_like_in_text,),
|
(str_like_in_text,),
|
||||||
|row| row.get::<_, MsgId>("id"),
|
|row| row.get::<_, MsgId>("id"),
|
||||||
@@ -1721,6 +1721,8 @@ mod tests {
|
|||||||
msg2.set_text("barbaz".to_string());
|
msg2.set_text("barbaz".to_string());
|
||||||
send_msg(&alice, chat.id, &mut msg2).await?;
|
send_msg(&alice, chat.id, &mut msg2).await?;
|
||||||
|
|
||||||
|
alice.send_text(chat.id, "Δ-Chat").await;
|
||||||
|
|
||||||
// Global search with a part of text finds the message.
|
// Global search with a part of text finds the message.
|
||||||
let res = alice.search_msgs(None, "ob").await?;
|
let res = alice.search_msgs(None, "ob").await?;
|
||||||
assert_eq!(res.len(), 1);
|
assert_eq!(res.len(), 1);
|
||||||
@@ -1733,6 +1735,12 @@ mod tests {
|
|||||||
assert_eq!(res.first(), Some(&msg2.id));
|
assert_eq!(res.first(), Some(&msg2.id));
|
||||||
assert_eq!(res.get(1), Some(&msg1.id));
|
assert_eq!(res.get(1), Some(&msg1.id));
|
||||||
|
|
||||||
|
// Search is case-insensitive.
|
||||||
|
for chat_id in [None, Some(chat.id)] {
|
||||||
|
let res = alice.search_msgs(chat_id, "δ-chat").await?;
|
||||||
|
assert_eq!(res.len(), 1);
|
||||||
|
}
|
||||||
|
|
||||||
// Global search with longer text does not find any message.
|
// Global search with longer text does not find any message.
|
||||||
let res = alice.search_msgs(None, "foobarbaz").await?;
|
let res = alice.search_msgs(None, "foobarbaz").await?;
|
||||||
assert!(res.is_empty());
|
assert!(res.is_empty());
|
||||||
|
|||||||
@@ -447,7 +447,7 @@ pub(crate) async fn delete_expired_messages(context: &Context, now: i64) -> Resu
|
|||||||
for (msg_id, chat_id, viewtype, location_id) in rows {
|
for (msg_id, chat_id, viewtype, location_id) in rows {
|
||||||
transaction.execute(
|
transaction.execute(
|
||||||
"UPDATE msgs
|
"UPDATE msgs
|
||||||
SET chat_id=?, txt='', subject='', txt_raw='',
|
SET chat_id=?, txt='', txt_normalized=NULL, subject='', txt_raw='',
|
||||||
mime_headers='', from_id=0, to_id=0, param=''
|
mime_headers='', from_id=0, to_id=0, param=''
|
||||||
WHERE id=?",
|
WHERE id=?",
|
||||||
(DC_CHAT_ID_TRASH, msg_id),
|
(DC_CHAT_ID_TRASH, msg_id),
|
||||||
|
|||||||
@@ -113,7 +113,7 @@ impl MsgId {
|
|||||||
r#"
|
r#"
|
||||||
UPDATE msgs
|
UPDATE msgs
|
||||||
SET
|
SET
|
||||||
chat_id=?, txt='',
|
chat_id=?, txt='', txt_normalized=NULL,
|
||||||
subject='', txt_raw='',
|
subject='', txt_raw='',
|
||||||
mime_headers='',
|
mime_headers='',
|
||||||
from_id=0, to_id=0,
|
from_id=0, to_id=0,
|
||||||
@@ -2072,6 +2072,15 @@ impl Viewtype {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Produces the value to store in the `msgs.txt_normalized` column, which makes
/// case-insensitive search possible for messages containing non-ASCII characters.
///
/// Returns `None` when `text` is pure ASCII or when lowercasing leaves it unchanged;
/// otherwise returns the lowercased text.
pub(crate) fn normalize_text(text: &str) -> Option<String> {
    // ASCII-only text gets no normalized copy.
    if text.is_ascii() {
        return None;
    }
    let lowered = text.to_lowercase();
    if lowered == text {
        // Already lowercase: a separate normalized copy would be identical.
        None
    } else {
        Some(lowered)
    }
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use num_traits::FromPrimitive;
|
use num_traits::FromPrimitive;
|
||||||
|
|||||||
@@ -1540,7 +1540,7 @@ INSERT INTO msgs
|
|||||||
rfc724_mid, chat_id,
|
rfc724_mid, chat_id,
|
||||||
from_id, to_id, timestamp, timestamp_sent,
|
from_id, to_id, timestamp, timestamp_sent,
|
||||||
timestamp_rcvd, type, state, msgrmsg,
|
timestamp_rcvd, type, state, msgrmsg,
|
||||||
txt, subject, txt_raw, param, hidden,
|
txt, txt_normalized, subject, txt_raw, param, hidden,
|
||||||
bytes, mime_headers, mime_compressed, mime_in_reply_to,
|
bytes, mime_headers, mime_compressed, mime_in_reply_to,
|
||||||
mime_references, mime_modified, error, ephemeral_timer,
|
mime_references, mime_modified, error, ephemeral_timer,
|
||||||
ephemeral_timestamp, download_state, hop_info
|
ephemeral_timestamp, download_state, hop_info
|
||||||
@@ -1550,7 +1550,7 @@ INSERT INTO msgs
|
|||||||
?, ?, ?, ?,
|
?, ?, ?, ?,
|
||||||
?, ?, ?, ?,
|
?, ?, ?, ?,
|
||||||
?, ?, ?, ?, ?,
|
?, ?, ?, ?, ?,
|
||||||
?, ?, ?, ?, 1,
|
?, ?, ?, ?, ?, 1,
|
||||||
?, ?, ?, ?,
|
?, ?, ?, ?,
|
||||||
?, ?, ?, ?
|
?, ?, ?, ?
|
||||||
)
|
)
|
||||||
@@ -1558,7 +1558,8 @@ ON CONFLICT (id) DO UPDATE
|
|||||||
SET rfc724_mid=excluded.rfc724_mid, chat_id=excluded.chat_id,
|
SET rfc724_mid=excluded.rfc724_mid, chat_id=excluded.chat_id,
|
||||||
from_id=excluded.from_id, to_id=excluded.to_id, timestamp_sent=excluded.timestamp_sent,
|
from_id=excluded.from_id, to_id=excluded.to_id, timestamp_sent=excluded.timestamp_sent,
|
||||||
type=excluded.type, msgrmsg=excluded.msgrmsg,
|
type=excluded.type, msgrmsg=excluded.msgrmsg,
|
||||||
txt=excluded.txt, subject=excluded.subject, txt_raw=excluded.txt_raw, param=excluded.param,
|
txt=excluded.txt, txt_normalized=excluded.txt_normalized, subject=excluded.subject,
|
||||||
|
txt_raw=excluded.txt_raw, param=excluded.param,
|
||||||
hidden=excluded.hidden,bytes=excluded.bytes, mime_headers=excluded.mime_headers,
|
hidden=excluded.hidden,bytes=excluded.bytes, mime_headers=excluded.mime_headers,
|
||||||
mime_compressed=excluded.mime_compressed, mime_in_reply_to=excluded.mime_in_reply_to,
|
mime_compressed=excluded.mime_compressed, mime_in_reply_to=excluded.mime_in_reply_to,
|
||||||
mime_references=excluded.mime_references, mime_modified=excluded.mime_modified, error=excluded.error, ephemeral_timer=excluded.ephemeral_timer,
|
mime_references=excluded.mime_references, mime_modified=excluded.mime_modified, error=excluded.error, ephemeral_timer=excluded.ephemeral_timer,
|
||||||
@@ -1578,6 +1579,7 @@ RETURNING id
|
|||||||
state,
|
state,
|
||||||
is_dc_message,
|
is_dc_message,
|
||||||
if trash { "" } else { msg },
|
if trash { "" } else { msg },
|
||||||
|
if trash { None } else { message::normalize_text(msg) },
|
||||||
if trash { "" } else { &subject },
|
if trash { "" } else { &subject },
|
||||||
// txt_raw might contain invalid utf8
|
// txt_raw might contain invalid utf8
|
||||||
if trash { "" } else { &txt_raw },
|
if trash { "" } else { &txt_raw },
|
||||||
|
|||||||
@@ -937,6 +937,11 @@ CREATE INDEX msgs_status_updates_index2 ON msgs_status_updates (uid);
|
|||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if dbversion < 115 {
|
||||||
|
sql.execute_migration("ALTER TABLE msgs ADD COLUMN txt_normalized TEXT", 115)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
|
||||||
let new_version = sql
|
let new_version = sql
|
||||||
.get_raw_config_int(VERSION_CFG)
|
.get_raw_config_int(VERSION_CFG)
|
||||||
.await?
|
.await?
|
||||||
|
|||||||
Reference in New Issue
Block a user