feat: Case-insensitive search for non-ASCII messages (#5052)

SQLite search with `LIKE` is case-insensitive only for ASCII chars. To make it case-insensitive for
all messages, create a new column `msgs.txt_normalized` defaulting to `NULL` (so we do not bump up
the database size in a migration) and store lowercased/normalized text there when the row is
created/updated. When doing a search, search over `IFNULL(txt_normalized, txt)`.
This commit is contained in:
iequidoo
2024-03-04 20:44:08 -03:00
committed by iequidoo
parent a5d14b377d
commit f6f4ccc6ea
6 changed files with 51 additions and 18 deletions

View File

@@ -927,12 +927,13 @@ impl ChatId {
.sql
.execute(
"UPDATE msgs
SET timestamp=?,type=?,txt=?, param=?,mime_in_reply_to=?
SET timestamp=?,type=?,txt=?,txt_normalized=?,param=?,mime_in_reply_to=?
WHERE id=?;",
(
time(),
msg.viewtype,
&msg.text,
message::normalize_text(&msg.text),
msg.param.to_string(),
msg.in_reply_to.as_deref().unwrap_or_default(),
msg.id,
@@ -956,10 +957,11 @@ impl ChatId {
type,
state,
txt,
txt_normalized,
param,
hidden,
mime_in_reply_to)
VALUES (?,?,?, ?,?,?,?,?,?);",
VALUES (?,?,?,?,?,?,?,?,?,?);",
(
self,
ContactId::SELF,
@@ -967,6 +969,7 @@ impl ChatId {
msg.viewtype,
MessageState::OutDraft,
&msg.text,
message::normalize_text(&msg.text),
msg.param.to_string(),
1,
msg.in_reply_to.as_deref().unwrap_or_default(),
@@ -2075,7 +2078,7 @@ impl Chat {
.execute(
"UPDATE msgs
SET rfc724_mid=?, chat_id=?, from_id=?, to_id=?, timestamp=?, type=?,
state=?, txt=?, subject=?, param=?,
state=?, txt=?, txt_normalized=?, subject=?, param=?,
hidden=?, mime_in_reply_to=?, mime_references=?, mime_modified=?,
mime_headers=?, mime_compressed=1, location_id=?, ephemeral_timer=?,
ephemeral_timestamp=?
@@ -2089,6 +2092,7 @@ impl Chat {
msg.viewtype,
msg.state,
msg.text,
message::normalize_text(&msg.text),
&msg.subject,
msg.param.to_string(),
msg.hidden,
@@ -2117,6 +2121,7 @@ impl Chat {
type,
state,
txt,
txt_normalized,
subject,
param,
hidden,
@@ -2128,7 +2133,7 @@ impl Chat {
location_id,
ephemeral_timer,
ephemeral_timestamp)
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1,?,?,?);",
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,1,?,?,?);",
params_slice![
msg.rfc724_mid,
msg.chat_id,
@@ -2138,6 +2143,7 @@ impl Chat {
msg.viewtype,
msg.state,
msg.text,
message::normalize_text(&msg.text),
&msg.subject,
msg.param.to_string(),
msg.hidden,
@@ -4370,9 +4376,10 @@ pub async fn add_device_msg_with_importance(
timestamp_rcvd,
type,state,
txt,
txt_normalized,
param,
rfc724_mid)
VALUES (?,?,?,?,?,?,?,?,?,?,?);",
VALUES (?,?,?,?,?,?,?,?,?,?,?,?);",
(
chat_id,
ContactId::DEVICE,
@@ -4383,6 +4390,7 @@ pub async fn add_device_msg_with_importance(
msg.viewtype,
state,
&msg.text,
message::normalize_text(&msg.text),
msg.param.to_string(),
rfc724_mid,
),
@@ -4486,8 +4494,8 @@ pub(crate) async fn add_info_msg_with_cmd(
let row_id =
context.sql.insert(
"INSERT INTO msgs (chat_id,from_id,to_id,timestamp,timestamp_sent,timestamp_rcvd,type,state,txt,rfc724_mid,ephemeral_timer, param,mime_in_reply_to)
VALUES (?,?,?, ?,?,?,?,?, ?,?,?, ?,?);",
"INSERT INTO msgs (chat_id,from_id,to_id,timestamp,timestamp_sent,timestamp_rcvd,type,state,txt,txt_normalized,rfc724_mid,ephemeral_timer,param,mime_in_reply_to)
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);",
(
chat_id,
from_id.unwrap_or(ContactId::INFO),
@@ -4498,6 +4506,7 @@ pub(crate) async fn add_info_msg_with_cmd(
Viewtype::Text,
MessageState::InNoticed,
text,
message::normalize_text(text),
rfc724_mid,
ephemeral_timer,
param.to_string(),
@@ -4542,8 +4551,8 @@ pub(crate) async fn update_msg_text_and_timestamp(
context
.sql
.execute(
"UPDATE msgs SET txt=?, timestamp=? WHERE id=?;",
(text, timestamp, msg_id),
"UPDATE msgs SET txt=?, txt_normalized=?, timestamp=? WHERE id=?;",
(text, message::normalize_text(text), timestamp, msg_id),
)
.await?;
context.emit_msgs_changed(chat_id, msg_id);