feat: Case-insensitive search for non-ASCII messages (#5052)

SQLite search with `LIKE` is case-insensitive only for ASCII chars. To make it case-insensitive for
all messages, create a new column `msgs.txt_normalized` defaulting to `NULL` (so we do not bump up
the database size in a migration) and storing lowercased/normalized text there when the row is
created/updated. When doing a search, search over `IFNULL(txt_normalized, txt)`.
This commit is contained in:
iequidoo
2024-03-04 20:44:08 -03:00
committed by iequidoo
parent a5d14b377d
commit f6f4ccc6ea
6 changed files with 51 additions and 18 deletions

View File

@@ -1540,7 +1540,7 @@ INSERT INTO msgs
rfc724_mid, chat_id,
from_id, to_id, timestamp, timestamp_sent,
timestamp_rcvd, type, state, msgrmsg,
txt, subject, txt_raw, param, hidden,
txt, txt_normalized, subject, txt_raw, param, hidden,
bytes, mime_headers, mime_compressed, mime_in_reply_to,
mime_references, mime_modified, error, ephemeral_timer,
ephemeral_timestamp, download_state, hop_info
@@ -1550,7 +1550,7 @@ INSERT INTO msgs
?, ?, ?, ?,
?, ?, ?, ?,
?, ?, ?, ?, ?,
?, ?, ?, ?, 1,
?, ?, ?, ?, ?, 1,
?, ?, ?, ?,
?, ?, ?, ?
)
@@ -1558,7 +1558,8 @@ ON CONFLICT (id) DO UPDATE
SET rfc724_mid=excluded.rfc724_mid, chat_id=excluded.chat_id,
from_id=excluded.from_id, to_id=excluded.to_id, timestamp_sent=excluded.timestamp_sent,
type=excluded.type, msgrmsg=excluded.msgrmsg,
txt=excluded.txt, subject=excluded.subject, txt_raw=excluded.txt_raw, param=excluded.param,
txt=excluded.txt, txt_normalized=excluded.txt_normalized, subject=excluded.subject,
txt_raw=excluded.txt_raw, param=excluded.param,
hidden=excluded.hidden,bytes=excluded.bytes, mime_headers=excluded.mime_headers,
mime_compressed=excluded.mime_compressed, mime_in_reply_to=excluded.mime_in_reply_to,
mime_references=excluded.mime_references, mime_modified=excluded.mime_modified, error=excluded.error, ephemeral_timer=excluded.ephemeral_timer,
@@ -1578,6 +1579,7 @@ RETURNING id
state,
is_dc_message,
if trash { "" } else { msg },
if trash { None } else { message::normalize_text(msg) },
if trash { "" } else { &subject },
// txt_raw might contain invalid utf8
if trash { "" } else { &txt_raw },