mirror of
https://github.com/chatmail/core.git
synced 2026-04-28 19:06:35 +03:00
feat: Disable wal_autocheckpoint
From https://www.sqlite.org/wal.html: > The default strategy is to allow successive write transactions to grow the WAL until the WAL becomes about 1000 pages in size, then to run a checkpoint operation for each subsequent COMMIT until the WAL is reset to be smaller than 1000 pages. By default, the checkpoint will be run automatically by the same thread that does the COMMIT that pushes the WAL over its size limit. This has the effect of causing most COMMIT operations to be very fast but an occasional COMMIT (those that trigger a checkpoint) to be much slower. And while autocheckpoint runs in `PASSIVE` mode and thus doesn't block concurrent readers and writers, in our design it blocks writers because it's done while `write_mutex` is locked, and thus may cause the app to get stuck for a noticeable time. Let's disable autocheckpointing then; we can't rely on it anyway. Instead, run a `TRUNCATE` checkpoint from `inbox_loop()` if the WAL is >= 4K pages and a `PASSIVE` checkpoint otherwise.
This commit is contained in:
@@ -474,6 +474,11 @@ async fn inbox_fetch_idle(
|
||||
last_housekeeping_time.saturating_add(constants::HOUSEKEEPING_PERIOD);
|
||||
if next_housekeeping_time <= time() {
|
||||
sql::housekeeping(ctx).await.log_err(ctx).ok();
|
||||
} else {
|
||||
let force_truncate = false;
|
||||
if let Err(err) = ctx.sql.wal_checkpoint(ctx, force_truncate).await {
|
||||
warn!(ctx, "wal_checkpoint() failed: {err:#}.");
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
|
||||
18
src/sql.rs
18
src/sql.rs
@@ -666,8 +666,16 @@ impl Sql {
|
||||
&self.config_cache
|
||||
}
|
||||
|
||||
/// Attempts to truncate the WAL file.
|
||||
pub(crate) async fn wal_checkpoint(&self, context: &Context) -> Result<()> {
|
||||
/// Runs a WAL checkpoint operation.
|
||||
///
|
||||
/// * `force_truncate` - Force TRUNCATE mode to truncate the WAL file to 0 bytes, otherwise only
|
||||
/// run PASSIVE mode if the WAL isn't too large. NB: Truncating blocks all db connections for
|
||||
/// some time.
|
||||
pub(crate) async fn wal_checkpoint(
|
||||
&self,
|
||||
context: &Context,
|
||||
force_truncate: bool,
|
||||
) -> Result<()> {
|
||||
let lock = self.pool.read().await;
|
||||
let Some(pool) = lock.as_ref() else {
|
||||
// No db connections, nothing to checkpoint.
|
||||
@@ -680,7 +688,7 @@ impl Sql {
|
||||
readers_blocked_duration,
|
||||
pages_total,
|
||||
pages_checkpointed,
|
||||
} = pool.wal_checkpoint().await?;
|
||||
} = pool.wal_checkpoint(force_truncate).await?;
|
||||
if pages_checkpointed < pages_total {
|
||||
warn!(
|
||||
context,
|
||||
@@ -711,6 +719,7 @@ fn new_connection(path: &Path, passphrase: &str) -> Result<Connection> {
|
||||
PRAGMA secure_delete=on;
|
||||
PRAGMA soft_heap_limit = 8388608; -- 8 MiB limit, same as set in Android SQLiteDatabase.
|
||||
PRAGMA foreign_keys=on;
|
||||
PRAGMA wal_autocheckpoint=0;
|
||||
",
|
||||
)?;
|
||||
|
||||
@@ -840,7 +849,8 @@ pub async fn housekeeping(context: &Context) -> Result<()> {
|
||||
// bigger than 200M) and also make sure we truncate the WAL periodically. Auto-checkpointing does
|
||||
// not normally truncate the WAL (unless the `journal_size_limit` pragma is set), see
|
||||
// https://www.sqlite.org/wal.html.
|
||||
if let Err(err) = Sql::wal_checkpoint(&context.sql, context).await {
|
||||
let force_truncate = true;
|
||||
if let Err(err) = Sql::wal_checkpoint(&context.sql, context, force_truncate).await {
|
||||
warn!(context, "wal_checkpoint() failed: {err:#}.");
|
||||
debug_assert!(false);
|
||||
}
|
||||
|
||||
@@ -199,8 +199,8 @@ impl Pool {
|
||||
Arc::clone(&self.inner).get(query_only).await
|
||||
}
|
||||
|
||||
/// Truncates the WAL file.
|
||||
pub(crate) async fn wal_checkpoint(&self) -> Result<WalCheckpointStats> {
|
||||
wal_checkpoint::wal_checkpoint(self).await
|
||||
/// Runs a WAL checkpoint operation.
|
||||
pub(crate) async fn wal_checkpoint(&self, force_truncate: bool) -> Result<WalCheckpointStats> {
|
||||
wal_checkpoint::wal_checkpoint(self, force_truncate).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,26 +26,45 @@ pub(crate) struct WalCheckpointStats {
|
||||
|
||||
/// Number of checkpointed WAL pages.
|
||||
///
|
||||
/// It should be the same as `pages_total`
|
||||
/// If TRUNCATE is forced, it should be the same as `pages_total`
|
||||
/// unless there are external connections to the database
|
||||
/// that are not in the pool.
|
||||
pub pages_checkpointed: i64,
|
||||
}
|
||||
|
||||
/// Runs a checkpoint operation in TRUNCATE mode, so the WAL file is truncated to 0 bytes.
|
||||
pub(super) async fn wal_checkpoint(pool: &Pool) -> Result<WalCheckpointStats> {
|
||||
/// Runs a WAL checkpoint operation.
|
||||
///
|
||||
/// * `force_truncate` - Force TRUNCATE mode to truncate the WAL file to 0 bytes, otherwise only run
|
||||
/// PASSIVE mode if the WAL isn't too large.
|
||||
pub(super) async fn wal_checkpoint(
|
||||
pool: &Pool,
|
||||
force_truncate: bool,
|
||||
) -> Result<WalCheckpointStats> {
|
||||
let t_start = Time::now();
|
||||
|
||||
// Do as much work as possible without blocking anybody.
|
||||
let query_only = true;
|
||||
let conn = pool.get(query_only).await?;
|
||||
tokio::task::block_in_place(|| {
|
||||
let (pages_total, pages_checkpointed) = tokio::task::block_in_place(|| {
|
||||
// Execute some transaction causing the WAL file to be opened so that the
|
||||
// `wal_checkpoint()` can proceed, otherwise it fails when called the first time,
|
||||
// see https://sqlite.org/forum/forumpost/7512d76a05268fc8.
|
||||
conn.query_row("PRAGMA table_list", [], |_| Ok(()))?;
|
||||
conn.query_row("PRAGMA wal_checkpoint(PASSIVE)", [], |_| Ok(()))
|
||||
conn.query_row("PRAGMA wal_checkpoint(PASSIVE)", [], |row| {
|
||||
let pages_total: i64 = row.get(1)?;
|
||||
let pages_checkpointed: i64 = row.get(2)?;
|
||||
Ok((pages_total, pages_checkpointed))
|
||||
})
|
||||
})?;
|
||||
if !force_truncate && pages_total < 4096 {
|
||||
return Ok(WalCheckpointStats {
|
||||
total_duration: time_elapsed(&t_start),
|
||||
writers_blocked_duration: Duration::ZERO,
|
||||
readers_blocked_duration: Duration::ZERO,
|
||||
pages_total,
|
||||
pages_checkpointed,
|
||||
});
|
||||
}
|
||||
|
||||
// Kick out writers. `write_mutex` should be locked before taking an `InnerPool.semaphore`
|
||||
// permit to avoid ABBA deadlocks, so drop `conn` which holds a semaphore permit.
|
||||
|
||||
Reference in New Issue
Block a user