fix: get_connectivity(): Get rid of locking SchedulerState::inner (#7124)

`get_connectivity()` is expected to return immediately, not when the scheduler finishes updating its
state in `start_io()/stop_io()/pause_io()`, otherwise it causes app non-responsiveness.

Instead of read-locking `SchedulerState::inner`, store the `ConnectivityStore` collection in
`Context` and fetch it from there in `get_connectivity()`. Update it every time we release a write
lock on `SchedulerState::inner`.
This commit is contained in:
iequidoo
2025-08-20 11:59:15 -03:00
committed by iequidoo
parent 3656337d41
commit 6837874d43
6 changed files with 44 additions and 33 deletions

View File

@@ -8,12 +8,12 @@ use async_channel::{self as channel, Receiver, Sender};
use futures::future::try_join_all;
use futures_lite::FutureExt;
use rand::Rng;
use tokio::sync::{RwLock, RwLockWriteGuard, oneshot};
use tokio::sync::{RwLock, oneshot};
use tokio::task;
use tokio_util::sync::CancellationToken;
use tokio_util::task::TaskTracker;
use self::connectivity::ConnectivityStore;
pub(crate) use self::connectivity::ConnectivityStore;
use crate::config::{self, Config};
use crate::constants;
use crate::contact::{ContactId, RecentlySeenLoop};
@@ -53,32 +53,32 @@ impl SchedulerState {
}
/// Starts the scheduler if it is not yet started.
pub(crate) async fn start(&self, context: Context) {
pub(crate) async fn start(&self, context: &Context) {
let mut inner = self.inner.write().await;
match *inner {
InnerSchedulerState::Started(_) => (),
InnerSchedulerState::Stopped => Self::do_start(inner, context).await,
InnerSchedulerState::Stopped => Self::do_start(&mut inner, context).await,
InnerSchedulerState::Paused {
ref mut started, ..
} => *started = true,
}
context.update_connectivities(&inner);
}
/// Starts the scheduler if it is not yet started.
async fn do_start(mut inner: RwLockWriteGuard<'_, InnerSchedulerState>, context: Context) {
async fn do_start(inner: &mut InnerSchedulerState, context: &Context) {
info!(context, "starting IO");
// Notify message processing loop
// to allow processing old messages after restart.
context.new_msgs_notify.notify_one();
let ctx = context.clone();
match Scheduler::start(&context).await {
match Scheduler::start(context).await {
Ok(scheduler) => {
*inner = InnerSchedulerState::Started(scheduler);
context.emit_event(EventType::ConnectivityChanged);
}
Err(err) => error!(&ctx, "Failed to start IO: {:#}", err),
Err(err) => error!(context, "Failed to start IO: {:#}", err),
}
}
@@ -87,18 +87,19 @@ impl SchedulerState {
let mut inner = self.inner.write().await;
match *inner {
InnerSchedulerState::Started(_) => {
Self::do_stop(inner, context, InnerSchedulerState::Stopped).await
Self::do_stop(&mut inner, context, InnerSchedulerState::Stopped).await
}
InnerSchedulerState::Stopped => (),
InnerSchedulerState::Paused {
ref mut started, ..
} => *started = false,
}
context.update_connectivities(&inner);
}
/// Stops the scheduler if it is currently running.
async fn do_stop(
mut inner: RwLockWriteGuard<'_, InnerSchedulerState>,
inner: &mut InnerSchedulerState,
context: &Context,
new_state: InnerSchedulerState,
) {
@@ -122,7 +123,7 @@ impl SchedulerState {
debug_logging.loop_handle.abort();
debug_logging.loop_handle.await.ok();
}
let prev_state = std::mem::replace(&mut *inner, new_state);
let prev_state = std::mem::replace(inner, new_state);
context.emit_event(EventType::ConnectivityChanged);
match prev_state {
InnerSchedulerState::Started(scheduler) => scheduler.stop(context).await,
@@ -138,7 +139,7 @@ impl SchedulerState {
/// If in the meantime [`SchedulerState::start`] or [`SchedulerState::stop`] is called
/// resume will do the right thing and restore the scheduler to the state requested by
/// the last call.
pub(crate) async fn pause(&'_ self, context: Context) -> Result<IoPausedGuard> {
pub(crate) async fn pause(&'_ self, context: &Context) -> Result<IoPausedGuard> {
{
let mut inner = self.inner.write().await;
match *inner {
@@ -147,7 +148,7 @@ impl SchedulerState {
started: true,
pause_guards_count: NonZeroUsize::new(1).unwrap(),
};
Self::do_stop(inner, &context, new_state).await;
Self::do_stop(&mut inner, context, new_state).await;
}
InnerSchedulerState::Stopped => {
*inner = InnerSchedulerState::Paused {
@@ -164,9 +165,11 @@ impl SchedulerState {
.ok_or_else(|| Error::msg("Too many pause guards active"))?
}
}
context.update_connectivities(&inner);
}
let (tx, rx) = oneshot::channel();
let context = context.clone();
tokio::spawn(async move {
rx.await.ok();
let mut inner = context.scheduler.inner.write().await;
@@ -183,7 +186,7 @@ impl SchedulerState {
} => {
if *pause_guards_count == NonZeroUsize::new(1).unwrap() {
match *started {
true => SchedulerState::do_start(inner, context.clone()).await,
true => SchedulerState::do_start(&mut inner, &context).await,
false => *inner = InnerSchedulerState::Stopped,
}
} else {
@@ -193,6 +196,7 @@ impl SchedulerState {
}
}
}
context.update_connectivities(&inner);
});
Ok(IoPausedGuard { sender: Some(tx) })
}
@@ -202,7 +206,7 @@ impl SchedulerState {
info!(context, "restarting IO");
if self.is_running().await {
self.stop(context).await;
self.start(context.clone()).await;
self.start(context).await;
}
}
@@ -288,7 +292,7 @@ impl SchedulerState {
}
#[derive(Debug, Default)]
enum InnerSchedulerState {
pub(crate) enum InnerSchedulerState {
Started(Scheduler),
#[default]
Stopped,