Skip to content

Commit

Permalink
Avoid unneeded start_session() when spawning (solana-labs#1815)
Browse files Browse the repository at this point in the history
* Avoid unneeded start_session() when spawning

* Add comments
  • Loading branch information
ryoqun authored Jun 24, 2024
1 parent 3ea0855 commit 40a9851
Showing 1 changed file with 61 additions and 52 deletions.
113 changes: 61 additions & 52 deletions unified-scheduler-pool/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -766,23 +766,6 @@ struct ThreadManager<S: SpawnableScheduler<TH>, TH: TaskHandler> {
handler_threads: Vec<JoinHandle<()>>,
}

impl<TH: TaskHandler> PooledScheduler<TH> {
fn do_spawn(
pool: Arc<SchedulerPool<Self, TH>>,
initial_context: SchedulingContext,
result_with_timings: ResultWithTimings,
) -> Self {
Self::from_inner(
PooledSchedulerInner::<Self, TH> {
thread_manager: ThreadManager::new(pool),
usage_queue_loader: UsageQueueLoader::default(),
},
initial_context,
result_with_timings,
)
}
}

struct HandlerPanicked;
type HandlerResult = std::result::Result<Box<ExecutedTask>, HandlerPanicked>;

Expand Down Expand Up @@ -852,7 +835,15 @@ impl<S: SpawnableScheduler<TH>, TH: TaskHandler> ThreadManager<S, TH> {
);
}

fn start_threads(&mut self, context: &SchedulingContext) {
// This method must take same set of session-related arguments as start_session() to avoid
// unneeded channel operations to minimize overhead. Starting threads incurs a very high cost
// already... Also, pre-creating threads isn't desirable as well to avoid `Option`-ed types
// for type safety.
fn start_threads(
&mut self,
context: SchedulingContext,
mut result_with_timings: ResultWithTimings,
) {
// Firstly, setup bi-directional messaging between the scheduler and handlers to pass
// around tasks, by creating 2 channels (one for to-be-handled tasks from the scheduler to
// the handlers and the other for finished tasks from the handlers to the scheduler).
Expand Down Expand Up @@ -930,7 +921,7 @@ impl<S: SpawnableScheduler<TH>, TH: TaskHandler> ThreadManager<S, TH> {
// prioritization further. Consequently, this also contributes to alleviate the known
// heuristic's caveat for the first task of linearized runs, which is described above.
let (mut runnable_task_sender, runnable_task_receiver) =
chained_channel::unbounded::<Task, SchedulingContext>(context.clone());
chained_channel::unbounded::<Task, SchedulingContext>(context);
// Create two handler-to-scheduler channels to prioritize the finishing of blocked tasks,
// because it is more likely that a blocked task will have more blocked tasks behind it,
// which should be scheduled while minimizing the delay to clear buffered linearized runs
Expand Down Expand Up @@ -1013,29 +1004,14 @@ impl<S: SpawnableScheduler<TH>, TH: TaskHandler> ThreadManager<S, TH> {
let mut state_machine = unsafe {
SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling()
};
let mut result_with_timings = initialized_result_with_timings();

// The following loop maintains and updates ResultWithTimings as its
// externally-provieded mutable state for each session in this way:
//
// 1. Initial result_with_timing is propagated implicitly by the moved variable.
// 2. Subsequent result_with_timings are propagated explicitly from
// the new_task_receiver.recv() invocation located at the end of loop.
'nonaborted_main_loop: loop {
match new_task_receiver.recv() {
Ok(NewTaskPayload::OpenSubchannel((
new_context,
new_result_with_timings,
))) => {
// signal about new SchedulingContext to handler threads
runnable_task_sender
.send_chained_channel(new_context, handler_count)
.unwrap();
result_with_timings = new_result_with_timings;
}
Ok(_) => {
unreachable!();
}
Err(_) => {
// This unusual condition must be triggered by ThreadManager::drop();
break 'nonaborted_main_loop;
}
}

let mut is_finished = false;
while !is_finished {
// ALL recv selectors are eager-evaluated ALWAYS by current crossbeam impl,
Expand Down Expand Up @@ -1121,6 +1097,28 @@ impl<S: SpawnableScheduler<TH>, TH: TaskHandler> ThreadManager<S, TH> {
.expect("always outlived receiver");
session_ending = false;
}

match new_task_receiver.recv() {
Ok(NewTaskPayload::OpenSubchannel((
new_context,
new_result_with_timings,
))) => {
// We just received subsequent (= not initial) session and about to
// enter into the preceding `while(!is_finished) {...}` loop again.
// Before that, propagate new SchedulingContext to handler threads
runnable_task_sender
.send_chained_channel(new_context, handler_count)
.unwrap();
result_with_timings = new_result_with_timings;
}
Ok(_) => {
unreachable!();
}
Err(_) => {
// This unusual condition must be triggered by ThreadManager::drop();
break 'nonaborted_main_loop;
}
}
}

// There are several code-path reaching here out of the preceding unconditional
Expand Down Expand Up @@ -1152,6 +1150,14 @@ impl<S: SpawnableScheduler<TH>, TH: TaskHandler> ThreadManager<S, TH> {
let finished_blocked_task_sender = finished_blocked_task_sender.clone();
let finished_idle_task_sender = finished_idle_task_sender.clone();

// The following loop maintains and updates SchedulingContext as its
// externally-provided state for each session in this way:
//
// 1. Initial context is propagated implicitly by the moved runnable_task_receiver,
// which is clone()-d just above for this particular thread.
// 2. Subsequent contexts are propagated explicitly inside `.after_select()` as part of
// `select_biased!`, which are sent from `.send_chained_channel()` in the scheduler
// thread for all-but-initial sessions.
move || loop {
let (task, sender) = select_biased! {
recv(runnable_task_receiver.for_select()) -> message => {
Expand Down Expand Up @@ -1327,13 +1333,14 @@ impl<S: SpawnableScheduler<TH>, TH: TaskHandler> ThreadManager<S, TH> {

fn start_session(
&mut self,
context: &SchedulingContext,
context: SchedulingContext,
result_with_timings: ResultWithTimings,
) {
assert!(!self.are_threads_joined());
assert_matches!(self.session_result_with_timings, None);
self.new_task_sender
.send(NewTaskPayload::OpenSubchannel((
context.clone(),
context,
result_with_timings,
)))
.expect("no new session after aborted");
Expand All @@ -1353,7 +1360,7 @@ pub trait SpawnableScheduler<TH: TaskHandler>: InstalledScheduler {

fn spawn(
pool: Arc<SchedulerPool<Self, TH>>,
initial_context: SchedulingContext,
context: SchedulingContext,
result_with_timings: ResultWithTimings,
) -> Self
where
Expand All @@ -1379,21 +1386,23 @@ impl<TH: TaskHandler> SpawnableScheduler<TH> for PooledScheduler<TH> {
) -> Self {
inner
.thread_manager
.start_session(&context, result_with_timings);
.start_session(context.clone(), result_with_timings);
Self { inner, context }
}

fn spawn(
pool: Arc<SchedulerPool<Self, TH>>,
initial_context: SchedulingContext,
context: SchedulingContext,
result_with_timings: ResultWithTimings,
) -> Self {
let mut scheduler = Self::do_spawn(pool, initial_context, result_with_timings);
scheduler
.inner
let mut inner = Self::Inner {
thread_manager: ThreadManager::new(pool),
usage_queue_loader: UsageQueueLoader::default(),
};
inner
.thread_manager
.start_threads(&scheduler.context);
scheduler
.start_threads(context.clone(), result_with_timings);
Self { inner, context }
}
}

Expand Down Expand Up @@ -2775,13 +2784,13 @@ mod tests {

fn spawn(
pool: Arc<SchedulerPool<Self, DefaultTaskHandler>>,
initial_context: SchedulingContext,
context: SchedulingContext,
_result_with_timings: ResultWithTimings,
) -> Self {
AsyncScheduler::<TRIGGER_RACE_CONDITION>(
Mutex::new(initialized_result_with_timings()),
Mutex::new(vec![]),
initial_context,
context,
pool,
)
}
Expand Down

0 comments on commit 40a9851

Please sign in to comment.