Skip to content

Commit cac916f

Browse files
igawkawasaki
authored and committed
blk-mq: prevent offlining hk CPUs with associated online isolated CPUs
When isolcpus=io_queue is enabled and the last housekeeping CPU for a given hctx goes offline, no CPU would be left to handle I/O. To prevent I/O stalls, disallow offlining housekeeping CPUs that are still serving isolated CPUs. Signed-off-by: Daniel Wagner <[email protected]> Reviewed-by: Hannes Reinecke <[email protected]> Signed-off-by: Aaron Tomlin <[email protected]>
1 parent 1ec3378 commit cac916f

1 file changed

Lines changed: 42 additions & 0 deletions

File tree

block/blk-mq.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3720,6 +3720,43 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
37203720
return data.has_rq;
37213721
}
37223722

3723+
static bool blk_mq_hctx_can_offline_hk_cpu(struct blk_mq_hw_ctx *hctx,
3724+
unsigned int this_cpu)
3725+
{
3726+
const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
3727+
3728+
for (int i = 0; i < hctx->nr_ctx; i++) {
3729+
struct blk_mq_ctx *ctx = hctx->ctxs[i];
3730+
3731+
if (ctx->cpu == this_cpu)
3732+
continue;
3733+
3734+
/*
3735+
* Check if this context has at least one online
3736+
* housekeeping CPU; in this case the hardware context is
3737+
* usable.
3738+
*/
3739+
if (cpumask_test_cpu(ctx->cpu, hk_mask) &&
3740+
cpu_online(ctx->cpu))
3741+
break;
3742+
3743+
/*
3744+
* The context doesn't have any online housekeeping CPUs,
3745+
* but there might be an online isolated CPU mapped to
3746+
* it.
3747+
*/
3748+
if (cpu_is_offline(ctx->cpu))
3749+
continue;
3750+
3751+
pr_warn("%s: trying to offline hctx%d but there is still an online isolcpu CPU %d mapped to it\n",
3752+
hctx->queue->disk->disk_name,
3753+
hctx->queue_num, ctx->cpu);
3754+
return false;
3755+
}
3756+
3757+
return true;
3758+
}
3759+
37233760
static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,
37243761
unsigned int this_cpu)
37253762
{
@@ -3752,6 +3789,11 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
37523789
struct blk_mq_hw_ctx, cpuhp_online);
37533790
int ret = 0;
37543791

3792+
if (housekeeping_enabled(HK_TYPE_IO_QUEUE)) {
3793+
if (!blk_mq_hctx_can_offline_hk_cpu(hctx, cpu))
3794+
return -EINVAL;
3795+
}
3796+
37553797
if (!hctx->nr_ctx || blk_mq_hctx_has_online_cpu(hctx, cpu))
37563798
return 0;
37573799

0 commit comments

Comments
 (0)