Skip to content

Commit ebc2164

Browse files
chiarameiohas authored and rleon committed
RDMA/mlx5: Fix UMR hang in LAG error state unload
During firmware reset in LAG mode, a race condition causes the driver to hang indefinitely while waiting for UMR completion during device unload. See [1].

In LAG mode the bond device is only registered on the master, so it never sees sys_error events from the slave. During firmware reset this causes UMR waits to hang forever on unload as the slave is dead but the master hasn't entered error state yet, so UMR posts succeed but completions never arrive.

Fix this by adding a sys_error notifier that gets registered before MLX5_IB_STAGE_IB_REG and stays alive until after ib_unregister_device(). This ensures error events reach the bond device throughout teardown.

[1] Call Trace:
 __schedule+0x2bd/0x760
 schedule+0x37/0xa0
 schedule_preempt_disabled+0xa/0x10
 __mutex_lock.isra.6+0x2b5/0x4a0
 __mlx5_ib_dereg_mr+0x606/0x870 [mlx5_ib]
 ? __xa_erase+0x4a/0xa0
 ? _cond_resched+0x15/0x30
 ? wait_for_completion+0x31/0x100
 ib_dereg_mr_user+0x48/0xc0 [ib_core]
 ? rdmacg_uncharge_hierarchy+0xa0/0x100
 destroy_hw_idr_uobject+0x20/0x50 [ib_uverbs]
 uverbs_destroy_uobject+0x37/0x150 [ib_uverbs]
 __uverbs_cleanup_ufile+0xda/0x140 [ib_uverbs]
 uverbs_destroy_ufile_hw+0x3a/0xf0 [ib_uverbs]
 ib_uverbs_remove_one+0xc3/0x140 [ib_uverbs]
 remove_client_context+0x8b/0xd0 [ib_core]
 disable_device+0x8c/0x130 [ib_core]
 __ib_unregister_device+0x10d/0x180 [ib_core]
 ib_unregister_device+0x21/0x30 [ib_core]
 __mlx5_ib_remove+0x1e4/0x1f0 [mlx5_ib]
 auxiliary_bus_remove+0x1e/0x30
 device_release_driver_internal+0x103/0x1f0
 bus_remove_device+0xf7/0x170
 device_del+0x181/0x410
 mlx5_rescan_drivers_locked.part.10+0xa9/0x1d0 [mlx5_core]
 mlx5_disable_lag+0x253/0x260 [mlx5_core]
 mlx5_lag_disable_change+0x89/0xc0 [mlx5_core]
 mlx5_eswitch_disable+0x67/0xa0 [mlx5_core]
 mlx5_unload+0x15/0xd0 [mlx5_core]
 mlx5_unload_one+0x71/0xc0 [mlx5_core]
 mlx5_sync_reset_reload_work+0x83/0x100 [mlx5_core]
 process_one_work+0x1a7/0x360
 worker_thread+0x30/0x390
 ? create_worker+0x1a0/0x1a0
 kthread+0x116/0x130
 ? kthread_flush_work_fn+0x10/0x10
 ret_from_fork+0x22/0x40

Fixes: ede132a ("RDMA/mlx5: Move events notifier registration to be after device registration")
Signed-off-by: Chiara Meiohas <[email protected]>
Signed-off-by: Maher Sanalla <[email protected]>
Reviewed-by: Mark Bloch <[email protected]>
Signed-off-by: Edward Srouji <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Leon Romanovsky <[email protected]>
1 parent f972bde commit ebc2164

2 files changed

Lines changed: 68 additions & 9 deletions

File tree

drivers/infiniband/hw/mlx5/main.c

Lines changed: 66 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3009,7 +3009,6 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
30093009
container_of(_work, struct mlx5_ib_event_work, work);
30103010
struct mlx5_ib_dev *ibdev;
30113011
struct ib_event ibev;
3012-
bool fatal = false;
30133012

30143013
if (work->is_slave) {
30153014
ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi);
@@ -3020,12 +3019,6 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
30203019
}
30213020

30223021
switch (work->event) {
3023-
case MLX5_DEV_EVENT_SYS_ERROR:
3024-
ibev.event = IB_EVENT_DEVICE_FATAL;
3025-
mlx5_ib_handle_internal_error(ibdev);
3026-
ibev.element.port_num = (u8)(unsigned long)work->param;
3027-
fatal = true;
3028-
break;
30293022
case MLX5_EVENT_TYPE_PORT_CHANGE:
30303023
if (handle_port_change(ibdev, work->param, &ibev))
30313024
goto out;
@@ -3047,8 +3040,6 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
30473040
if (ibdev->ib_active)
30483041
ib_dispatch_event(&ibev);
30493042

3050-
if (fatal)
3051-
ibdev->ib_active = false;
30523043
out:
30533044
kfree(work);
30543045
}
@@ -3092,6 +3083,66 @@ static int mlx5_ib_event_slave_port(struct notifier_block *nb,
30923083
return NOTIFY_OK;
30933084
}
30943085

3086+
static void mlx5_ib_handle_sys_error_event(struct work_struct *_work)
3087+
{
3088+
struct mlx5_ib_event_work *work =
3089+
container_of(_work, struct mlx5_ib_event_work, work);
3090+
struct mlx5_ib_dev *ibdev = work->dev;
3091+
struct ib_event ibev;
3092+
3093+
ibev.event = IB_EVENT_DEVICE_FATAL;
3094+
mlx5_ib_handle_internal_error(ibdev);
3095+
ibev.element.port_num = (u8)(unsigned long)work->param;
3096+
ibev.device = &ibdev->ib_dev;
3097+
3098+
if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) {
3099+
mlx5_ib_warn(ibdev, "warning: event on port %d\n", ibev.element.port_num);
3100+
goto out;
3101+
}
3102+
3103+
if (ibdev->ib_active)
3104+
ib_dispatch_event(&ibev);
3105+
3106+
ibdev->ib_active = false;
3107+
out:
3108+
kfree(work);
3109+
}
3110+
3111+
static int mlx5_ib_sys_error_event(struct notifier_block *nb,
3112+
unsigned long event, void *param)
3113+
{
3114+
struct mlx5_ib_event_work *work;
3115+
3116+
if (event != MLX5_DEV_EVENT_SYS_ERROR)
3117+
return NOTIFY_DONE;
3118+
3119+
work = kmalloc(sizeof(*work), GFP_ATOMIC);
3120+
if (!work)
3121+
return NOTIFY_DONE;
3122+
3123+
INIT_WORK(&work->work, mlx5_ib_handle_sys_error_event);
3124+
work->dev = container_of(nb, struct mlx5_ib_dev, sys_error_events);
3125+
work->is_slave = false;
3126+
work->param = param;
3127+
work->event = event;
3128+
3129+
queue_work(mlx5_ib_event_wq, &work->work);
3130+
3131+
return NOTIFY_OK;
3132+
}
3133+
3134+
static int mlx5_ib_stage_sys_error_notifier_init(struct mlx5_ib_dev *dev)
3135+
{
3136+
dev->sys_error_events.notifier_call = mlx5_ib_sys_error_event;
3137+
mlx5_notifier_register(dev->mdev, &dev->sys_error_events);
3138+
return 0;
3139+
}
3140+
3141+
static void mlx5_ib_stage_sys_error_notifier_cleanup(struct mlx5_ib_dev *dev)
3142+
{
3143+
mlx5_notifier_unregister(dev->mdev, &dev->sys_error_events);
3144+
}
3145+
30953146
static int mlx5_ib_get_plane_num(struct mlx5_core_dev *mdev, u8 *num_plane)
30963147
{
30973148
struct mlx5_hca_vport_context vport_ctx;
@@ -4943,6 +4994,9 @@ static const struct mlx5_ib_profile pf_profile = {
49434994
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
49444995
mlx5_ib_devx_init,
49454996
mlx5_ib_devx_cleanup),
4997+
STAGE_CREATE(MLX5_IB_STAGE_SYS_ERROR_NOTIFIER,
4998+
mlx5_ib_stage_sys_error_notifier_init,
4999+
mlx5_ib_stage_sys_error_notifier_cleanup),
49465000
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
49475001
mlx5_ib_stage_ib_reg_init,
49485002
mlx5_ib_stage_ib_reg_cleanup),
@@ -5000,6 +5054,9 @@ const struct mlx5_ib_profile raw_eth_profile = {
50005054
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
50015055
mlx5_ib_devx_init,
50025056
mlx5_ib_devx_cleanup),
5057+
STAGE_CREATE(MLX5_IB_STAGE_SYS_ERROR_NOTIFIER,
5058+
mlx5_ib_stage_sys_error_notifier_init,
5059+
mlx5_ib_stage_sys_error_notifier_cleanup),
50035060
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
50045061
mlx5_ib_stage_ib_reg_init,
50055062
mlx5_ib_stage_ib_reg_cleanup),

drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1007,6 +1007,7 @@ enum mlx5_ib_stages {
10071007
MLX5_IB_STAGE_BFREG,
10081008
MLX5_IB_STAGE_PRE_IB_REG_UMR,
10091009
MLX5_IB_STAGE_WHITELIST_UID,
1010+
MLX5_IB_STAGE_SYS_ERROR_NOTIFIER,
10101011
MLX5_IB_STAGE_IB_REG,
10111012
MLX5_IB_STAGE_DEVICE_NOTIFIER,
10121013
MLX5_IB_STAGE_POST_IB_REG_UMR,
@@ -1165,6 +1166,7 @@ struct mlx5_ib_dev {
11651166
/* protect accessing data_direct_dev */
11661167
struct mutex data_direct_lock;
11671168
struct notifier_block mdev_events;
1169+
struct notifier_block sys_error_events;
11681170
struct notifier_block lag_events;
11691171
int num_ports;
11701172
/* serialize update of capability mask

0 commit comments

Comments
 (0)