Skip to content

Commit 6b13cb8

Browse files
committed
accel/amdxdna: Fix runtime suspend deadlock when there is pending job
The runtime suspend callback drains the running job workqueue before suspending the device. If a job is still executing and calls pm_runtime_resume_and_get(), it can deadlock with the runtime suspend path.

Fix this by moving pm_runtime_resume_and_get() from the job execution routine to the job submission routine, ensuring the device is resumed before the job is queued and avoiding the deadlock during runtime suspend.

Fixes: 063db45 ("accel/amdxdna: Enhance runtime power management")
Reviewed-by: Mario Limonciello (AMD) <[email protected]>
Signed-off-by: Lizhi Hou <[email protected]>
Link: https://patch.msgid.link/[email protected]
1 parent 59bdbab commit 6b13cb8

2 files changed

Lines changed: 12 additions & 12 deletions

File tree

drivers/accel/amdxdna/aie2_ctx.c

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,6 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
165165

166166
trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
167167

168-
amdxdna_pm_suspend_put(job->hwctx->client->xdna);
169168
job->hwctx->priv->completed++;
170169
dma_fence_signal(fence);
171170

@@ -290,19 +289,11 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
290289
struct dma_fence *fence;
291290
int ret;
292291

293-
ret = amdxdna_pm_resume_get(hwctx->client->xdna);
294-
if (ret)
292+
if (!hwctx->priv->mbox_chann)
295293
return NULL;
296294

297-
if (!hwctx->priv->mbox_chann) {
298-
amdxdna_pm_suspend_put(hwctx->client->xdna);
299-
return NULL;
300-
}
301-
302-
if (!mmget_not_zero(job->mm)) {
303-
amdxdna_pm_suspend_put(hwctx->client->xdna);
295+
if (!mmget_not_zero(job->mm))
304296
return ERR_PTR(-ESRCH);
305-
}
306297

307298
kref_get(&job->refcnt);
308299
fence = dma_fence_get(job->fence);
@@ -333,7 +324,6 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
333324

334325
out:
335326
if (ret) {
336-
amdxdna_pm_suspend_put(hwctx->client->xdna);
337327
dma_fence_put(job->fence);
338328
aie2_job_put(job);
339329
mmput(job->mm);

drivers/accel/amdxdna/amdxdna_ctx.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "amdxdna_ctx.h"
1818
#include "amdxdna_gem.h"
1919
#include "amdxdna_pci_drv.h"
20+
#include "amdxdna_pm.h"
2021

2122
#define MAX_HWCTX_ID 255
2223
#define MAX_ARG_COUNT 4095
@@ -445,6 +446,7 @@ amdxdna_arg_bos_lookup(struct amdxdna_client *client,
445446
void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job)
446447
{
447448
trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release");
449+
amdxdna_pm_suspend_put(job->hwctx->client->xdna);
448450
amdxdna_arg_bos_put(job);
449451
amdxdna_gem_put_obj(job->cmd_bo);
450452
dma_fence_put(job->fence);
@@ -482,6 +484,12 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
482484
goto cmd_put;
483485
}
484486

487+
ret = amdxdna_pm_resume_get(xdna);
488+
if (ret) {
489+
XDNA_ERR(xdna, "Resume failed, ret %d", ret);
490+
goto put_bos;
491+
}
492+
485493
idx = srcu_read_lock(&client->hwctx_srcu);
486494
hwctx = xa_load(&client->hwctx_xa, hwctx_hdl);
487495
if (!hwctx) {
@@ -522,6 +530,8 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
522530
dma_fence_put(job->fence);
523531
unlock_srcu:
524532
srcu_read_unlock(&client->hwctx_srcu, idx);
533+
amdxdna_pm_suspend_put(xdna);
534+
put_bos:
525535
amdxdna_arg_bos_put(job);
526536
cmd_put:
527537
amdxdna_gem_put_obj(job->cmd_bo);

0 commit comments

Comments
 (0)