Skip to content

Commit a6e7732

Browse files
committed
Merge tag 'drm-xe-fixes-2026-03-19' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
Driver Changes: - A number of teardown fixes (Daniele, Matt Brost, Zhanjun, Ashutosh) - Skip over non-leaf PTE for PRL generation (Brian) - Fix an uninitialized variable (Umesh) - Fix a missing runtime PM reference (Sanjay) Signed-off-by: Dave Airlie <[email protected]> From: Thomas Hellstrom <[email protected]> Link: https://patch.msgid.link/abxj4_dBHYBiSvDG@fedora
2 parents a15130d + 65d046b commit a6e7732

10 files changed

Lines changed: 136 additions & 50 deletions

File tree

drivers/gpu/drm/xe/xe_ggtt.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,8 @@ static void dev_fini_ggtt(void *arg)
313313
{
314314
struct xe_ggtt *ggtt = arg;
315315

316+
scoped_guard(mutex, &ggtt->lock)
317+
ggtt->flags &= ~XE_GGTT_FLAGS_ONLINE;
316318
drain_workqueue(ggtt->wq);
317319
}
318320

@@ -377,6 +379,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
377379
if (err)
378380
return err;
379381

382+
ggtt->flags |= XE_GGTT_FLAGS_ONLINE;
380383
err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt);
381384
if (err)
382385
return err;
@@ -410,13 +413,10 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
410413
static void ggtt_node_remove(struct xe_ggtt_node *node)
411414
{
412415
struct xe_ggtt *ggtt = node->ggtt;
413-
struct xe_device *xe = tile_to_xe(ggtt->tile);
414416
bool bound;
415-
int idx;
416-
417-
bound = drm_dev_enter(&xe->drm, &idx);
418417

419418
mutex_lock(&ggtt->lock);
419+
bound = ggtt->flags & XE_GGTT_FLAGS_ONLINE;
420420
if (bound)
421421
xe_ggtt_clear(ggtt, node->base.start, node->base.size);
422422
drm_mm_remove_node(&node->base);
@@ -429,8 +429,6 @@ static void ggtt_node_remove(struct xe_ggtt_node *node)
429429
if (node->invalidate_on_remove)
430430
xe_ggtt_invalidate(ggtt);
431431

432-
drm_dev_exit(idx);
433-
434432
free_node:
435433
xe_ggtt_node_fini(node);
436434
}

drivers/gpu/drm/xe/xe_ggtt_types.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,14 @@ struct xe_ggtt {
2828
/** @size: Total usable size of this GGTT */
2929
u64 size;
3030

31-
#define XE_GGTT_FLAGS_64K BIT(0)
31+
#define XE_GGTT_FLAGS_64K BIT(0)
32+
#define XE_GGTT_FLAGS_ONLINE BIT(1)
3233
/**
3334
* @flags: Flags for this GGTT
3435
* Acceptable flags:
3536
* - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K.
37+
* - %XE_GGTT_FLAGS_ONLINE - is GGTT online, protected by ggtt->lock
38+
* after init
3639
*/
3740
unsigned int flags;
3841
/** @scratch: Internal object allocation used as a scratch page */

drivers/gpu/drm/xe/xe_gt_ccs_mode.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "xe_gt_printk.h"
1313
#include "xe_gt_sysfs.h"
1414
#include "xe_mmio.h"
15+
#include "xe_pm.h"
1516
#include "xe_sriov.h"
1617

1718
static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
@@ -150,6 +151,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr,
150151
xe_gt_info(gt, "Setting compute mode to %d\n", num_engines);
151152
gt->ccs_mode = num_engines;
152153
xe_gt_record_user_engines(gt);
154+
guard(xe_pm_runtime)(xe);
153155
xe_gt_reset(gt);
154156
}
155157

drivers/gpu/drm/xe/xe_guc.c

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1124,22 +1124,22 @@ static int guc_wait_ucode(struct xe_guc *guc)
11241124
struct xe_guc_pc *guc_pc = &gt->uc.guc.pc;
11251125
u32 before_freq, act_freq, cur_freq;
11261126
u32 status = 0, tries = 0;
1127+
int load_result, ret;
11271128
ktime_t before;
11281129
u64 delta_ms;
1129-
int ret;
11301130

11311131
before_freq = xe_guc_pc_get_act_freq(guc_pc);
11321132
before = ktime_get();
11331133

1134-
ret = poll_timeout_us(ret = guc_load_done(gt, &status, &tries), ret,
1134+
ret = poll_timeout_us(load_result = guc_load_done(gt, &status, &tries), load_result,
11351135
10 * USEC_PER_MSEC,
11361136
GUC_LOAD_TIMEOUT_SEC * USEC_PER_SEC, false);
11371137

11381138
delta_ms = ktime_to_ms(ktime_sub(ktime_get(), before));
11391139
act_freq = xe_guc_pc_get_act_freq(guc_pc);
11401140
cur_freq = xe_guc_pc_get_cur_freq_fw(guc_pc);
11411141

1142-
if (ret) {
1142+
if (ret || load_result <= 0) {
11431143
xe_gt_err(gt, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz (req %dMHz)\n",
11441144
status, delta_ms, xe_guc_pc_get_act_freq(guc_pc),
11451145
xe_guc_pc_get_cur_freq_fw(guc_pc));
@@ -1347,15 +1347,37 @@ int xe_guc_enable_communication(struct xe_guc *guc)
13471347
return 0;
13481348
}
13491349

1350-
int xe_guc_suspend(struct xe_guc *guc)
1350+
/**
1351+
* xe_guc_softreset() - Soft reset GuC
1352+
* @guc: The GuC object
1353+
*
1354+
* Send soft reset command to GuC through mmio send.
1355+
*
1356+
* Return: 0 if success, otherwise error code
1357+
*/
1358+
int xe_guc_softreset(struct xe_guc *guc)
13511359
{
1352-
struct xe_gt *gt = guc_to_gt(guc);
13531360
u32 action[] = {
13541361
XE_GUC_ACTION_CLIENT_SOFT_RESET,
13551362
};
13561363
int ret;
13571364

1365+
if (!xe_uc_fw_is_running(&guc->fw))
1366+
return 0;
1367+
13581368
ret = xe_guc_mmio_send(guc, action, ARRAY_SIZE(action));
1369+
if (ret)
1370+
return ret;
1371+
1372+
return 0;
1373+
}
1374+
1375+
int xe_guc_suspend(struct xe_guc *guc)
1376+
{
1377+
struct xe_gt *gt = guc_to_gt(guc);
1378+
int ret;
1379+
1380+
ret = xe_guc_softreset(guc);
13591381
if (ret) {
13601382
xe_gt_err(gt, "GuC suspend failed: %pe\n", ERR_PTR(ret));
13611383
return ret;

drivers/gpu/drm/xe/xe_guc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ int xe_guc_opt_in_features_enable(struct xe_guc *guc);
4444
void xe_guc_runtime_suspend(struct xe_guc *guc);
4545
void xe_guc_runtime_resume(struct xe_guc *guc);
4646
int xe_guc_suspend(struct xe_guc *guc);
47+
int xe_guc_softreset(struct xe_guc *guc);
4748
void xe_guc_notify(struct xe_guc *guc);
4849
int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr);
4950
int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len);

drivers/gpu/drm/xe/xe_guc_ct.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@ static void guc_action_disable_ct(void *arg)
345345
{
346346
struct xe_guc_ct *ct = arg;
347347

348+
xe_guc_ct_stop(ct);
348349
guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED);
349350
}
350351

drivers/gpu/drm/xe/xe_guc_submit.c

Lines changed: 61 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@
4848

4949
#define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6
5050

51+
static int guc_submit_reset_prepare(struct xe_guc *guc);
52+
5153
static struct xe_guc *
5254
exec_queue_to_guc(struct xe_exec_queue *q)
5355
{
@@ -239,7 +241,7 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
239241
EXEC_QUEUE_STATE_BANNED));
240242
}
241243

242-
static void guc_submit_fini(struct drm_device *drm, void *arg)
244+
static void guc_submit_sw_fini(struct drm_device *drm, void *arg)
243245
{
244246
struct xe_guc *guc = arg;
245247
struct xe_device *xe = guc_to_xe(guc);
@@ -257,6 +259,19 @@ static void guc_submit_fini(struct drm_device *drm, void *arg)
257259
xa_destroy(&guc->submission_state.exec_queue_lookup);
258260
}
259261

262+
static void guc_submit_fini(void *arg)
263+
{
264+
struct xe_guc *guc = arg;
265+
266+
/* Forcefully kill any remaining exec queues */
267+
xe_guc_ct_stop(&guc->ct);
268+
guc_submit_reset_prepare(guc);
269+
xe_guc_softreset(guc);
270+
xe_guc_submit_stop(guc);
271+
xe_uc_fw_sanitize(&guc->fw);
272+
xe_guc_submit_pause_abort(guc);
273+
}
274+
260275
static void guc_submit_wedged_fini(void *arg)
261276
{
262277
struct xe_guc *guc = arg;
@@ -326,7 +341,11 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
326341

327342
guc->submission_state.initialized = true;
328343

329-
return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
344+
err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc);
345+
if (err)
346+
return err;
347+
348+
return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc);
330349
}
331350

332351
/*
@@ -1252,6 +1271,7 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
12521271
*/
12531272
void xe_guc_submit_wedge(struct xe_guc *guc)
12541273
{
1274+
struct xe_device *xe = guc_to_xe(guc);
12551275
struct xe_gt *gt = guc_to_gt(guc);
12561276
struct xe_exec_queue *q;
12571277
unsigned long index;
@@ -1266,20 +1286,28 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
12661286
if (!guc->submission_state.initialized)
12671287
return;
12681288

1269-
err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
1270-
guc_submit_wedged_fini, guc);
1271-
if (err) {
1272-
xe_gt_err(gt, "Failed to register clean-up in wedged.mode=%s; "
1273-
"Although device is wedged.\n",
1274-
xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET));
1275-
return;
1276-
}
1289+
if (xe->wedged.mode == 2) {
1290+
err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
1291+
guc_submit_wedged_fini, guc);
1292+
if (err) {
1293+
xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; "
1294+
"Although device is wedged.\n");
1295+
return;
1296+
}
12771297

1278-
mutex_lock(&guc->submission_state.lock);
1279-
xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
1280-
if (xe_exec_queue_get_unless_zero(q))
1281-
set_exec_queue_wedged(q);
1282-
mutex_unlock(&guc->submission_state.lock);
1298+
mutex_lock(&guc->submission_state.lock);
1299+
xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
1300+
if (xe_exec_queue_get_unless_zero(q))
1301+
set_exec_queue_wedged(q);
1302+
mutex_unlock(&guc->submission_state.lock);
1303+
} else {
1304+
/* Forcefully kill any remaining exec queues, signal fences */
1305+
guc_submit_reset_prepare(guc);
1306+
xe_guc_submit_stop(guc);
1307+
xe_guc_softreset(guc);
1308+
xe_uc_fw_sanitize(&guc->fw);
1309+
xe_guc_submit_pause_abort(guc);
1310+
}
12831311
}
12841312

12851313
static bool guc_submit_hint_wedged(struct xe_guc *guc)
@@ -2230,14 +2258,15 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = {
22302258
static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
22312259
{
22322260
struct xe_gpu_scheduler *sched = &q->guc->sched;
2261+
bool do_destroy = false;
22332262

22342263
/* Stop scheduling + flush any DRM scheduler operations */
22352264
xe_sched_submission_stop(sched);
22362265

22372266
/* Clean up lost G2H + reset engine state */
22382267
if (exec_queue_registered(q)) {
22392268
if (exec_queue_destroyed(q))
2240-
__guc_exec_queue_destroy(guc, q);
2269+
do_destroy = true;
22412270
}
22422271
if (q->guc->suspend_pending) {
22432272
set_exec_queue_suspended(q);
@@ -2273,18 +2302,15 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
22732302
xe_guc_exec_queue_trigger_cleanup(q);
22742303
}
22752304
}
2305+
2306+
if (do_destroy)
2307+
__guc_exec_queue_destroy(guc, q);
22762308
}
22772309

2278-
int xe_guc_submit_reset_prepare(struct xe_guc *guc)
2310+
static int guc_submit_reset_prepare(struct xe_guc *guc)
22792311
{
22802312
int ret;
22812313

2282-
if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
2283-
return 0;
2284-
2285-
if (!guc->submission_state.initialized)
2286-
return 0;
2287-
22882314
/*
22892315
* Using an atomic here rather than submission_state.lock as this
22902316
* function can be called while holding the CT lock (engine reset
@@ -2299,6 +2325,17 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc)
22992325
return ret;
23002326
}
23012327

2328+
int xe_guc_submit_reset_prepare(struct xe_guc *guc)
2329+
{
2330+
if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
2331+
return 0;
2332+
2333+
if (!guc->submission_state.initialized)
2334+
return 0;
2335+
2336+
return guc_submit_reset_prepare(guc);
2337+
}
2338+
23022339
void xe_guc_submit_reset_wait(struct xe_guc *guc)
23032340
{
23042341
wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
@@ -2695,8 +2732,7 @@ void xe_guc_submit_pause_abort(struct xe_guc *guc)
26952732
continue;
26962733

26972734
xe_sched_submission_start(sched);
2698-
if (exec_queue_killed_or_banned_or_wedged(q))
2699-
xe_guc_exec_queue_trigger_cleanup(q);
2735+
guc_exec_queue_kill(q);
27002736
}
27012737
mutex_unlock(&guc->submission_state.lock);
27022738
}

drivers/gpu/drm/xe/xe_lrc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2413,14 +2413,14 @@ static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts)
24132413
* @lrc: Pointer to the lrc.
24142414
*
24152415
* Return latest ctx timestamp. With support for active contexts, the
2416-
* calculation may bb slightly racy, so follow a read-again logic to ensure that
2416+
* calculation may be slightly racy, so follow a read-again logic to ensure that
24172417
* the context is still active before returning the right timestamp.
24182418
*
24192419
* Returns: New ctx timestamp value
24202420
*/
24212421
u64 xe_lrc_timestamp(struct xe_lrc *lrc)
24222422
{
2423-
u64 lrc_ts, reg_ts, new_ts;
2423+
u64 lrc_ts, reg_ts, new_ts = lrc->ctx_timestamp;
24242424
u32 engine_id;
24252425

24262426
lrc_ts = xe_lrc_ctx_timestamp(lrc);

drivers/gpu/drm/xe/xe_oa.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -543,8 +543,7 @@ static ssize_t xe_oa_read(struct file *file, char __user *buf,
543543
size_t offset = 0;
544544
int ret;
545545

546-
/* Can't read from disabled streams */
547-
if (!stream->enabled || !stream->sample)
546+
if (!stream->sample)
548547
return -EINVAL;
549548

550549
if (!(file->f_flags & O_NONBLOCK)) {
@@ -1460,6 +1459,10 @@ static void xe_oa_stream_disable(struct xe_oa_stream *stream)
14601459

14611460
if (stream->sample)
14621461
hrtimer_cancel(&stream->poll_check_timer);
1462+
1463+
/* Update stream->oa_buffer.tail to allow any final reports to be read */
1464+
if (xe_oa_buffer_check_unlocked(stream))
1465+
wake_up(&stream->poll_wq);
14631466
}
14641467

14651468
static int xe_oa_enable_preempt_timeslice(struct xe_oa_stream *stream)

0 commit comments

Comments
 (0)