Skip to content

Commit 6596a02

Browse files
committed
Merge tag 'drm-next-2026-04-22' of https://gitlab.freedesktop.org/drm/kernel
Pull more drm updates from Dave Airlie: "This is a followup which is mostly next material with some fixes. Alex pointed out I missed one of his AMD MRs from last week, so I added that, then Jani sent the pipe reordering stuff, otherwise it's just some minor i915 fixes and a dma-buf fix. drm: - Add support for AMD VSDB parsing to drm_edid dma-buf: - fix documentation formatting i915: - add support for reordered pipes to support joined pipes better - Fix VESA backlight possible check condition - Verify the correct plane DDB entry amdgpu: - Audio regression fix - Use drm edid parser for AMD VSDB - Misc cleanups - VCE cs parse fixes - VCN cs parse fixes - RAS fixes - Clean up and unify vram reservation handling - GPU Partition updates - system_wq cleanups - Add CONFIG_GCOV_PROFILE_AMDGPU kconfig option - SMU vram copy updates - SMU 13/14/15 fixes - UserQ fixes - Replace pasid idr with an xarray - Dither handling fix - Enable amdgpu by default for CIK APUs - Add IBs to devcoredump amdkfd: - system_wq cleanups radeon: - system_wq cleanups" * tag 'drm-next-2026-04-22' of https://gitlab.freedesktop.org/drm/kernel: (62 commits) drm/i915/display: change pipe allocation order for discrete platforms drm/i915/wm: Verify the correct plane DDB entry drm/i915/backlight: Fix VESA backlight possible check condition drm/i915: Walk crtcs in pipe order drm/i915/joiner: Make joiner "nomodeset" state copy independent of pipe order dma-buf: fix htmldocs error for dma_buf_attach_revocable drm/amdgpu: dump job ibs in the devcoredump drm/amdgpu: store ib info for devcoredump drm/amdgpu: extract amdgpu_vm_lock_by_pasid from amdgpu_vm_handle_fault drm/amdgpu: Use amdgpu by default for CIK APUs too drm/amd/display: Remove unused NUM_ELEMENTS macros drm/amd/display: Replace inline NUM_ELEMENTS macro with ARRAY_SIZE drm/amdgpu: save ring content before resetting the device drm/amdgpu: make userq fence_drv drop explicit in queue destroy drm/amdgpu: rework userq fence driver alloc/destroy 
drm/amdgpu/userq: use dma_fence_wait_timeout without test for signalled drm/amdgpu/userq: call dma_resv_wait_timeout without test for signalled drm/amdgpu/userq: add the return code too in error condition drm/amdgpu/userq: fence wait for max time in amdgpu_userq_wait_for_signal drm/amd/display: Change dither policy for 10 bpc output back to dithering ...
2 parents d46dd0d + a775637 commit 6596a02

85 files changed

Lines changed: 1162 additions & 777 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

drivers/dma-buf/dma-buf.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1353,6 +1353,7 @@ EXPORT_SYMBOL_NS_GPL(dma_buf_attach_revocable, "DMA_BUF");
13531353
* Upon return importers may continue to access the DMA-buf memory. The caller
13541354
* must do two additional waits to ensure that the memory is no longer being
13551355
* accessed:
1356+
*
13561357
* 1) Until dma_resv_wait_timeout() retires fences the importer is allowed to
13571358
* fully access the memory.
13581359
* 2) Until the importer calls unmap it is allowed to speculatively

drivers/gpu/drm/amd/amdgpu/Kconfig

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,23 @@ config DRM_AMDGPU_WERROR
103103
Add -Werror to the build flags for amdgpu.ko.
104104
Only enable this if you are warning code for amdgpu.ko.
105105

106+
107+
config GCOV_PROFILE_AMDGPU
108+
bool "Enable GCOV profiling on amdgpu"
109+
depends on DRM_AMDGPU
110+
depends on GCOV_KERNEL
111+
default n
112+
help
113+
Enable GCOV profiling on the amdgpu driver for checking which
114+
functions/lines are executed during testing. This adds compiler
115+
instrumentation flags to all amdgpu source files, producing
116+
.gcda/.gcno coverage data accessible via debugfs.
117+
118+
This increases the amdgpu module size by ~50% and adds ~2-5%
119+
runtime overhead on GPU submission paths.
120+
121+
If unsure, say N.
122+
106123
source "drivers/gpu/drm/amd/acp/Kconfig"
107124
source "drivers/gpu/drm/amd/display/Kconfig"
108125
source "drivers/gpu/drm/amd/amdkfd/Kconfig"

drivers/gpu/drm/amd/amdgpu/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ FULL_AMD_PATH=$(src)/..
2727
DISPLAY_FOLDER_NAME=display
2828
FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME)
2929

30+
ifdef CONFIG_GCOV_PROFILE_AMDGPU
31+
GCOV_PROFILE := y
32+
endif
33+
3034
ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
3135
-I$(FULL_AMD_PATH)/include \
3236
-I$(FULL_AMD_PATH)/amdgpu \

drivers/gpu/drm/amd/amdgpu/aldebaran.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
179179
list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
180180
/* For XGMI run all resets in parallel to speed up the process */
181181
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
182-
if (!queue_work(system_unbound_wq,
182+
if (!queue_work(system_dfl_wq,
183183
&tmp_adev->reset_cntl->reset_work))
184184
r = -EALREADY;
185185
} else

drivers/gpu/drm/amd/amdgpu/amdgpu.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,11 +1045,6 @@ struct amdgpu_device {
10451045
struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];
10461046
const struct amdgpu_userq_funcs *userq_funcs[AMDGPU_HW_IP_NUM];
10471047

1048-
/* xarray used to retrieve the user queue fence driver reference
1049-
* in the EOP interrupt handler to signal the particular user
1050-
* queue fence.
1051-
*/
1052-
struct xarray userq_xa;
10531048
/**
10541049
* @userq_doorbell_xa: Global user queue map (doorbell index → queue)
10551050
* Key: doorbell_index (unique global identifier for the queue)

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -805,7 +805,10 @@ u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
805805
} else {
806806
tmp = adev->gmc.mem_partitions[mem_id].size;
807807
}
808-
do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
808+
809+
if (adev->xcp_mgr->mem_alloc_mode == AMDGPU_PARTITION_MEM_CAPPING_EVEN)
810+
do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
811+
809812
return ALIGN_DOWN(tmp, PAGE_SIZE);
810813
} else if (adev->apu_prefer_gtt) {
811814
return (ttm_tt_pages_limit() << PAGE_SHIFT);

drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1685,9 +1685,9 @@ static int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
16851685
(uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
16861686
ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
16871687
/* Firmware request VRAM reservation for SR-IOV */
1688-
adev->mman.fw_vram_usage_start_offset = (start_addr &
1689-
(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
1690-
adev->mman.fw_vram_usage_size = size << 10;
1688+
amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_FW_VRAM_USAGE,
1689+
(start_addr & (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10,
1690+
size << 10, true);
16911691
/* Use the default scratch size */
16921692
usage_bytes = 0;
16931693
} else {

drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,9 @@ static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev,
120120
(u32)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
121121
ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
122122
/* Firmware request VRAM reservation for SR-IOV */
123-
adev->mman.fw_vram_usage_start_offset = (start_addr &
124-
(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
125-
adev->mman.fw_vram_usage_size = fw_size << 10;
123+
amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_FW_VRAM_USAGE,
124+
(start_addr & (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10,
125+
fw_size << 10, true);
126126
/* Use the default scratch size */
127127
*usage_bytes = 0;
128128
} else {
@@ -152,18 +152,18 @@ static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev,
152152
((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
153153
ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
154154
/* Firmware request VRAM reservation for SR-IOV */
155-
adev->mman.fw_vram_usage_start_offset = (fw_start_addr &
156-
(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
157-
adev->mman.fw_vram_usage_size = fw_size << 10;
155+
amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_FW_VRAM_USAGE,
156+
(fw_start_addr & (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10,
157+
fw_size << 10, true);
158158
}
159159

160160
if (amdgpu_sriov_vf(adev) &&
161161
((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
162162
ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
163163
/* driver request VRAM reservation for SR-IOV */
164-
adev->mman.drv_vram_usage_start_offset = (drv_start_addr &
165-
(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
166-
adev->mman.drv_vram_usage_size = drv_size << 10;
164+
amdgpu_ttm_init_vram_resv(adev, AMDGPU_RESV_DRV_VRAM_USAGE,
165+
(drv_start_addr & (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10,
166+
drv_size << 10, true);
167167
}
168168

169169
*usage_bytes = 0;

drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c

Lines changed: 171 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -210,12 +210,24 @@ static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev,
210210
static ssize_t
211211
amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_info *coredump)
212212
{
213+
struct amdgpu_device *adev = coredump->adev;
213214
struct drm_printer p;
214215
struct drm_print_iterator iter;
215216
struct amdgpu_vm_fault_info *fault_info;
217+
struct amdgpu_bo_va_mapping *mapping;
216218
struct amdgpu_ip_block *ip_block;
217-
int ver;
218-
219+
struct amdgpu_res_cursor cursor;
220+
struct amdgpu_bo *abo, *root;
221+
uint64_t va_start, offset;
222+
struct amdgpu_ring *ring;
223+
struct amdgpu_vm *vm;
224+
u32 *ib_content;
225+
uint8_t *kptr;
226+
int ver, i, j, r;
227+
u32 ring_idx, off;
228+
bool sizing_pass;
229+
230+
sizing_pass = buffer == NULL;
219231
iter.data = buffer;
220232
iter.offset = 0;
221233
iter.remain = count;
@@ -303,23 +315,25 @@ amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_inf
303315

304316
/* Add ring buffer information */
305317
drm_printf(&p, "Ring buffer information\n");
306-
for (int i = 0; i < coredump->adev->num_rings; i++) {
307-
int j = 0;
308-
struct amdgpu_ring *ring = coredump->adev->rings[i];
309-
310-
drm_printf(&p, "ring name: %s\n", ring->name);
311-
drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n",
312-
amdgpu_ring_get_rptr(ring),
313-
amdgpu_ring_get_wptr(ring),
314-
ring->buf_mask);
315-
drm_printf(&p, "Ring size in dwords: %d\n",
316-
ring->ring_size / 4);
317-
drm_printf(&p, "Ring contents\n");
318-
drm_printf(&p, "Offset \t Value\n");
319-
320-
while (j < ring->ring_size) {
321-
drm_printf(&p, "0x%x \t 0x%x\n", j, ring->ring[j / 4]);
322-
j += 4;
318+
if (coredump->num_rings) {
319+
for (i = 0; i < coredump->num_rings; i++) {
320+
ring_idx = coredump->rings[i].ring_index;
321+
ring = coredump->adev->rings[ring_idx];
322+
off = coredump->rings[i].offset;
323+
324+
drm_printf(&p, "ring name: %s\n", ring->name);
325+
drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n",
326+
coredump->rings[i].rptr,
327+
coredump->rings[i].wptr,
328+
ring->buf_mask);
329+
drm_printf(&p, "Ring size in dwords: %d\n",
330+
ring->ring_size / 4);
331+
drm_printf(&p, "Ring contents\n");
332+
drm_printf(&p, "Offset \t Value\n");
333+
334+
for (j = 0; j < ring->ring_size; j += 4)
335+
drm_printf(&p, "0x%x \t 0x%x\n", j,
336+
coredump->rings_dw[off + j / 4]);
323337
}
324338
}
325339

@@ -328,6 +342,87 @@ amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_inf
328342
else if (coredump->reset_vram_lost)
329343
drm_printf(&p, "VRAM is lost due to GPU reset!\n");
330344

345+
if (coredump->num_ibs) {
346+
/* Don't try to lookup the VM or map the BOs when calculating the
347+
* size required to store the devcoredump.
348+
*/
349+
if (sizing_pass)
350+
vm = NULL;
351+
else
352+
vm = amdgpu_vm_lock_by_pasid(adev, &root, coredump->pasid);
353+
354+
for (int i = 0; i < coredump->num_ibs && (sizing_pass || vm); i++) {
355+
ib_content = kvmalloc_array(coredump->ibs[i].ib_size_dw, 4,
356+
GFP_KERNEL);
357+
if (!ib_content)
358+
continue;
359+
360+
/* vm=NULL can only happen when 'sizing_pass' is true. Skip to the
361+
* drm_printf() calls (ib_content doesn't need to be initialized
362+
* as its content won't be written anywhere).
363+
*/
364+
if (!vm)
365+
goto output_ib_content;
366+
367+
va_start = coredump->ibs[i].gpu_addr & AMDGPU_GMC_HOLE_MASK;
368+
mapping = amdgpu_vm_bo_lookup_mapping(vm, va_start / AMDGPU_GPU_PAGE_SIZE);
369+
if (!mapping)
370+
goto free_ib_content;
371+
372+
offset = va_start - (mapping->start * AMDGPU_GPU_PAGE_SIZE);
373+
abo = amdgpu_bo_ref(mapping->bo_va->base.bo);
374+
r = amdgpu_bo_reserve(abo, false);
375+
if (r)
376+
goto free_ib_content;
377+
378+
if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) {
379+
off = 0;
380+
381+
if (abo->tbo.resource->mem_type != TTM_PL_VRAM)
382+
goto unreserve_abo;
383+
384+
amdgpu_res_first(abo->tbo.resource, offset,
385+
coredump->ibs[i].ib_size_dw * 4,
386+
&cursor);
387+
while (cursor.remaining) {
388+
amdgpu_device_mm_access(adev, cursor.start / 4,
389+
&ib_content[off], cursor.size / 4,
390+
false);
391+
off += cursor.size;
392+
amdgpu_res_next(&cursor, cursor.size);
393+
}
394+
} else {
395+
r = ttm_bo_kmap(&abo->tbo, 0,
396+
PFN_UP(abo->tbo.base.size),
397+
&abo->kmap);
398+
if (r)
399+
goto unreserve_abo;
400+
401+
kptr = amdgpu_bo_kptr(abo);
402+
kptr += offset;
403+
memcpy(ib_content, kptr,
404+
coredump->ibs[i].ib_size_dw * 4);
405+
406+
amdgpu_bo_kunmap(abo);
407+
}
408+
409+
output_ib_content:
410+
drm_printf(&p, "\nIB #%d 0x%llx %d dw\n",
411+
i, coredump->ibs[i].gpu_addr, coredump->ibs[i].ib_size_dw);
412+
for (int j = 0; j < coredump->ibs[i].ib_size_dw; j++)
413+
drm_printf(&p, "0x%08x\n", ib_content[j]);
414+
unreserve_abo:
415+
if (vm)
416+
amdgpu_bo_unreserve(abo);
417+
free_ib_content:
418+
kvfree(ib_content);
419+
}
420+
if (vm) {
421+
amdgpu_bo_unreserve(root);
422+
amdgpu_bo_unref(&root);
423+
}
424+
}
425+
331426
return count - iter.remain;
332427
}
333428

@@ -359,6 +454,8 @@ static void amdgpu_devcoredump_free(void *data)
359454
struct amdgpu_coredump_info *coredump = data;
360455

361456
kvfree(coredump->formatted);
457+
kvfree(coredump->rings);
458+
kvfree(coredump->rings_dw);
362459
kvfree(data);
363460
}
364461

@@ -395,18 +492,26 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
395492
{
396493
struct drm_device *dev = adev_to_drm(adev);
397494
struct amdgpu_coredump_info *coredump;
495+
size_t size = sizeof(*coredump);
398496
struct drm_sched_job *s_job;
497+
u64 total_ring_size, ring_count;
498+
struct amdgpu_ring *ring;
499+
int i, off, idx;
399500

400501
/* No need to generate a new coredump if there's one in progress already. */
401502
if (work_pending(&adev->coredump_work))
402503
return;
403504

404-
coredump = kzalloc_obj(*coredump, GFP_NOWAIT);
505+
if (job && job->pasid)
506+
size += sizeof(struct amdgpu_coredump_ib_info) * job->num_ibs;
507+
508+
coredump = kzalloc(size, GFP_NOWAIT);
405509
if (!coredump)
406510
return;
407511

408512
coredump->skip_vram_check = skip_vram_check;
409513
coredump->reset_vram_lost = vram_lost;
514+
coredump->pasid = job->pasid;
410515

411516
if (job && job->pasid) {
412517
struct amdgpu_task_info *ti;
@@ -416,13 +521,59 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
416521
coredump->reset_task_info = *ti;
417522
amdgpu_vm_put_task_info(ti);
418523
}
524+
coredump->num_ibs = job->num_ibs;
525+
for (i = 0; i < job->num_ibs; ++i) {
526+
coredump->ibs[i].gpu_addr = job->ibs[i].gpu_addr;
527+
coredump->ibs[i].ib_size_dw = job->ibs[i].length_dw;
528+
}
419529
}
420530

421531
if (job) {
422532
s_job = &job->base;
423533
coredump->ring = to_amdgpu_ring(s_job->sched);
424534
}
425535

536+
/* Dump ring content if memory allocation succeeds. */
537+
ring_count = 0;
538+
total_ring_size = 0;
539+
for (i = 0; i < adev->num_rings; i++) {
540+
ring = adev->rings[i];
541+
542+
/* Only dump rings with unsignalled fences. */
543+
if (atomic_read(&ring->fence_drv.last_seq) == ring->fence_drv.sync_seq &&
544+
coredump->ring != ring)
545+
continue;
546+
547+
total_ring_size += ring->ring_size;
548+
ring_count++;
549+
}
550+
coredump->rings_dw = kzalloc(total_ring_size, GFP_NOWAIT);
551+
coredump->rings = kcalloc(ring_count, sizeof(struct amdgpu_coredump_ring), GFP_NOWAIT);
552+
if (coredump->rings && coredump->rings_dw) {
553+
for (i = 0, off = 0, idx = 0; i < adev->num_rings; i++) {
554+
ring = adev->rings[i];
555+
556+
if (atomic_read(&ring->fence_drv.last_seq) == ring->fence_drv.sync_seq &&
557+
coredump->ring != ring)
558+
continue;
559+
560+
coredump->rings[idx].ring_index = ring->idx;
561+
coredump->rings[idx].rptr = amdgpu_ring_get_rptr(ring);
562+
coredump->rings[idx].wptr = amdgpu_ring_get_wptr(ring);
563+
coredump->rings[idx].offset = off;
564+
565+
memcpy(&coredump->rings_dw[off], ring->ring, ring->ring_size);
566+
off += ring->ring_size;
567+
idx++;
568+
}
569+
coredump->num_rings = idx;
570+
} else {
571+
kvfree(coredump->rings_dw);
572+
kvfree(coredump->rings);
573+
coredump->rings_dw = NULL;
574+
coredump->rings = NULL;
575+
}
576+
426577
coredump->adev = adev;
427578

428579
ktime_get_ts64(&coredump->reset_time);

0 commit comments

Comments
 (0)