Skip to content

Commit 1b135c6

Browse files
peppsac authored and alexdeucher committed
drm/amdgpu: extract amdgpu_vm_lock_by_pasid from amdgpu_vm_handle_fault
This is tricky to implement right and we're going to need it from the devcoredump.

Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Acked-by: Alex Deucher <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
1 parent d1f188b commit 1b135c6

2 files changed

Lines changed: 54 additions & 29 deletions

File tree

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

Lines changed: 51 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2949,6 +2949,50 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
29492949
return 0;
29502950
}
29512951

2952+
/**
2953+
* amdgpu_vm_lock_by_pasid - look up an amdgpu_vm by pasid and return it with its root bo referenced and reserved, if possible.
2954+
* @adev: amdgpu device pointer
2955+
* @root: output parameter; on success points to the referenced and reserved root BO of the VM
2956+
* @pasid: PASID of the VM
2957+
* Return: the VM on success, or NULL if no VM is registered for @pasid, the root bo cannot be reserved, or the VM changed while reserving. The caller needs to unreserve and unref the root bo on success.
2958+
*/
2959+
struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
2960+
struct amdgpu_bo **root, u32 pasid)
2961+
{
2962+
unsigned long irqflags;
2963+
struct amdgpu_vm *vm;
2964+
int r;
2965+
2966+
xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
2967+
vm = xa_load(&adev->vm_manager.pasids, pasid);
2968+
*root = vm ? amdgpu_bo_ref(vm->root.bo) : NULL;
2969+
xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
2970+
2971+
if (!*root)
2972+
return NULL;
2973+
2974+
r = amdgpu_bo_reserve(*root, true);
2975+
if (r)
2976+
goto error_unref;
2977+
2978+
/* Double check that the VM still exists and still uses this root bo: the pasids lock was dropped while reserving */
2979+
xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
2980+
vm = xa_load(&adev->vm_manager.pasids, pasid);
2981+
if (vm && vm->root.bo != *root)
2982+
vm = NULL;
2983+
xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
2984+
if (!vm)
2985+
goto error_unlock;
2986+
2987+
return vm;
2988+
error_unlock:
2989+
amdgpu_bo_unreserve(*root);
2990+
2991+
error_unref:
2992+
amdgpu_bo_unref(root);
2993+
return NULL;
2994+
}
2995+
29522996
/**
29532997
* amdgpu_vm_handle_fault - graceful handling of VM faults.
29542998
* @adev: amdgpu device pointer
@@ -2964,50 +3008,29 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
29643008
* shouldn't be reported any more.
29653009
*/
29663010
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
2967-
u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
2968-
bool write_fault)
3011+
u32 vmid, u32 node_id, uint64_t addr,
3012+
uint64_t ts, bool write_fault)
29693013
{
29703014
bool is_compute_context = false;
29713015
struct amdgpu_bo *root;
2972-
unsigned long irqflags;
29733016
uint64_t value, flags;
29743017
struct amdgpu_vm *vm;
29753018
int r;
29763019

2977-
xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
2978-
vm = xa_load(&adev->vm_manager.pasids, pasid);
2979-
if (vm) {
2980-
root = amdgpu_bo_ref(vm->root.bo);
2981-
is_compute_context = vm->is_compute_context;
2982-
} else {
2983-
root = NULL;
2984-
}
2985-
xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
2986-
2987-
if (!root)
3020+
vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
3021+
if (!vm)
29883022
return false;
29893023

3024+
is_compute_context = vm->is_compute_context;
3025+
29903026
if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
29913027
node_id, addr >> PAGE_SHIFT, ts, write_fault)) {
3028+
amdgpu_bo_unreserve(root);
29923029
amdgpu_bo_unref(&root);
29933030
return true;
29943031
}
29953032

29963033
addr /= AMDGPU_GPU_PAGE_SIZE;
2997-
2998-
r = amdgpu_bo_reserve(root, true);
2999-
if (r)
3000-
goto error_unref;
3001-
3002-
/* Double check that the VM still exists */
3003-
xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
3004-
vm = xa_load(&adev->vm_manager.pasids, pasid);
3005-
if (vm && vm->root.bo != root)
3006-
vm = NULL;
3007-
xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
3008-
if (!vm)
3009-
goto error_unlock;
3010-
30113034
flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
30123035
AMDGPU_PTE_SYSTEM;
30133036

@@ -3046,7 +3069,6 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
30463069
if (r < 0)
30473070
dev_err(adev->dev, "Can't handle page fault (%d)\n", r);
30483071

3049-
error_unref:
30503072
amdgpu_bo_unref(&root);
30513073

30523074
return false;

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,9 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
592592
u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
593593
bool write_fault);
594594

595+
struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
596+
struct amdgpu_bo **root, u32 pasid);
597+
595598
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
596599

597600
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,

0 commit comments

Comments
 (0)