summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
diff options
context:
space:
mode:
authorFelix Kuehling <Felix.Kuehling@amd.com>2017-09-21 16:26:41 -0400
committerAlex Deucher <alexander.deucher@amd.com>2017-09-28 16:03:30 -0400
commitc98171ccf6580407d07a3b5dc8188ce9e1f4f7ca (patch)
tree191e13e83451ea87c6e4fdc758425b046a0c4bf8 /drivers/gpu/drm/amd/amdgpu/vega10_ih.c
parent1bab0fc01b84c1aa8a65a1f1de885e1faab48264 (diff)
drm/amdgpu: Handle GPUVM fault storms
When many wavefronts cause VM faults at the same time, it can overwhelm the interrupt handler and cause IH ring overflows before the driver can notify or kill the faulting application. As a workaround I'm introducing limited per-VM fault credit. After that number of VM faults have occurred, further VM faults are filtered out at the prescreen stage of processing. This depends on the PASID in the interrupt packet, so it currently only works for KFD contexts. Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/vega10_ih.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c11
1 files changed, 7 insertions, 4 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index a3b30d84dbb3..697325737ba8 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -260,15 +260,18 @@ static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev)
return true;
}
- /* Not a retry fault */
- if (!(dw5 & 0x80))
- return true;
-
pasid = dw3 & 0xffff;
/* No PASID, can't identify faulting process */
if (!pasid)
return true;
+ /* Not a retry fault, check fault credit */
+ if (!(dw5 & 0x80)) {
+ if (!amdgpu_vm_pasid_fault_credit(adev, pasid))
+ goto ignore_iv;
+ return true;
+ }
+
addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12);
key = AMDGPU_VM_FAULT(pasid, addr);
r = amdgpu_ih_add_fault(adev, key);