From 5740682e66cef57626a328d237698cad329c0449 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 25 Oct 2017 16:37:02 +0800 Subject: drm/amdgpu:implement new GPU recover(v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1,new imple names amdgpu_gpu_recover which gives more hint on what it does compared with gpu_reset 2,gpu_recover unify bare-metal and SR-IOV, only the asic reset part is implemented differently 3,gpu_recover will increase hang job karma and mark its entity/context as guilty if exceeds limit V2: 4,in scheduler main routine the job from guilty context will be immedialy fake signaled after it poped from queue and its fence be set with "-ECANCELED" error 5,in scheduler recovery routine all jobs from the guilty entity would be dropped 6,in run_job() routine the real IB submission would be skipped if @skip parameter equales true or there was VRAM lost occured. V3: 7,replace deprecated gpu reset, use new gpu recover Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index eda89dfdef5b..604ac03a42e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -694,25 +694,25 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) } /** - * amdgpu_debugfs_gpu_reset - manually trigger a gpu reset + * amdgpu_debugfs_gpu_recover - manually trigger a gpu reset & recover * * Manually trigger a gpu reset at the next fence wait. */ -static int amdgpu_debugfs_gpu_reset(struct seq_file *m, void *data) +static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data) { struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; - seq_printf(m, "gpu reset\n"); - amdgpu_gpu_reset(adev); + seq_printf(m, "gpu recover\n"); + amdgpu_gpu_recover(adev, NULL); return 0; } static const struct drm_info_list amdgpu_debugfs_fence_list[] = { {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, - {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL} + {"amdgpu_gpu_recover", &amdgpu_debugfs_gpu_recover, 0, NULL} }; static const struct drm_info_list amdgpu_debugfs_fence_list_sriov[] = { -- cgit v1.2.3