From a70cdb9eddcfd4ba20d69b84149b4a38648455ac Mon Sep 17 00:00:00 2001 From: Nayan Deshmukh Date: Thu, 29 Mar 2018 22:36:33 +0530 Subject: drm/scheduler: move the tracepoints file from the include directory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move it with the scheduler code. This is mostly a straight forward rename with no code change except for updating the TRACE_INCLUDE_PATH Signed-off-by: Nayan Deshmukh Suggested-by: Christian König Reviewed-by: Christian König Acked-by: Lucas Stach Signed-off-by: Alex Deucher --- include/drm/gpu_scheduler_trace.h | 82 --------------------------------------- 1 file changed, 82 deletions(-) delete mode 100644 include/drm/gpu_scheduler_trace.h (limited to 'include/drm') diff --git a/include/drm/gpu_scheduler_trace.h b/include/drm/gpu_scheduler_trace.h deleted file mode 100644 index 0789e8d0a0e1..000000000000 --- a/include/drm/gpu_scheduler_trace.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) -#define _GPU_SCHED_TRACE_H_ - -#include -#include -#include - -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM gpu_scheduler -#define TRACE_INCLUDE_FILE gpu_scheduler_trace - -TRACE_EVENT(drm_sched_job, - TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity), - TP_ARGS(sched_job, entity), - TP_STRUCT__entry( - __field(struct drm_sched_entity *, entity) - __field(struct dma_fence *, fence) - __field(const char *, name) - __field(uint64_t, id) - __field(u32, job_count) - __field(int, hw_job_count) - ), - - TP_fast_assign( - __entry->entity = entity; - __entry->id = sched_job->id; - __entry->fence = &sched_job->s_fence->finished; - __entry->name = sched_job->sched->name; - __entry->job_count = spsc_queue_count(&entity->job_queue); - __entry->hw_job_count = atomic_read( - &sched_job->sched->hw_rq_count); - ), - TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", - __entry->entity, __entry->id, - __entry->fence, __entry->name, - __entry->job_count, __entry->hw_job_count) -); - -TRACE_EVENT(drm_sched_process_job, - TP_PROTO(struct drm_sched_fence *fence), - TP_ARGS(fence), - TP_STRUCT__entry( - __field(struct dma_fence *, fence) - ), - - TP_fast_assign( - __entry->fence = &fence->finished; - ), - TP_printk("fence=%p signaled", __entry->fence) -); - -#endif - -/* This part must be outside protection */ -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . -#include -- cgit v1.2.3 From 1a61ee07211c543bf43e635fa703c162a78af0e1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 4 Apr 2018 15:32:51 -0700 Subject: drm/sched: Extend the documentation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These comments answer all the questions I had for myself when implementing a driver using the GPU scheduler. Signed-off-by: Eric Anholt Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/drm/gpu_scheduler.h | 46 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) (limited to 'include/drm') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index dfd54fb94e10..c053a32341bf 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -43,10 +43,12 @@ enum drm_sched_priority { }; /** - * A scheduler entity is a wrapper around a job queue or a group - * of other entities. Entities take turns emitting jobs from their - * job queues to corresponding hardware ring based on scheduling - * policy. + * drm_sched_entity - A wrapper around a job queue (typically attached + * to the DRM file_priv). + * + * Entities will emit jobs in order to their corresponding hardware + * ring, and the scheduler will alternate between entities based on + * scheduling policy. */ struct drm_sched_entity { struct list_head list; @@ -78,7 +80,18 @@ struct drm_sched_rq { struct drm_sched_fence { struct dma_fence scheduled; + + /* This fence is what will be signaled by the scheduler when + * the job is completed. + * + * When setting up an out fence for the job, you should use + * this, since it's available immediately upon + * drm_sched_job_init(), and the fence returned by the driver + * from run_job() won't be created until the dependencies have + * resolved. + */ struct dma_fence finished; + struct dma_fence_cb cb; struct dma_fence *parent; struct drm_gpu_scheduler *sched; @@ -88,6 +101,13 @@ struct drm_sched_fence { struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); +/** + * drm_sched_job - A job to be run by an entity. + * + * A job is created by the driver using drm_sched_job_init(), and + * should call drm_sched_entity_push_job() once it wants the scheduler + * to schedule the job. + */ struct drm_sched_job { struct spsc_node queue_node; struct drm_gpu_scheduler *sched; @@ -112,10 +132,28 @@ static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job, * these functions should be implemented in driver side */ struct drm_sched_backend_ops { + /* Called when the scheduler is considering scheduling this + * job next, to get another struct dma_fence for this job to + * block on. Once it returns NULL, run_job() may be called. + */ struct dma_fence *(*dependency)(struct drm_sched_job *sched_job, struct drm_sched_entity *s_entity); + + /* Called to execute the job once all of the dependencies have + * been resolved. This may be called multiple times, if + * timedout_job() has happened and drm_sched_job_recovery() + * decides to try it again. + */ struct dma_fence *(*run_job)(struct drm_sched_job *sched_job); + + /* Called when a job has taken too long to execute, to trigger + * GPU recovery. + */ void (*timedout_job)(struct drm_sched_job *sched_job); + + /* Called once the job's finished fence has been signaled and + * it's time to clean it up. + */ void (*free_job)(struct drm_sched_job *sched_job); }; -- cgit v1.2.3 From 8ee3a52e3f35e064a3bf82f21dc74ddaf9843648 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 16 Apr 2018 10:07:02 +0800 Subject: drm/gpu-sched: fix force APP kill hang(v4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit issue: there are VMC page fault occurred if force APP kill during 3dmark test, the cause is in entity_fini we manually signal all those jobs in entity's queue which confuse the sync/dep mechanism: 1)page fault occurred in sdma's clear job which operate on shadow buffer, and shadow buffer's Gart table is cleaned by ttm_bo_release since the fence in its reservation was fake signaled by entity_fini() under the case of SIGKILL received. 2)page fault occurred in gfx' job because during the lifetime of gfx job we manually fake signal all jobs from its entity in entity_fini(), thus the unmapping/clear PTE job depend on those result fence is satisfied and sdma start clearing the PTE and lead to GFX page fault. fix: 1)should at least wait all jobs already scheduled complete in entity_fini() if SIGKILL is the case. 2)if a fence signaled and try to clear some entity's dependency, should set this entity guilty to prevent its job really run since the dependency is fake signaled. v2: splitting drm_sched_entity_fini() into two functions: 1)The first one is does the waiting, removes the entity from the runqueue and returns an error when the process was killed. 2)The second one then goes over the entity, install it as completion signal for the remaining jobs and signals all jobs with an error code. v3: 1)Replace the fini1 and fini2 with better name 2)Call the first part before the VM teardown in amdgpu_driver_postclose_kms() and the second part after the VM teardown 3)Keep the original function drm_sched_entity_fini to refine the code. v4: 1)Rename entity->finished to entity->last_scheduled; 2)Rename drm_sched_entity_fini_job_cb() to drm_sched_entity_kill_jobs_cb(); 3)Pass NULL to drm_sched_entity_fini_job_cb() if -ENOENT; 4)Replace the type of entity->fini_status with "int"; 5)Remove the check about entity->finished. Signed-off-by: Monk Liu Signed-off-by: Emily Deng Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/drm/gpu_scheduler.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/drm') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index c053a32341bf..350a62c26b29 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -65,6 +65,8 @@ struct drm_sched_entity { struct dma_fence *dependency; struct dma_fence_cb cb; atomic_t *guilty; /* points to ctx's guilty */ + int fini_status; + struct dma_fence *last_scheduled; }; /** @@ -119,6 +121,7 @@ struct drm_sched_job { uint64_t id; atomic_t karma; enum drm_sched_priority s_priority; + struct drm_sched_entity *entity; }; static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job, @@ -186,6 +189,10 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, struct drm_sched_rq *rq, uint32_t jobs, atomic_t *guilty); +void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity); +void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity); void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity); void drm_sched_entity_push_job(struct drm_sched_job *sched_job, -- cgit v1.2.3 From 48ff108d9dc42bf92256484c50cdb3697f5ccb04 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Thu, 9 Nov 2017 13:18:24 -0500 Subject: drm/amdgpu: add VEGAM ASIC type Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/drm/amd_asic_type.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/drm') diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h index 6c731c52c071..695bde7eb055 100644 --- a/include/drm/amd_asic_type.h +++ b/include/drm/amd_asic_type.h @@ -44,6 +44,7 @@ enum amd_asic_type { CHIP_POLARIS10, CHIP_POLARIS11, CHIP_POLARIS12, + CHIP_VEGAM, CHIP_VEGA10, CHIP_VEGA12, CHIP_RAVEN, -- cgit v1.2.3 From 8344c53f57057b42a5da87e9557c40fcda18fb7a Mon Sep 17 00:00:00 2001 From: Nayan Deshmukh Date: Thu, 29 Mar 2018 22:36:32 +0530 Subject: drm/scheduler: remove unused parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this patch also effect the amdgpu and etnaviv drivers which use the function drm_sched_entity_init Signed-off-by: Nayan Deshmukh Suggested-by: Christian König Acked-by: Lucas Stach Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/drm/gpu_scheduler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/drm') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 350a62c26b29..52380067a43f 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -188,7 +188,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched); int drm_sched_entity_init(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, struct drm_sched_rq *rq, - uint32_t jobs, atomic_t *guilty); + atomic_t *guilty); void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity); void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, -- cgit v1.2.3