From a70cdb9eddcfd4ba20d69b84149b4a38648455ac Mon Sep 17 00:00:00 2001
From: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Date: Thu, 29 Mar 2018 22:36:33 +0530
Subject: drm/scheduler: move the tracepoints file from the include directory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move it with the scheduler code. This is mostly a straight forward
rename with no code change except for updating the TRACE_INCLUDE_PATH

Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/drm/gpu_scheduler_trace.h | 82 ---------------------------------------
 1 file changed, 82 deletions(-)
 delete mode 100644 include/drm/gpu_scheduler_trace.h

(limited to 'include/drm')

diff --git a/include/drm/gpu_scheduler_trace.h b/include/drm/gpu_scheduler_trace.h
deleted file mode 100644
index 0789e8d0a0e1..000000000000
--- a/include/drm/gpu_scheduler_trace.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright 2017 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _GPU_SCHED_TRACE_H_
-
-#include <linux/stringify.h>
-#include <linux/types.h>
-#include <linux/tracepoint.h>
-
-#include <drm/drmP.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM gpu_scheduler
-#define TRACE_INCLUDE_FILE gpu_scheduler_trace
-
-TRACE_EVENT(drm_sched_job,
-	    TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
-	    TP_ARGS(sched_job, entity),
-	    TP_STRUCT__entry(
-			     __field(struct drm_sched_entity *, entity)
-			     __field(struct dma_fence *, fence)
-			     __field(const char *, name)
-			     __field(uint64_t, id)
-			     __field(u32, job_count)
-			     __field(int, hw_job_count)
-			     ),
-
-	    TP_fast_assign(
-			   __entry->entity = entity;
-			   __entry->id = sched_job->id;
-			   __entry->fence = &sched_job->s_fence->finished;
-			   __entry->name = sched_job->sched->name;
-			   __entry->job_count = spsc_queue_count(&entity->job_queue);
-			   __entry->hw_job_count = atomic_read(
-				   &sched_job->sched->hw_rq_count);
-			   ),
-	    TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
-		      __entry->entity, __entry->id,
-		      __entry->fence, __entry->name,
-		      __entry->job_count, __entry->hw_job_count)
-);
-
-TRACE_EVENT(drm_sched_process_job,
-	    TP_PROTO(struct drm_sched_fence *fence),
-	    TP_ARGS(fence),
-	    TP_STRUCT__entry(
-		    __field(struct dma_fence *, fence)
-		    ),
-
-	    TP_fast_assign(
-		    __entry->fence = &fence->finished;
-		    ),
-	    TP_printk("fence=%p signaled", __entry->fence)
-);
-
-#endif
-
-/* This part must be outside protection */
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 1a61ee07211c543bf43e635fa703c162a78af0e1 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 4 Apr 2018 15:32:51 -0700
Subject: drm/sched: Extend the documentation.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These comments answer all the questions I had for myself when
implementing a driver using the GPU scheduler.

Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/drm/gpu_scheduler.h | 46 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 42 insertions(+), 4 deletions(-)

(limited to 'include/drm')

diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index dfd54fb94e10..c053a32341bf 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -43,10 +43,12 @@ enum drm_sched_priority {
 };
 
 /**
- * A scheduler entity is a wrapper around a job queue or a group
- * of other entities. Entities take turns emitting jobs from their
- * job queues to corresponding hardware ring based on scheduling
- * policy.
+ * drm_sched_entity - A wrapper around a job queue (typically attached
+ * to the DRM file_priv).
+ *
+ * Entities will emit jobs in order to their corresponding hardware
+ * ring, and the scheduler will alternate between entities based on
+ * scheduling policy.
 */
 struct drm_sched_entity {
 	struct list_head		list;
@@ -78,7 +80,18 @@ struct drm_sched_rq {
 
 struct drm_sched_fence {
 	struct dma_fence		scheduled;
+
+	/* This fence is what will be signaled by the scheduler when
+	 * the job is completed.
+	 *
+	 * When setting up an out fence for the job, you should use
+	 * this, since it's available immediately upon
+	 * drm_sched_job_init(), and the fence returned by the driver
+	 * from run_job() won't be created until the dependencies have
+	 * resolved.
+	 */
 	struct dma_fence		finished;
+
 	struct dma_fence_cb		cb;
 	struct dma_fence		*parent;
 	struct drm_gpu_scheduler	*sched;
@@ -88,6 +101,13 @@ struct drm_sched_fence {
 
 struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
 
+/**
+ * drm_sched_job - A job to be run by an entity.
+ *
+ * A job is created by the driver using drm_sched_job_init(), and
+ * should call drm_sched_entity_push_job() once it wants the scheduler
+ * to schedule the job.
+ */
 struct drm_sched_job {
 	struct spsc_node		queue_node;
 	struct drm_gpu_scheduler	*sched;
@@ -112,10 +132,28 @@ static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
  * these functions should be implemented in driver side
 */
 struct drm_sched_backend_ops {
+	/* Called when the scheduler is considering scheduling this
+	 * job next, to get another struct dma_fence for this job to
+	 * block on.  Once it returns NULL, run_job() may be called.
+	 */
 	struct dma_fence *(*dependency)(struct drm_sched_job *sched_job,
 					struct drm_sched_entity *s_entity);
+
+	/* Called to execute the job once all of the dependencies have
+	 * been resolved.  This may be called multiple times, if
+	 * timedout_job() has happened and drm_sched_job_recovery()
+	 * decides to try it again.
+	 */
 	struct dma_fence *(*run_job)(struct drm_sched_job *sched_job);
+
+	/* Called when a job has taken too long to execute, to trigger
+	 * GPU recovery.
+	 */
 	void (*timedout_job)(struct drm_sched_job *sched_job);
+
+	/* Called once the job's finished fence has been signaled and
+	 * it's time to clean it up.
+	 */
 	void (*free_job)(struct drm_sched_job *sched_job);
 };
 
-- 
cgit v1.2.3


From 8ee3a52e3f35e064a3bf82f21dc74ddaf9843648 Mon Sep 17 00:00:00 2001
From: Emily Deng <Emily.Deng@amd.com>
Date: Mon, 16 Apr 2018 10:07:02 +0800
Subject: drm/gpu-sched: fix force APP kill hang(v4)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

issue:
there are VMC page fault occurred if force APP kill during
3dmark test, the cause is in entity_fini we manually signal
all those jobs in entity's queue which confuse the sync/dep
mechanism:

1)page fault occurred in sdma's clear job which operate on
shadow buffer, and shadow buffer's Gart table is cleaned by
ttm_bo_release since the fence in its reservation was fake signaled
by entity_fini() under the case of SIGKILL received.

2)page fault occurred in gfx' job because during the lifetime
of gfx job we manually fake signal all jobs from its entity
in entity_fini(), thus the unmapping/clear PTE job depend on those
result fence is satisfied and sdma start clearing the PTE and lead
to GFX page fault.

fix:
1)should at least wait all jobs already scheduled complete in entity_fini()
if SIGKILL is the case.

2)if a fence signaled and try to clear some entity's dependency, should
set this entity guilty to prevent its job really run since the dependency
is fake signaled.

v2:
splitting drm_sched_entity_fini() into two functions:
1)The first one is does the waiting, removes the entity from the
runqueue and returns an error when the process was killed.
2)The second one then goes over the entity, install it as
completion signal for the remaining jobs and signals all jobs
with an error code.

v3:
1)Replace the fini1 and fini2 with better name
2)Call the first part before the VM teardown in
amdgpu_driver_postclose_kms() and the second part
after the VM teardown
3)Keep the original function drm_sched_entity_fini to
refine the code.

v4:
1)Rename entity->finished to entity->last_scheduled;
2)Rename drm_sched_entity_fini_job_cb() to
drm_sched_entity_kill_jobs_cb();
3)Pass NULL to drm_sched_entity_fini_job_cb() if -ENOENT;
4)Replace the type of entity->fini_status with "int";
5)Remove the check about entity->finished.

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Signed-off-by: Emily Deng <Emily.Deng@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/drm/gpu_scheduler.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/drm')

diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index c053a32341bf..350a62c26b29 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -65,6 +65,8 @@ struct drm_sched_entity {
 	struct dma_fence		*dependency;
 	struct dma_fence_cb		cb;
 	atomic_t			*guilty; /* points to ctx's guilty */
+	int            fini_status;
+	struct dma_fence    *last_scheduled;
 };
 
 /**
@@ -119,6 +121,7 @@ struct drm_sched_job {
 	uint64_t			id;
 	atomic_t			karma;
 	enum drm_sched_priority		s_priority;
+	struct drm_sched_entity  *entity;
 };
 
 static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
@@ -186,6 +189,10 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
 			  struct drm_sched_entity *entity,
 			  struct drm_sched_rq *rq,
 			  uint32_t jobs, atomic_t *guilty);
+void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
+			   struct drm_sched_entity *entity);
+void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched,
+			   struct drm_sched_entity *entity);
 void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
 			   struct drm_sched_entity *entity);
 void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
-- 
cgit v1.2.3


From 48ff108d9dc42bf92256484c50cdb3697f5ccb04 Mon Sep 17 00:00:00 2001
From: Leo Liu <leo.liu@amd.com>
Date: Thu, 9 Nov 2017 13:18:24 -0500
Subject: drm/amdgpu: add VEGAM ASIC type

Signed-off-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/drm/amd_asic_type.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/drm')

diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h
index 6c731c52c071..695bde7eb055 100644
--- a/include/drm/amd_asic_type.h
+++ b/include/drm/amd_asic_type.h
@@ -44,6 +44,7 @@ enum amd_asic_type {
 	CHIP_POLARIS10,
 	CHIP_POLARIS11,
 	CHIP_POLARIS12,
+	CHIP_VEGAM,
 	CHIP_VEGA10,
 	CHIP_VEGA12,
 	CHIP_RAVEN,
-- 
cgit v1.2.3


From 8344c53f57057b42a5da87e9557c40fcda18fb7a Mon Sep 17 00:00:00 2001
From: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Date: Thu, 29 Mar 2018 22:36:32 +0530
Subject: drm/scheduler: remove unused parameter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

this patch also effect the amdgpu and etnaviv drivers which
use the function drm_sched_entity_init

Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Acked-by: Lucas Stach <l.stach@pengutronix.de>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/drm/gpu_scheduler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/drm')

diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 350a62c26b29..52380067a43f 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -188,7 +188,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched);
 int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
 			  struct drm_sched_entity *entity,
 			  struct drm_sched_rq *rq,
-			  uint32_t jobs, atomic_t *guilty);
+			  atomic_t *guilty);
 void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
 			   struct drm_sched_entity *entity);
 void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched,
-- 
cgit v1.2.3