3 files changed, 52 insertions, 10 deletions
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 96a9e8bcd805..a7d87cfaabcb 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -552,6 +552,36 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
+static int __i915_sw_fence_call
+semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	struct i915_request *request =
+		container_of(fence, typeof(*request), semaphore);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		/*
+		 * We only check a small portion of our dependencies
+		 * and so cannot guarantee that there remains no
+		 * semaphore chain across all. Instead of opting
+		 * for the full NOSEMAPHORE boost, we go for the
+		 * smaller (but still preempting) boost of
+		 * NEWCLIENT. This will be enough to boost over
+		 * a busywaiting request (as that cannot be
+		 * NEWCLIENT) without accidentally boosting
+		 * a busywait over real work elsewhere.
+		 */
+		i915_schedule_bump_priority(request, I915_PRIORITY_NEWCLIENT);
+		break;
+
+	case FENCE_FREE:
+		i915_request_put(request);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
 static void ring_retire_requests(struct intel_ring *ring)
 {
 	struct i915_request *rq, *rn;
@@ -702,6 +732,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
+	i915_sw_fence_init(&i915_request_get(rq)->semaphore, semaphore_notify);
 
 	i915_sched_node_init(&rq->sched);
 
@@ -784,6 +815,12 @@ emit_semaphore_wait(struct i915_request *to,
 						     &from->fence, 0,
 						     I915_FENCE_GFP);
 
+	err = i915_sw_fence_await_dma_fence(&to->semaphore,
+					    &from->fence, 0,
+					    I915_FENCE_GFP);
+	if (err < 0)
+		return err;
+
 	/* We need to pin the signaler's HWSP until we are finished reading. */
 	err = i915_timeline_read_hwsp(from, to, &hwsp_offset);
 	if (err)
@@ -1114,6 +1151,7 @@ void i915_request_add(struct i915_request *request)
 	 * run at the earliest possible convenience.
 	 */
 	local_bh_disable();
+	i915_sw_fence_commit(&request->semaphore);
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 	if (engine->schedule) {
 		struct i915_sched_attr attr = request->gem_context->sched;
@@ -1320,7 +1358,9 @@ long i915_request_wait(struct i915_request *rq,
 	if (flags & I915_WAIT_PRIORITY) {
 		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
 			gen6_rps_boost(rq);
+		local_bh_disable(); /* suspend tasklets for reprioritisation */
 		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
+		local_bh_enable(); /* kick tasklets en masse */
 	}
 
 	wait.tsk = current;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 875be6f71412..a982664618c2 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -143,6 +143,7 @@ struct i915_request {
 		struct i915_sw_dma_fence_cb dmaq;
 	};
 	struct list_head execute_cb;
+	struct i915_sw_fence semaphore;
 
 	/*
 	 * A list of everyone we wait upon, and everyone who waits upon us.
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 77dbf7d74e12..39bc4f54e272 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -64,7 +64,7 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 {
 	bool ret = false;
 
-	spin_lock(&schedule_lock);
+	spin_lock_irq(&schedule_lock);
 
 	if (!node_signaled(signal)) {
 		INIT_LIST_HEAD(&dep->dfs_link);
@@ -81,7 +81,7 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 		ret = true;
 	}
 
-	spin_unlock(&schedule_lock);
+	spin_unlock_irq(&schedule_lock);
 
 	return ret;
 }
@@ -108,7 +108,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 
 	GEM_BUG_ON(!list_empty(&node->link));
 
-	spin_lock(&schedule_lock);
+	spin_lock_irq(&schedule_lock);
 
 	/*
 	 * Everyone we depended upon (the fences we wait to be signaled)
@@ -135,7 +135,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 			i915_dependency_free(dep);
 	}
 
-	spin_unlock(&schedule_lock);
+	spin_unlock_irq(&schedule_lock);
 }
 
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
@@ -356,7 +356,7 @@ static void __i915_schedule(struct i915_request *rq,
 
 	memset(&cache, 0, sizeof(cache));
 	engine = rq->engine;
-	spin_lock_irq(&engine->timeline.lock);
+	spin_lock(&engine->timeline.lock);
 
 	/* Fifo and depth-first replacement ensure our deps execute before us */
 	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
@@ -407,32 +407,33 @@ static void __i915_schedule(struct i915_request *rq,
 		tasklet_hi_schedule(&engine->execlists.tasklet);
 	}
 
-	spin_unlock_irq(&engine->timeline.lock);
+	spin_unlock(&engine->timeline.lock);
 }
 
 void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 {
-	spin_lock(&schedule_lock);
+	spin_lock_irq(&schedule_lock);
 	__i915_schedule(rq, attr);
-	spin_unlock(&schedule_lock);
+	spin_unlock_irq(&schedule_lock);
 }
 
 void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
 {
 	struct i915_sched_attr attr;
+	unsigned long flags;
 
 	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
 
 	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
 		return;
 
-	spin_lock_bh(&schedule_lock);
+	spin_lock_irqsave(&schedule_lock, flags);
 
 	attr = rq->sched.attr;
 	attr.priority |= bump;
 	__i915_schedule(rq, &attr);
 
-	spin_unlock_bh(&schedule_lock);
+	spin_unlock_irqrestore(&schedule_lock, flags);
 }
 
 void __i915_priolist_free(struct i915_priolist *p)