From 732f9ca4a737aea3983ad86007e88e7fffe8cd6a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Nov 2019 18:22:09 +0000 Subject: drm/i915/gt: Fixup config ifdeffery for pm_suspend_target_state pm_suspend_target_state is declared under CONFIG_PM_SLEEP but only defined under CONFIG_SUSPEND. Play safe and only use the symbol if it is both declared and defined. Reported-by: kbuild-all@lists.01.org Signed-off-by: Chris Wilson Fixes: a70a9e998e8e ("drm/i915: Defer rc6 shutdown to suspend_late") Cc: Andi Shyti Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20191120182209.3967833-1-chris@chris-wilson.co.uk Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 6187cdd06646..7917cc348375 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -272,7 +272,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt) static suspend_state_t pm_suspend_target(void) { -#if IS_ENABLED(CONFIG_PM_SLEEP) +#if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP) return pm_suspend_target_state; #else return PM_SUSPEND_TO_IDLE; -- cgit v1.2.3 From ee33baa83109612d9a31fc2c2a7b74967767b358 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Nov 2019 12:54:33 +0000 Subject: drm/i915: Mark up the calling context for intel_wakeref_put() Previously, we assumed we could use mutex_trylock() within an atomic context, falling back to a worker if contended. However, such trickery is illegal inside interrupt context, and so we need to always use a worker under such circumstances. As we normally are in process context, we can typically use a plain mutex, and only defer to a work when we know we are being called from an interrupt path. 
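In sketch form, assuming a wakeref whose final release must run under a mutex (hypothetical names; the concrete helpers are the intel_wakeref_put()/intel_wakeref_put_async() pair wired up in the diff below):

static void wakeref_put(struct wakeref *wf)
{
        /* Process context only: taking the mutex may sleep. */
        might_sleep();
        if (atomic_dec_and_mutex_lock(&wf->count, &wf->mutex)) {
                wf->ops->put(wf); /* final release, serialised by the mutex */
                mutex_unlock(&wf->mutex);
        }
}

static void wakeref_put_async(struct wakeref *wf) /* safe in IRQ context */
{
        /* Fast path: ours is not the final reference, no mutex required. */
        if (atomic_add_unless(&wf->count, -1, 1))
                return;

        /* Final reference: punt the mutex work to process context. */
        queue_work(system_wq, &wf->release_work);
}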
Fixes: 51fbd8de87dc ("drm/i915/pmu: Atomically acquire the gt_pm wakeref") References: a0855d24fc22d ("locking/mutex: Complain upon mutex API misuse in IRQ contexts") References: https://bugs.freedesktop.org/show_bug.cgi?id=111626 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191120125433.3767149-1-chris@chris-wilson.co.uk (cherry picked from commit 07779a76ee1f93f930cf697b22be73d16e14f50c) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 3 ++- drivers/gpu/drm/i915/gt/intel_engine_pm.h | 10 ++++++++++ drivers/gpu/drm/i915/gt/intel_gt_pm.c | 1 - drivers/gpu/drm/i915/gt/intel_gt_pm.h | 5 +++++ drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/selftest_engine_pm.c | 7 ++++--- 7 files changed, 23 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 3c0f490ff2c7..7269c87c1372 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -177,7 +177,8 @@ static int __engine_park(struct intel_wakeref *wf) engine->execlists.no_priolist = false; - intel_gt_pm_put(engine->gt); + /* While gt calls i915_vma_parked(), we have to break the lock cycle */ + intel_gt_pm_put_async(engine->gt); return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index 739c50fefcef..24e20344dc22 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -31,6 +31,16 @@ static inline void intel_engine_pm_put(struct intel_engine_cs *engine) intel_wakeref_put(&engine->wakeref); } +static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine) +{ + intel_wakeref_put_async(&engine->wakeref); +} + +static inline void intel_engine_pm_flush(struct intel_engine_cs *engine) +{ + intel_wakeref_unlock_wait(&engine->wakeref); +} + void intel_engine_init__pm(struct intel_engine_cs *engine); #endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 7917cc348375..a459a42ad5c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -105,7 +105,6 @@ static int __gt_park(struct intel_wakeref *wf) static const struct intel_wakeref_ops wf_ops = { .get = __gt_unpark, .put = __gt_park, - .flags = INTEL_WAKEREF_PUT_ASYNC, }; void intel_gt_pm_init_early(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index b3e17399be9b..990efc27a4e4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -32,6 +32,11 @@ static inline void intel_gt_pm_put(struct intel_gt *gt) intel_wakeref_put(>->wakeref); } +static inline void intel_gt_pm_put_async(struct intel_gt *gt) +{ + intel_wakeref_put_async(>->wakeref); +} + static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) { return intel_wakeref_wait_for_idle(>->wakeref); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 0ac3b26674ad..37fe72aa8e27 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1117,7 +1117,7 @@ __execlists_schedule_out(struct i915_request *rq, intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); 
- intel_gt_pm_put(engine->gt); + intel_gt_pm_put_async(engine->gt); /* * If this is part of a virtual engine, its next request may diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index f03e000051c1..c97423a76642 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1114,7 +1114,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) out: intel_engine_cancel_stop_cs(engine); reset_finish_engine(engine); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); return ret; } diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 20b9c83f43ad..cbf6b0735272 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -51,11 +51,12 @@ static int live_engine_pm(void *arg) pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n", engine->name, p->name); else - intel_engine_pm_put(engine); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); + intel_engine_pm_put_async(engine); p->critical_section_end(); - /* engine wakeref is sync (instant) */ + intel_engine_pm_flush(engine); + if (intel_engine_pm_is_awake(engine)) { pr_err("%s is still awake after flushing pm\n", engine->name); -- cgit v1.2.3 From ca1711d1991f968d1e88725a2e607e57ecd5c5f1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Nov 2019 16:55:13 +0000 Subject: drm/i915/gt: Close race between engine_park and intel_gt_retire_requests The general concept was that intel_timeline.active_count was locked by the intel_timeline.mutex. The exception was for power management, where the engine->kernel_context->timeline could be manipulated under the global wakeref.mutex. This was quite solid, as we always manipulated the timeline only while we held an engine wakeref. And then we started retiring requests outside of struct_mutex, only using the timelines.active_list and the timeline->mutex. There we started manipulating intel_timeline.active_count outside of an engine wakeref, and so introduced a race between __engine_park() and intel_gt_retire_requests(), a race that could result in the engine->kernel_context not being added to the active timelines and so losing requests, which caused us to keep the system permanently powered up [and unloadable]. The race would be easy to close if we could take the engine wakeref for the timeline before we retire -- except timelines are not bound to any engine and so we would need to keep all active engines awake. The alternative is to guard intel_timeline_enter/intel_timeline_exit for use outside of the timeline->mutex. 
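Condensed from the diff below, the resulting enter/exit pattern reads roughly as follows (irqsave/irqrestore and assertions elided); note how the count is re-checked under the spinlock so two racing callers can never add or delete the link twice:

void intel_timeline_enter(struct intel_timeline *tl)
{
        /* Fast path: already active, just take another reference. */
        if (atomic_add_unless(&tl->active_count, 1, 0))
                return;

        spin_lock(&timelines->lock);
        if (!atomic_fetch_inc(&tl->active_count)) /* 0 -> 1: first enter */
                list_add_tail(&tl->link, &timelines->active_list);
        spin_unlock(&timelines->lock);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
        /* Fast path: not the final reference, leave the list alone. */
        if (atomic_add_unless(&tl->active_count, -1, 1))
                return;

        spin_lock(&timelines->lock);
        if (atomic_dec_and_test(&tl->active_count)) /* 1 -> 0: last exit */
                list_del(&tl->link);
        spin_unlock(&timelines->lock);
}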
Fixes: e5dadff4b093 ("drm/i915: Protect request retirement with timeline->mutex") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191120165514.3955081-1-chris@chris-wilson.co.uk (cherry picked from commit a6edbca74b305adc165e67065d7ee766006e6a48) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_gt_requests.c | 8 +++--- drivers/gpu/drm/i915/gt/intel_timeline.c | 34 ++++++++++++++++++++------ drivers/gpu/drm/i915/gt/intel_timeline_types.h | 2 +- 3 files changed, 32 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 353809ac2754..a0112ba08ca7 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -52,8 +52,8 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) } intel_timeline_get(tl); - GEM_BUG_ON(!tl->active_count); - tl->active_count++; /* pin the list element */ + GEM_BUG_ON(!atomic_read(&tl->active_count)); + atomic_inc(&tl->active_count); /* pin the list element */ spin_unlock_irqrestore(&timelines->lock, flags); if (timeout > 0) { @@ -74,7 +74,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) /* Resume iteration after dropping lock */ list_safe_reset_next(tl, tn, link); - if (!--tl->active_count) + if (atomic_dec_and_test(&tl->active_count)) list_del(&tl->link); else active_count += !!rcu_access_pointer(tl->last_request.fence); @@ -83,7 +83,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) /* Defer the final release to after the spinlock */ if (refcount_dec_and_test(&tl->kref.refcount)) { - GEM_BUG_ON(tl->active_count); + GEM_BUG_ON(atomic_read(&tl->active_count)); list_add(&tl->link, &free); } } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 14ad10acd548..839de6b9aa24 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -339,15 +339,33 @@ void intel_timeline_enter(struct intel_timeline *tl) struct intel_gt_timelines *timelines = &tl->gt->timelines; unsigned long flags; + /* + * Pretend we are serialised by the timeline->mutex. + * + * While generally true, there are a few exceptions to the rule + * for the engine->kernel_context being used to manage power + * transitions. As the engine_park may be called from under any + * timeline, it uses the power mutex as a global serialisation + * lock to prevent any other request entering its timeline. + * + * The rule is generally tl->mutex, otherwise engine->wakeref.mutex. + * + * However, intel_gt_retire_request() does not know which engine + * it is retiring along and so cannot partake in the engine-pm + * barrier, and there we use the tl->active_count as a means to + * pin the timeline in the active_list while the locks are dropped. + * Ergo, as that is outside of the engine-pm barrier, we need to + * use atomic to manipulate tl->active_count. + */ lockdep_assert_held(&tl->mutex); - GEM_BUG_ON(!atomic_read(&tl->pin_count)); - if (tl->active_count++) + + if (atomic_add_unless(&tl->active_count, 1, 0)) return; - GEM_BUG_ON(!tl->active_count); /* overflow? 
*/ spin_lock_irqsave(&timelines->lock, flags); - list_add(&tl->link, &timelines->active_list); + if (!atomic_fetch_inc(&tl->active_count)) + list_add_tail(&tl->link, &timelines->active_list); spin_unlock_irqrestore(&timelines->lock, flags); } @@ -356,14 +374,16 @@ void intel_timeline_exit(struct intel_timeline *tl) struct intel_gt_timelines *timelines = &tl->gt->timelines; unsigned long flags; + /* See intel_timeline_enter() */ lockdep_assert_held(&tl->mutex); - GEM_BUG_ON(!tl->active_count); - if (--tl->active_count) + GEM_BUG_ON(!atomic_read(&tl->active_count)); + if (atomic_add_unless(&tl->active_count, -1, 1)) return; spin_lock_irqsave(&timelines->lock, flags); - list_del(&tl->link); + if (atomic_dec_and_test(&tl->active_count)) + list_del(&tl->link); spin_unlock_irqrestore(&timelines->lock, flags); /* diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index 98d9ee166379..5244615ed1cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -42,7 +42,7 @@ struct intel_timeline { * from the intel_context caller plus internal atomicity. */ atomic_t pin_count; - unsigned int active_count; + atomic_t active_count; const u32 *hwsp_seqno; struct i915_vma *hwsp_ggtt; -- cgit v1.2.3 From bf201f5eda23eb57f7217d20ea3e18da8cbcf52b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Nov 2019 16:55:14 +0000 Subject: drm/i915/gt: Unlock engine-pm after queuing the kernel context switch In commit a79ca656b648 ("drm/i915: Push the wakeref->count deferral to the backend"), I erroneously concluded that we last modify the engine inside __i915_request_commit() meaning that we could enable concurrent submission for userspace as we enqueued this request. However, this falls into a trap with other users of the engine->kernel_context waking up and submitting their request before the idle-switch is queued, with the result that the kernel_context is executed out-of-sequence, most likely upsetting the GPU and certainly ourselves when we try to retire the out-of-sequence requests. As such we need to hold onto the effective engine->kernel_context mutex lock (via the engine pm mutex proxy) until we have finished queuing the request to the engine. v2: Serialise against concurrent intel_gt_retire_requests() v3: Describe the hairy locking scheme with intel_gt_retire_requests() for future reference. v4: Combine timeline->lock and engine pm release; it's hairy.
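Reduced to a skeleton (error paths and the active barriers elided from the diff below), the ordering this patch enforces is:

static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
        struct intel_context *ce = engine->kernel_context;
        struct i915_request *rq;

        rq = __i915_request_create(ce, GFP_NOWAIT);
        __i915_request_commit(rq);
        __i915_request_queue(rq, NULL); /* hand the idle-switch to HW... */

        /*
         * ...and only now, with the kernel context request queued, release
         * the engine-pm "mutex" so that new submitters may proceed.
         */
        __intel_timeline_enter_and_release_pm(ce->timeline, engine);

        return false; /* parking is deferred to the request's retirement */
}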
Fixes: a79ca656b648 ("drm/i915: Push the wakeref->count deferral to the backend") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191120165514.3955081-2-chris@chris-wilson.co.uk (cherry picked from commit 5cba288466e9b229feb68295675246e7522fb5eb) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 47 ++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 7269c87c1372..373a4b9f159c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -73,8 +73,25 @@ static inline void __timeline_mark_unlock(struct intel_context *ce, #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ +static void +__intel_timeline_enter_and_release_pm(struct intel_timeline *tl, + struct intel_engine_cs *engine) +{ + struct intel_gt_timelines *timelines = &engine->gt->timelines; + + spin_lock(&timelines->lock); + + if (!atomic_fetch_inc(&tl->active_count)) + list_add_tail(&tl->link, &timelines->active_list); + + __intel_wakeref_defer_park(&engine->wakeref); + + spin_unlock(&timelines->lock); +} + static bool switch_to_kernel_context(struct intel_engine_cs *engine) { + struct intel_context *ce = engine->kernel_context; struct i915_request *rq; unsigned long flags; bool result = true; @@ -98,16 +115,31 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * This should hold true as we can only park the engine after * retiring the last request, thus all rings should be empty and * all timelines idle. + * + * For unlocking, there are 2 other parties and the GPU who have a + * stake here. + * + * A new gpu user will be waiting on the engine-pm to start their + * engine_unpark. New waiters are predicated on engine->wakeref.count + * and so intel_wakeref_defer_park() acts like a mutex_unlock of the + * engine->wakeref. + * + * The other party is intel_gt_retire_requests(), which is walking the + * list of active timelines looking for completions. Meanwhile as soon + * as we call __i915_request_queue(), the GPU may complete our request. + * Ergo, if we put ourselves on the timelines.active_list + * (see intel_timeline_enter()) before we increment the + * engine->wakeref.count, we may see the request completion and retire + * it causing an underflow of the engine->wakeref. */ - flags = __timeline_mark_lock(engine->kernel_context); + flags = __timeline_mark_lock(ce); + GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0); - rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT); + rq = __i915_request_create(ce, GFP_NOWAIT); if (IS_ERR(rq)) /* Context switch failed, hope for the best! Maybe reset? */ goto out_unlock; - intel_timeline_enter(i915_request_timeline(rq)); - /* Check again on the next retirement. 
*/ engine->wakeref_serial = engine->serial + 1; i915_request_add_active_barriers(rq); @@ -116,13 +148,14 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) rq->sched.attr.priority = I915_PRIORITY_BARRIER; __i915_request_commit(rq); - /* Release our exclusive hold on the engine */ - __intel_wakeref_defer_park(&engine->wakeref); __i915_request_queue(rq, NULL); + /* Expose ourselves to intel_gt_retire_requests() and new submission */ + __intel_timeline_enter_and_release_pm(ce->timeline, engine); + result = false; out_unlock: - __timeline_mark_unlock(engine->kernel_context, flags); + __timeline_mark_unlock(ce, flags); return result; } -- cgit v1.2.3 From 97f9af78f38d43cd058cfd40220647be67aca9f7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 Nov 2019 09:43:18 +0000 Subject: drm/i915/gt: Mark the execlists->active as the primary volatile access Since we want to do a lockless read of the current active request, and that request is written to by process_csb also without serialisation, we need to instruct gcc to take care in reading the pointer itself. Otherwise, we have observed execlists_active() to report 0x40. [ 2400.760381] igt/para-4098 1..s. 2376479300us : process_csb: rcs0 cs-irq head=3, tail=4 [ 2400.760826] igt/para-4098 1..s. 2376479303us : process_csb: rcs0 csb[4]: status=0x00000001:0x00000000 [ 2400.761271] igt/para-4098 1..s. 2376479306us : trace_ports: rcs0: promote { b9c59:2622, b9c55:2624 } [ 2400.761726] igt/para-4097 0d... 2376479311us : __i915_schedule: rcs0: -2147483648->3, inflight:0000000000000040, rq:ffff888208c1e940 which is impossible! The answer is that as we keep the existing execlists->active pointing into the array as we copy over that array, the unserialised read may see a partial pointer value. 
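The subtlety is which load the READ_ONCE() must wrap; reading the two forms side by side:

/*
 * execlists->active is republished by process_csb() without any lock,
 * so the pointer itself is the racing datum, not just the slot that it
 * points at:
 *
 *      READ_ONCE(*execlists->active);  // old: atomic load of the slot,
 *                                      // but ->active may be reloaded
 *                                      // around it and mix array bases
 *
 *      *READ_ONCE(execlists->active);  // new: snapshot the published
 *                                      // pointer once, then dereference
 *                                      // that stable snapshot
 */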
Fixes: df403069029d ("drm/i915/execlists: Lift process_csb() out of the irq-off spinlock") Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191125094318.1630806-1-chris@chris-wilson.co.uk (cherry picked from commit 331bf90591573dfe6c8e892239713ef9702f1396) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_engine.h | 4 +--- drivers/gpu/drm/i915/gt/intel_lrc.c | 27 +++++++++++++++++---------- 2 files changed, 18 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index bc3b72bfa9e3..01765a7ec18f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -100,9 +100,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists) static inline struct i915_request * execlists_active(const struct intel_engine_execlists *execlists) { - GEM_BUG_ON(execlists->active - execlists->inflight > - execlists_num_ports(execlists)); - return READ_ONCE(*execlists->active); + return *READ_ONCE(execlists->active); } static inline void diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 37fe72aa8e27..a692e6c9ea75 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1943,6 +1943,9 @@ cancel_port_requests(struct intel_engine_execlists * const execlists) execlists_schedule_out(rq); memset(execlists->pending, 0, sizeof(execlists->pending)); + /* Mark the end of active before we overwrite *active */ + WRITE_ONCE(execlists->active, execlists->pending); + for (port = execlists->active; (rq = *port); port++) execlists_schedule_out(rq); execlists->active = @@ -2099,23 +2102,27 @@ static void process_csb(struct intel_engine_cs *engine) else promote = gen8_csb_parse(execlists, buf + 2 * head); if (promote) { + struct i915_request * const *old = execlists->active; + + /* Point active to the new ELSP; prevent overwriting */ + WRITE_ONCE(execlists->active, execlists->pending); + set_timeslice(engine); + if (!inject_preempt_hang(execlists)) ring_set_paused(engine, 0); /* cancel old inflight, prepare for switch */ - trace_ports(execlists, "preempted", execlists->active); - while (*execlists->active) - execlists_schedule_out(*execlists->active++); + trace_ports(execlists, "preempted", old); + while (*old) + execlists_schedule_out(*old++); /* switch pending to inflight */ GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); - execlists->active = - memcpy(execlists->inflight, - execlists->pending, - execlists_num_ports(execlists) * - sizeof(*execlists->pending)); - - set_timeslice(engine); + WRITE_ONCE(execlists->active, + memcpy(execlists->inflight, + execlists->pending, + execlists_num_ports(execlists) * + sizeof(*execlists->pending))); WRITE_ONCE(execlists->pending[0], NULL); } else { -- cgit v1.2.3 From 4ec5cc78c1b06f8d30b5c682acccf17fb5191cf2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 Nov 2019 11:25:20 +0000 Subject: drm/i915/execlists: Fixup cancel_port_requests() I rushed a last minute correction to cancel_port_requests() to prevent the snooping of *execlists->active as the inflight array was being updated, without noticing we iterated the inflight array starting from active! Oops. 
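At its core the fix below makes publishing the new active pointer and fetching the old one a single atomic step, so the cancel walk can never start from the array it is about to overwrite:

/* Buggy: by the time the walk starts, ->active already points at pending */
WRITE_ONCE(execlists->active, execlists->pending);
for (port = execlists->active; *port; port++)   /* walks pending! */
        execlists_schedule_out(*port);

/* Fixed: swap and walk the previous active array in one step */
for (port = xchg(&execlists->active, execlists->pending); *port; port++)
        execlists_schedule_out(*port);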
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112387 Fixes: 97f9af78f38d ("drm/i915/gt: Mark the execlists->active as the primary volatile access") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20191125112520.1760492-1-chris@chris-wilson.co.uk (cherry picked from commit da0ef77e1e0ccff703efee82406c629d5c4f4bbb) [Joonas: Fixed Fixes: tag to match drm-intel-next-fixes] Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_lrc.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a692e6c9ea75..217b32ae0bcc 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1937,19 +1937,17 @@ skip_submit: static void cancel_port_requests(struct intel_engine_execlists * const execlists) { - struct i915_request * const *port, *rq; + struct i915_request * const *port; - for (port = execlists->pending; (rq = *port); port++) - execlists_schedule_out(rq); + for (port = execlists->pending; *port; port++) + execlists_schedule_out(*port); memset(execlists->pending, 0, sizeof(execlists->pending)); /* Mark the end of active before we overwrite *active */ - WRITE_ONCE(execlists->active, execlists->pending); - - for (port = execlists->active; (rq = *port); port++) - execlists_schedule_out(rq); - execlists->active = - memset(execlists->inflight, 0, sizeof(execlists->inflight)); + for (port = xchg(&execlists->active, execlists->pending); *port; port++) + execlists_schedule_out(*port); + WRITE_ONCE(execlists->active, + memset(execlists->inflight, 0, sizeof(execlists->inflight))); } static inline void -- cgit v1.2.3 From a09c2860ae4fcf4d7e25202661f3eb868547cddb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 Nov 2019 10:58:57 +0000 Subject: drm/i915/gt: Adapt engine_park synchronisation rules for engine_retire In the next patch, we will introduce a new asynchronous retirement worker, fed by execlists CS events. Here we may queue a retirement as soon as a request is submitted to HW (and completes instantly), and we also want to process that retirement as early as possible and cannot afford to postpone (as there may not be another opportunity to retire it for a few seconds). To allow the new async retirer to run in parallel with our submission, pull the __i915_request_queue (that passes the request to HW) inside the timelines spinlock so that the retirement cannot release the timeline before we have completed the submission. v2: Actually, to play nicely with engine_retire, we have to raise the timeline.active_lock before releasing the HW. intel_gt_retire_requests() is still serialised by the outer lock so they cannot see this intermediate state, and engine_retire is serialised by HW submission. 
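In outline, the resulting __queue_and_release_pm() (see the diff below) performs all three steps under one spinlock, so no retirement path can observe a half-constructed state:

spin_lock(&timelines->lock);

/* 1: become visible to intel_gt_retire_requests() */
if (!atomic_fetch_inc(&tl->active_count))
        list_add_tail(&tl->link, &timelines->active_list);

/* 2: hand the request to HW, and thereby to engine_retire() */
__i915_request_queue(rq, NULL);

/* 3: reopen engine-pm, admitting new submitters (and retirers) */
__intel_wakeref_defer_park(&engine->wakeref);

spin_unlock(&timelines->lock);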
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191125105858.1718307-2-chris@chris-wilson.co.uk (cherry picked from commit 88a4655e75ac8f55eea5e3f38b176cba9cf653b5) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 373a4b9f159c..0e1ad4a4bd97 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -74,16 +74,33 @@ static inline void __timeline_mark_unlock(struct intel_context *ce, #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ static void -__intel_timeline_enter_and_release_pm(struct intel_timeline *tl, - struct intel_engine_cs *engine) +__queue_and_release_pm(struct i915_request *rq, + struct intel_timeline *tl, + struct intel_engine_cs *engine) { struct intel_gt_timelines *timelines = &engine->gt->timelines; + GEM_TRACE("%s\n", engine->name); + + /* + * We have to serialise all potential retirement paths with our + * submission, as we don't want to underflow either the + * engine->wakeref.counter or our timeline->active_count. + * + * Equally, we cannot allow a new submission to start until + * after we finish queueing, nor could we allow that submitter + * to retire us before we are ready! + */ spin_lock(&timelines->lock); + /* Let intel_gt_retire_requests() retire us (acquired under lock) */ if (!atomic_fetch_inc(&tl->active_count)) list_add_tail(&tl->link, &timelines->active_list); + /* Hand the request over to HW and so engine_retire() */ + __i915_request_queue(rq, NULL); + + /* Let new submissions commence (and maybe retire this timeline) */ __intel_wakeref_defer_park(&engine->wakeref); spin_unlock(&timelines->lock); @@ -148,10 +165,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) rq->sched.attr.priority = I915_PRIORITY_BARRIER; __i915_request_commit(rq); - __i915_request_queue(rq, NULL); - - /* Expose ourselves to intel_gt_retire_requests() and new submission */ - __intel_timeline_enter_and_release_pm(ce->timeline, engine); + /* Expose ourselves to the world */ + __queue_and_release_pm(rq, ce->timeline, engine); result = false; out_unlock: -- cgit v1.2.3 From 311770173fac27845a3a83e2c16100a54d308f72 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 Nov 2019 10:58:58 +0000 Subject: drm/i915/gt: Schedule request retirement when timeline idles The major drawback of commit 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX corruption WA") is that it disables RC6 while Skylake (and friends) is active, and we do not consider the GPU idle until all outstanding requests have been retired and the engine switched over to the kernel context. If userspace is idle, this task falls onto our background idle worker, which only runs roughly once a second, meaning that userspace has to have been idle for a couple of seconds before we enable RC6 again. Naturally, this causes us to consume considerably more energy than before as powersaving is effectively disabled while a display server (here's looking at you Xorg) is running. As execlists will get a completion event as each context is completed, we can use this interrupt to queue a retire worker bound to this engine to clean up idle timelines, as sketched below. 
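The retirement queue itself is a lock-free, singly-linked stack threaded through timeline->retire, with BIT(0) set on every link so that even the tail's otherwise-NULL pointer reads back non-NULL while queued. In sketch form (the ptr_pack_bits()/ptr_mask_bits() expansions are paraphrased; they roughly OR in and mask off the low bit):

/* producer: any context, no locks */
first = READ_ONCE(engine->retire);
do
        tl->retire = ptr_pack_bits(first, 1, 1); /* ~ (uintptr_t)first | 1 */
while (!try_cmpxchg(&engine->retire, &first, tl));
if (!first)
        schedule_work(&engine->retire_work);     /* first link kicks worker */

/* consumer: the worker claims the whole chain at once */
tl = xchg(&engine->retire, NULL);
do {
        struct intel_timeline *next = xchg(&tl->retire, NULL);

        /* retire tl under mutex_trylock(&tl->mutex), then drop its ref */
        tl = ptr_mask_bits(next, 1);             /* ~ strip the tag bit */
} while (tl);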
We will then immediately notice the idle engine (without userspace intervention or the aid of the background retire worker) and start parking the GPU. Thus during light workloads, we will do much more work to idle the GPU faster... Hopefully with commensurate power saving! v2: Watch context completions and only look at those local to the engine when retiring to reduce the amount of excess work we perform. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112315 References: 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX corruption WA") References: 2248a28384fe ("drm/i915/gen8+: Add RC6 CTX corruption WA") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191125105858.1718307-3-chris@chris-wilson.co.uk (cherry picked from commit 4f88f8747fa43c97c3b3712d8d87295ea757cc51) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 8 +-- drivers/gpu/drm/i915/gt/intel_engine_types.h | 8 +++ drivers/gpu/drm/i915/gt/intel_gt_requests.c | 75 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_requests.h | 7 +++ drivers/gpu/drm/i915/gt/intel_lrc.c | 9 ++++ drivers/gpu/drm/i915/gt/intel_timeline.c | 1 + drivers/gpu/drm/i915/gt/intel_timeline_types.h | 3 ++ 7 files changed, 108 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 5ca3ec911e50..813bd3a610d2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -28,13 +28,13 @@ #include "i915_drv.h" -#include "gt/intel_gt.h" - +#include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" #include "intel_engine_pool.h" #include "intel_engine_user.h" -#include "intel_context.h" +#include "intel_gt.h" +#include "intel_gt_requests.h" #include "intel_lrc.h" #include "intel_reset.h" #include "intel_ring.h" @@ -616,6 +616,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_execlists(engine); intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); + intel_engine_init_retire(engine); intel_engine_pool_init(&engine->pool); @@ -838,6 +839,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) cleanup_status_page(engine); + intel_engine_fini_retire(engine); intel_engine_pool_fini(&engine->pool); intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 758f0e8ec672..17f1f1441efc 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -451,6 +451,14 @@ struct intel_engine_cs { struct intel_engine_execlists execlists; + /* + * Keep track of completed timelines on this engine for early + * retirement with the goal of quickly enabling powersaving as + * soon as the engine is idle. 
+ */ + struct intel_timeline *retire; + struct work_struct retire_work; + /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index a0112ba08ca7..3dc13ecf41bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -4,6 +4,8 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/workqueue.h> + #include "i915_drv.h" /* for_each_engine() */ #include "i915_request.h" #include "intel_gt.h" @@ -29,6 +31,79 @@ static void flush_submission(struct intel_gt *gt) intel_engine_flush_submission(engine); } +static void engine_retire(struct work_struct *work) +{ + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), retire_work); + struct intel_timeline *tl = xchg(&engine->retire, NULL); + + do { + struct intel_timeline *next = xchg(&tl->retire, NULL); + + /* + * Our goal here is to retire _idle_ timelines as soon as + * possible (as they are idle, we do not expect userspace + * to be cleaning up anytime soon). + * + * If the timeline is currently locked, either it is being + * retired elsewhere or about to be! + */ + if (mutex_trylock(&tl->mutex)) { + retire_requests(tl); + mutex_unlock(&tl->mutex); + } + intel_timeline_put(tl); + + GEM_BUG_ON(!next); + tl = ptr_mask_bits(next, 1); + } while (tl); +} + +static bool add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl) +{ + struct intel_timeline *first; + + /* + * We open-code a llist here to include the additional tag [BIT(0)] + * so that we know when the timeline is already on a + * retirement queue: either this engine or another. + * + * However, we rely on the fact that a timeline can only be active on + * a single engine at any one time and that add_retire() is called + * before the engine releases the timeline and it is transferred to + * another to retire. 
+ */ + + if (READ_ONCE(tl->retire)) /* already queued */ + return false; + + intel_timeline_get(tl); + first = READ_ONCE(engine->retire); + do + tl->retire = ptr_pack_bits(first, 1, 1); + while (!try_cmpxchg(&engine->retire, &first, tl)); + + return !first; +} + +void intel_engine_add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl) +{ + if (add_retire(engine, tl)) + schedule_work(&engine->retire_work); +} + +void intel_engine_init_retire(struct intel_engine_cs *engine) +{ + INIT_WORK(&engine->retire_work, engine_retire); +} + +void intel_engine_fini_retire(struct intel_engine_cs *engine) +{ + flush_work(&engine->retire_work); + GEM_BUG_ON(engine->retire); +} + long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) { struct intel_gt_timelines *timelines = >->timelines; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h index bd31cbce47e0..d626fb115386 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -7,7 +7,9 @@ #ifndef INTEL_GT_REQUESTS_H #define INTEL_GT_REQUESTS_H +struct intel_engine_cs; struct intel_gt; +struct intel_timeline; long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); static inline void intel_gt_retire_requests(struct intel_gt *gt) @@ -15,6 +17,11 @@ static inline void intel_gt_retire_requests(struct intel_gt *gt) intel_gt_retire_requests_timeout(gt, 0); } +void intel_engine_init_retire(struct intel_engine_cs *engine); +void intel_engine_add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl); +void intel_engine_fini_retire(struct intel_engine_cs *engine); + int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); void intel_gt_init_requests(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 217b32ae0bcc..9fdefbdc3546 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -142,6 +142,7 @@ #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_reset.h" @@ -1115,6 +1116,14 @@ __execlists_schedule_out(struct i915_request *rq, * refrain from doing non-trivial work here. */ + /* + * If we have just completed this context, the engine may now be + * idle and we want to re-enter powersaving. 
+ */ + if (list_is_last(&rq->link, &ce->timeline->requests) && + i915_request_completed(rq)) + intel_engine_add_retire(engine, ce->timeline); + intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); intel_gt_pm_put_async(engine->gt); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 839de6b9aa24..649798c184fb 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -282,6 +282,7 @@ void intel_timeline_fini(struct intel_timeline *timeline) { GEM_BUG_ON(atomic_read(&timeline->pin_count)); GEM_BUG_ON(!list_empty(&timeline->requests)); + GEM_BUG_ON(timeline->retire); if (timeline->hwsp_cacheline) cacheline_free(timeline->hwsp_cacheline); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index 5244615ed1cb..aaf15cbe1ce1 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -66,6 +66,9 @@ struct intel_timeline { */ struct i915_active_fence last_request; + /** A chain of completed timelines ready for early retirement. */ + struct intel_timeline *retire; + /** * We track the most recent seqno that we wait on in every context so * that we only have to emit a new await and dependency on a more -- cgit v1.2.3 From 0725d9a31869e6c80630e99da366ede2848295cc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 18 Nov 2019 23:02:40 +0000 Subject: drm/i915/gt: Make intel_ring_unpin() safe for concurrent pin In order to avoid some nasty mutex inversions, commit 09c5ab384f6f ("drm/i915: Keep rings pinned while the context is active") allowed the intel_ring unpinning to be run concurrently with the next context pinning it. Thus each step in intel_ring_unpin() needed to be atomic and ordered in a nice onion with intel_ring_pin() so that the lifetimes overlapped and were always safe. Sadly, a few steps in intel_ring_unpin() were overlooked, such as closing the read/write pointers of the ring and discarding the intel_ring.vaddr, as these steps were not serialised with intel_ring_pin() and so could leave the ring in disarray. Fixes: 09c5ab384f6f ("drm/i915: Keep rings pinned while the context is active") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191118230254.2615942-6-chris@chris-wilson.co.uk (cherry picked from commit a266bf42006004306dd48a9082c35dfbff153307) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_ring.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index ece20504d240..374b28f13ca0 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -57,9 +57,10 @@ int intel_ring_pin(struct intel_ring *ring) i915_vma_make_unshrinkable(vma); - GEM_BUG_ON(ring->vaddr); - ring->vaddr = addr; + /* Discard any unused bytes beyond that submitted to hw. */ + intel_ring_reset(ring, ring->emit); + ring->vaddr = addr; return 0; err_ring: @@ -85,20 +86,14 @@ void intel_ring_unpin(struct intel_ring *ring) if (!atomic_dec_and_test(&ring->pin_count)) return; - /* Discard any unused bytes beyond that submitted to hw. 
*/ - intel_ring_reset(ring, ring->emit); - i915_vma_unset_ggtt_write(vma); if (i915_vma_is_map_and_fenceable(vma)) i915_vma_unpin_iomap(vma); else i915_gem_object_unpin_map(vma->obj); - GEM_BUG_ON(!ring->vaddr); - ring->vaddr = NULL; - - i915_vma_unpin(vma); i915_vma_make_purgeable(vma); + i915_vma_unpin(vma); } static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) -- cgit v1.2.3 From 3cc44feb9861d2f5267af9b962ae92c5ea1b48fd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 Nov 2019 06:55:21 +0000 Subject: drm/i915: Reduce nested prepare_remote_context() to a trylock On context retiring, we may invoke the kernel_context to unpin this context. Elsewhere, we may use the kernel_context to modify this context. This currently leads to an AB-BA lock inversion, so we need to back off from the contended lock, and repeat. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111732 Signed-off-by: Chris Wilson Fixes: a9877da2d629 ("drm/i915/oa: Reconfigure contexts on the fly") Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191126065521.2331017-1-chris@chris-wilson.co.uk (cherry picked from commit 58b4c1a07ada7fb91ea757bdb9bd47df02207357) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_context.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index ee9d2bcd2c13..ef7bc41ffffa 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -310,10 +310,23 @@ int intel_context_prepare_remote_request(struct intel_context *ce, GEM_BUG_ON(rq->hw_context == ce); if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */ - err = mutex_lock_interruptible_nested(&tl->mutex, - SINGLE_DEPTH_NESTING); - if (err) - return err; + /* + * Ideally, we just want to insert our foreign fence as + * a barrier into the remote context, such that this operation + * occurs after all current operations in that context, and + * all future operations must occur after this. + * + * Currently, the timeline->last_request tracking is guarded + * by its mutex and so we must obtain that to atomically + * insert our barrier. However, since we already hold our + * timeline->mutex, we must be careful against potential + * inversion if we are the kernel_context as the remote context + * will itself poke at the kernel_context when it needs to + * unpin. Ergo, if already locked, we drop both locks and + * try again (through the magic of userspace repeating EAGAIN). + */ + if (!mutex_trylock(&tl->mutex)) + return -EAGAIN; /* Queue this switch after current activity by this context. */ err = i915_active_fence_set(&tl->last_request, rq);
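The shape of that back-off, extracted for reference (a sketch of the hunk above, not the complete function): while holding our own timeline->mutex (lock A), the barrier insertion needs the remote timeline->mutex (lock B), so contention is converted into a restartable -EAGAIN instead of risking the AB-BA deadlock:

/* called with rq's own timeline->mutex (lock A) already held */
if (rcu_access_pointer(rq->timeline) != tl) {
        if (!mutex_trylock(&tl->mutex)) /* lock B: the remote timeline */
                return -EAGAIN;         /* drop both locks; caller retries */

        /* Queue this switch after current activity by this context. */
        err = i915_active_fence_set(&tl->last_request, rq);
        mutex_unlock(&tl->mutex);
}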