Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_lrc.c | 539
1 file changed, 358 insertions(+), 181 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e3a5f673ff67..15434cad5430 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -139,6 +139,7 @@
 #include "i915_gem_render_state.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
+#include "intel_workarounds.h"
 
 #define RING_EXECLIST_QFULL		(1 << 0x2)
 #define RING_EXECLIST1_VALID		(1 << 0x3)
@@ -176,14 +177,16 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 
 static inline int rq_prio(const struct i915_request *rq)
 {
-	return rq->priotree.priority;
+	return rq->sched.attr.priority;
 }
 
 static inline bool need_preempt(const struct intel_engine_cs *engine,
 				const struct i915_request *last,
 				int prio)
 {
-	return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+	return (intel_engine_has_preemption(engine) &&
+		__execlists_need_preempt(prio, rq_prio(last)) &&
+		!i915_request_completed(last));
 }
 
 /**
@@ -221,7 +224,7 @@ static void
 intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
 				   struct intel_engine_cs *engine)
 {
-	struct intel_context *ce = &ctx->engine[engine->id];
+	struct intel_context *ce = to_intel_context(ctx, engine);
 	u64 desc;
 
 	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
@@ -255,9 +258,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
 }
 
 static struct i915_priolist *
-lookup_priolist(struct intel_engine_cs *engine,
-		struct i915_priotree *pt,
-		int prio)
+lookup_priolist(struct intel_engine_cs *engine, int prio)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_priolist *p;
@@ -328,10 +329,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	struct i915_priolist *uninitialized_var(p);
 	int last_prio = I915_PRIORITY_INVALID;
 
-	lockdep_assert_held(&engine->timeline->lock);
+	lockdep_assert_held(&engine->timeline.lock);
 
 	list_for_each_entry_safe_reverse(rq, rn,
-					 &engine->timeline->requests,
+					 &engine->timeline.requests,
 					 link) {
 		if (i915_request_completed(rq))
 			return;
@@ -342,10 +343,11 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 		if (rq_prio(rq) != last_prio) {
 			last_prio = rq_prio(rq);
-			p = lookup_priolist(engine, &rq->priotree, last_prio);
+			p = lookup_priolist(engine, last_prio);
 		}
 
-		list_add(&rq->priotree.link, &p->requests);
+		GEM_BUG_ON(p->priority != rq_prio(rq));
+		list_add(&rq->sched.link, &p->requests);
 	}
 }
 
@@ -354,10 +356,13 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
 {
 	struct intel_engine_cs *engine =
 		container_of(execlists, typeof(*engine), execlists);
+	unsigned long flags;
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
 
-	spin_lock_irq(&engine->timeline->lock);
 	__unwind_incomplete_requests(engine);
-	spin_unlock_irq(&engine->timeline->lock);
+
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static inline void
@@ -374,6 +379,19 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
 		   status, rq);
 }
 
+inline void
+execlists_user_begin(struct intel_engine_execlists *execlists,
+		     const struct execlist_port *port)
+{
+	execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
+}
+
+inline void
+execlists_user_end(struct intel_engine_execlists *execlists)
+{
+	execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+}
+
 static inline void
 execlists_context_schedule_in(struct i915_request *rq)
 {
@@ -382,10 +400,11 @@ execlists_context_schedule_in(struct i915_request *rq)
 }
 
 static inline void
-execlists_context_schedule_out(struct i915_request *rq)
+execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
 {
 	intel_engine_context_out(rq->engine);
-	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+	execlists_context_status_change(rq, status);
+	trace_i915_request_out(rq);
 }
 
 static void
@@ -399,7 +418,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
 static u64 execlists_update_context(struct i915_request *rq)
 {
-	struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
+	struct intel_context *ce = to_intel_context(rq->ctx, rq->engine);
 	struct i915_hw_ppgtt *ppgtt =
 		rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
@@ -454,10 +473,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 			desc = execlists_update_context(rq);
 			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
 
-			GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n",
+			GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
 				  engine->name, n,
 				  port[n].context_id, count,
 				  rq->global_seqno,
+				  rq->fence.context, rq->fence.seqno,
+				  intel_engine_get_seqno(engine),
 				  rq_prio(rq));
 		} else {
 			GEM_BUG_ON(!n);
@@ -506,7 +527,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
 	struct intel_context *ce =
-		&engine->i915->preempt_context->engine[engine->id];
+		to_intel_context(engine->i915->preempt_context, engine);
 	unsigned int n;
 
 	GEM_BUG_ON(execlists->preempt_complete_status !=
@@ -535,7 +556,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
-static void execlists_dequeue(struct intel_engine_cs *engine)
+static bool __execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
@@ -545,6 +566,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	struct rb_node *rb;
 	bool submit = false;
 
+	lockdep_assert_held(&engine->timeline.lock);
+
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
 	 * static for a context, and unique to each, so we only execute
@@ -566,7 +589,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
-	spin_lock_irq(&engine->timeline->lock);
 	rb = execlists->first;
 	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
 
@@ -581,7 +603,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 						EXECLISTS_ACTIVE_USER));
 		GEM_BUG_ON(!port_count(&port[0]));
 		if (port_count(&port[0]) > 1)
-			goto unlock;
+			return false;
 
 		/*
 		 * If we write to ELSP a second time before the HW has had
@@ -591,11 +613,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 * the HW to indicate that it has had a chance to respond.
 		 */
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
-			goto unlock;
+			return false;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
 			inject_preempt_context(engine);
-			goto unlock;
+			return false;
 		}
 
 		/*
@@ -620,7 +642,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
		 * priorities of the ports haven't been switch.
 		 */
 		if (port_count(&port[1]))
-			goto unlock;
+			return false;
 
 		/*
 		 * WaIdleLiteRestore:bdw,skl
@@ -637,7 +659,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
 
-		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
 			/*
 			 * Can we combine this request with the current port?
 			 * It has to be the same context/ringbuffer and not
@@ -657,7 +679,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 */
 			if (port == last_port) {
 				__list_del_many(&p->requests,
-						&rq->priotree.link);
+						&rq->sched.link);
 				goto done;
 			}
 
@@ -671,7 +693,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			if (ctx_single_port_submission(last->ctx) ||
 			    ctx_single_port_submission(rq->ctx)) {
 				__list_del_many(&p->requests,
-						&rq->priotree.link);
+						&rq->sched.link);
 				goto done;
 			}
 
@@ -684,7 +706,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				GEM_BUG_ON(port_isset(port));
 			}
 
-			INIT_LIST_HEAD(&rq->priotree.link);
+			INIT_LIST_HEAD(&rq->sched.link);
 			__i915_request_submit(rq);
 			trace_i915_request_in(rq, port_index(port, execlists));
 			last = rq;
@@ -697,8 +719,27 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
+
 done:
-	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
+	/*
+	 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
+	 *
+	 * We choose queue_priority such that if we add a request of greater
+	 * priority than this, we kick the submission tasklet to decide on
+	 * the right order of submitting the requests to hardware. We must
+	 * also be prepared to reorder requests as they are in-flight on the
+	 * HW. We derive the queue_priority then as the first "hole" in
+	 * the HW submission ports and if there are no available slots,
+	 * the priority of the lowest executing request, i.e. last.
+	 *
+	 * When we do receive a higher priority request ready to run from the
+	 * user, see queue_request(), the queue_priority is bumped to that
+	 * request triggering preemption on the next dequeue (or subsequent
	 * interrupt for secondary ports).
+	 */
+	execlists->queue_priority =
+		port != execlists->port ? rq_prio(last) : INT_MIN;
+
 	execlists->first = rb;
 	if (submit)
 		port_assign(port, last);
@@ -706,13 +747,25 @@ done:
 	/* We must always keep the beast fed if we have work piled up */
 	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
 
-unlock:
-	spin_unlock_irq(&engine->timeline->lock);
+	/* Re-evaluate the executing context setup after each preemptive kick */
+	if (last)
+		execlists_user_begin(execlists, execlists->port);
 
-	if (submit) {
-		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
+	return submit;
+}
+
+static void execlists_dequeue(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+	unsigned long flags;
+	bool submit;
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+	submit = __execlists_dequeue(engine);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
+	if (submit)
 		execlists_submit_ports(engine);
-	}
 
 	GEM_BUG_ON(port_isset(execlists->port) &&
 		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
@@ -727,13 +780,18 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 	while (num_ports-- && port_isset(port)) {
 		struct i915_request *rq = port_request(port);
 
-		GEM_BUG_ON(!execlists->active);
-		intel_engine_context_out(rq->engine);
+		GEM_TRACE("%s:port%u global=%d (fence %llx:%d), (current %d)\n",
+			  rq->engine->name,
+			  (unsigned int)(port - execlists->port),
+			  rq->global_seqno,
+			  rq->fence.context, rq->fence.seqno,
+			  intel_engine_get_seqno(rq->engine));
 
-		execlists_context_status_change(rq,
-						i915_request_completed(rq) ?
-						INTEL_CONTEXT_SCHEDULE_OUT :
-						INTEL_CONTEXT_SCHEDULE_PREEMPTED);
+		GEM_BUG_ON(!execlists->active);
+		execlists_context_schedule_out(rq,
+					       i915_request_completed(rq) ?
+					       INTEL_CONTEXT_SCHEDULE_OUT :
+					       INTEL_CONTEXT_SCHEDULE_PREEMPTED);
 
 		i915_request_put(rq);
 
@@ -741,7 +799,82 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 		port++;
 	}
 
-	execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+	execlists_user_end(execlists);
+}
+
+static void clear_gtiir(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int i;
+
+	/*
+	 * Clear any pending interrupt state.
+	 *
+	 * We do it twice out of paranoia that some of the IIR are
+	 * double buffered, and so if we only reset it once there may
+	 * still be an interrupt pending.
+	 */
+	if (INTEL_GEN(dev_priv) >= 11) {
+		static const struct {
+			u8 bank;
+			u8 bit;
+		} gen11_gtiir[] = {
+			[RCS] = {0, GEN11_RCS0},
+			[BCS] = {0, GEN11_BCS},
+			[_VCS(0)] = {1, GEN11_VCS(0)},
+			[_VCS(1)] = {1, GEN11_VCS(1)},
+			[_VCS(2)] = {1, GEN11_VCS(2)},
+			[_VCS(3)] = {1, GEN11_VCS(3)},
+			[_VECS(0)] = {1, GEN11_VECS(0)},
+			[_VECS(1)] = {1, GEN11_VECS(1)},
+		};
+		unsigned long irqflags;
+
+		GEM_BUG_ON(engine->id >= ARRAY_SIZE(gen11_gtiir));
+
+		spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+		for (i = 0; i < 2; i++) {
+			gen11_reset_one_iir(dev_priv,
+					    gen11_gtiir[engine->id].bank,
+					    gen11_gtiir[engine->id].bit);
+		}
+		spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+	} else {
+		static const u8 gtiir[] = {
+			[RCS] = 0,
+			[BCS] = 0,
+			[VCS] = 1,
+			[VCS2] = 1,
+			[VECS] = 3,
+		};
+
+		GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
+
+		for (i = 0; i < 2; i++) {
+			I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
+				   engine->irq_keep_mask);
+			POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
+		}
+		GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
+			   engine->irq_keep_mask);
+	}
+}
+
+static void reset_irq(struct intel_engine_cs *engine)
+{
+	/* Mark all CS interrupts as complete */
+	smp_store_mb(engine->execlists.active, 0);
+	synchronize_hardirq(engine->i915->drm.irq);
+
+	clear_gtiir(engine);
+
+	/*
+	 * The port is checked prior to scheduling a tasklet, but
+	 * just in case we have suspended the tasklet to do the
+	 * wedging make sure that when it wakes, it decides there
+	 * is no work to do by clearing the irq_posted bit.
+	 */
+	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 }
 
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
@@ -751,7 +884,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	struct rb_node *rb;
 	unsigned long flags;
 
-	GEM_TRACE("%s\n", engine->name);
+	GEM_TRACE("%s current %d\n",
+		  engine->name, intel_engine_get_seqno(engine));
 
 	/*
 	 * Before we call engine->cancel_requests(), we should have exclusive
@@ -771,11 +905,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
 	/* Cancel the requests on the HW and clear the ELSP tracker. */
 	execlists_cancel_port_requests(execlists);
+	reset_irq(engine);
 
-	spin_lock(&engine->timeline->lock);
+	spin_lock(&engine->timeline.lock);
 
 	/* Mark all executing requests as skipped. */
-	list_for_each_entry(rq, &engine->timeline->requests, link) {
+	list_for_each_entry(rq, &engine->timeline.requests, link) {
 		GEM_BUG_ON(!rq->global_seqno);
 		if (!i915_request_completed(rq))
 			dma_fence_set_error(&rq->fence, -EIO);
@@ -786,8 +921,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	while (rb) {
 		struct i915_priolist *p = to_priolist(rb);
 
-		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
-			INIT_LIST_HEAD(&rq->priotree.link);
+		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
+			INIT_LIST_HEAD(&rq->sched.link);
 
 			dma_fence_set_error(&rq->fence, -EIO);
 			__i915_request_submit(rq);
@@ -807,18 +942,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	execlists->first = NULL;
 	GEM_BUG_ON(port_isset(execlists->port));
 
-	spin_unlock(&engine->timeline->lock);
-
-	/*
-	 * The port is checked prior to scheduling a tasklet, but
-	 * just in case we have suspended the tasklet to do the
-	 * wedging make sure that when it wakes, it decides there
-	 * is no work to do by clearing the irq_posted bit.
-	 */
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-
-	/* Mark all CS interrupts as complete */
-	execlists->active = 0;
+	spin_unlock(&engine->timeline.lock);
 
 	local_irq_restore(flags);
 }
@@ -831,7 +955,7 @@ static void execlists_submission_tasklet(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port * const port = execlists->port;
+	struct execlist_port *port = execlists->port;
 	struct drm_i915_private *dev_priv = engine->i915;
 	bool fw = false;
 
@@ -884,6 +1008,7 @@ static void execlists_submission_tasklet(unsigned long data)
 
 			head = execlists->csb_head;
 			tail = READ_ONCE(buf[write_idx]);
+			rmb(); /* Hopefully paired with a wmb() in HW */
 		}
 		GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
 			  engine->name,
@@ -958,10 +1083,13 @@ static void execlists_submission_tasklet(unsigned long data)
 						  EXECLISTS_ACTIVE_USER));
 
 			rq = port_unpack(port, &count);
-			GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
+			GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
 				  engine->name,
 				  port->context_id, count,
 				  rq ? rq->global_seqno : 0,
+				  rq ? rq->fence.context : 0,
+				  rq ? rq->fence.seqno : 0,
+				  intel_engine_get_seqno(engine),
 				  rq ? rq_prio(rq) : 0);
 
 			/* Check the context/desc id for this event matches */
@@ -969,28 +1097,43 @@ static void execlists_submission_tasklet(unsigned long data)
 			GEM_BUG_ON(count == 0);
 
 			if (--count == 0) {
+				/*
+				 * On the final event corresponding to the
+				 * submission of this context, we expect either
+				 * an element-switch event or a completion
+				 * event (and on completion, the active-idle
+				 * marker). No more preemptions, lite-restore
+				 * or otherwise.
+				 */
 				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
 				GEM_BUG_ON(port_isset(&port[1]) &&
 					   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
+				GEM_BUG_ON(!port_isset(&port[1]) &&
+					   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+
+				/*
+				 * We rely on the hardware being strongly
+				 * ordered, that the breadcrumb write is
+				 * coherent (visible from the CPU) before the
+				 * user interrupt and CSB is processed.
+				 */
 				GEM_BUG_ON(!i915_request_completed(rq));
-				execlists_context_schedule_out(rq);
-				trace_i915_request_out(rq);
+
+				execlists_context_schedule_out(rq,
+							       INTEL_CONTEXT_SCHEDULE_OUT);
 				i915_request_put(rq);
 
 				GEM_TRACE("%s completed ctx=%d\n",
 					  engine->name, port->context_id);
 
-				execlists_port_complete(execlists, port);
+				port = execlists_port_complete(execlists, port);
+				if (port_isset(port))
+					execlists_user_begin(execlists, port);
+				else
+					execlists_user_end(execlists);
 			} else {
 				port_set(port, port_pack(rq, count));
 			}
-
-			/* After the final element, the hw should be idle */
-			GEM_BUG_ON(port_count(port) == 0 &&
-				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
-			if (port_count(port) == 0)
-				execlists_clear_active(execlists,
-						       EXECLISTS_ACTIVE_USER);
 		}
 
 		if (head != execlists->csb_head) {
@@ -1013,18 +1156,23 @@ static void execlists_submission_tasklet(unsigned long data)
 }
 
 static void queue_request(struct intel_engine_cs *engine,
-			  struct i915_priotree *pt,
+			  struct i915_sched_node *node,
 			  int prio)
 {
-	list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
+	list_add_tail(&node->link,
+		      &lookup_priolist(engine, prio)->requests);
+}
+
+static void __submit_queue(struct intel_engine_cs *engine, int prio)
+{
+	engine->execlists.queue_priority = prio;
+	tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
 static void submit_queue(struct intel_engine_cs *engine, int prio)
 {
-	if (prio > engine->execlists.queue_priority) {
-		engine->execlists.queue_priority = prio;
-		tasklet_hi_schedule(&engine->execlists.tasklet);
-	}
+	if (prio > engine->execlists.queue_priority)
+		__submit_queue(engine, prio);
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1033,42 +1181,45 @@ static void execlists_submit_request(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline->lock, flags);
+	spin_lock_irqsave(&engine->timeline.lock, flags);
 
-	queue_request(engine, &request->priotree, rq_prio(request));
+	queue_request(engine, &request->sched, rq_prio(request));
 	submit_queue(engine, rq_prio(request));
 
 	GEM_BUG_ON(!engine->execlists.first);
-	GEM_BUG_ON(list_empty(&request->priotree.link));
+	GEM_BUG_ON(list_empty(&request->sched.link));
 
-	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
-static struct i915_request *pt_to_request(struct i915_priotree *pt)
+static struct i915_request *sched_to_request(struct i915_sched_node *node)
 {
-	return container_of(pt, struct i915_request, priotree);
+	return container_of(node, struct i915_request, sched);
 }
 
 static struct intel_engine_cs *
-pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
+sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 {
-	struct intel_engine_cs *engine = pt_to_request(pt)->engine;
+	struct intel_engine_cs *engine = sched_to_request(node)->engine;
 
 	GEM_BUG_ON(!locked);
 
 	if (engine != locked) {
-		spin_unlock(&locked->timeline->lock);
-		spin_lock(&engine->timeline->lock);
+		spin_unlock(&locked->timeline.lock);
+		spin_lock(&engine->timeline.lock);
 	}
 
 	return engine;
 }
 
-static void execlists_schedule(struct i915_request *request, int prio)
+static void execlists_schedule(struct i915_request *request,
+			       const struct i915_sched_attr *attr)
 {
-	struct intel_engine_cs *engine;
+	struct i915_priolist *uninitialized_var(pl);
+	struct intel_engine_cs *engine, *last;
 	struct i915_dependency *dep, *p;
 	struct i915_dependency stack;
+	const int prio = attr->priority;
 	LIST_HEAD(dfs);
 
 	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
@@ -1076,23 +1227,23 @@ static void execlists_schedule(struct i915_request *request, int prio)
 	if (i915_request_completed(request))
 		return;
 
-	if (prio <= READ_ONCE(request->priotree.priority))
+	if (prio <= READ_ONCE(request->sched.attr.priority))
 		return;
 
 	/* Need BKL in order to use the temporary link inside i915_dependency */
 	lockdep_assert_held(&request->i915->drm.struct_mutex);
 
-	stack.signaler = &request->priotree;
+	stack.signaler = &request->sched;
 	list_add(&stack.dfs_link, &dfs);
 
 	/*
 	 * Recursively bump all dependent priorities to match the new request.
 	 *
 	 * A naive approach would be to use recursion:
-	 * static void update_priorities(struct i915_priotree *pt, prio) {
-	 *	list_for_each_entry(dep, &pt->signalers_list, signal_link)
+	 * static void update_priorities(struct i915_sched_node *node, prio) {
+	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
 	 *		update_priorities(dep->signal, prio)
-	 *	queue_request(pt);
+	 *	queue_request(node);
 	 * }
 	 * but that may have unlimited recursion depth and so runs a very
 	 * real risk of overunning the kernel stack. Instead, we build
@@ -1104,7 +1255,7 @@ static void execlists_schedule(struct i915_request *request, int prio)
 	 * last element in the list is the request we must execute first.
 	 */
 	list_for_each_entry(dep, &dfs, dfs_link) {
-		struct i915_priotree *pt = dep->signaler;
+		struct i915_sched_node *node = dep->signaler;
 
 		/*
 		 * Within an engine, there can be no cycle, but we may
@@ -1112,14 +1263,14 @@ static void execlists_schedule(struct i915_request *request, int prio)
 		 * (redundant dependencies are not eliminated) and across
 		 * engines.
 		 */
-		list_for_each_entry(p, &pt->signalers_list, signal_link) {
+		list_for_each_entry(p, &node->signalers_list, signal_link) {
 			GEM_BUG_ON(p == dep); /* no cycles! */
 
-			if (i915_priotree_signaled(p->signaler))
+			if (i915_sched_node_signaled(p->signaler))
 				continue;
 
-			GEM_BUG_ON(p->signaler->priority < pt->priority);
-			if (prio > READ_ONCE(p->signaler->priority))
+			GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority);
+			if (prio > READ_ONCE(p->signaler->attr.priority))
 				list_move_tail(&p->dfs_link, &dfs);
 		}
 	}
@@ -1130,37 +1281,45 @@ static void execlists_schedule(struct i915_request *request, int prio)
 	 * execlists_submit_request()), we can set our own priority and skip
 	 * acquiring the engine locks.
 	 */
-	if (request->priotree.priority == I915_PRIORITY_INVALID) {
-		GEM_BUG_ON(!list_empty(&request->priotree.link));
-		request->priotree.priority = prio;
+	if (request->sched.attr.priority == I915_PRIORITY_INVALID) {
+		GEM_BUG_ON(!list_empty(&request->sched.link));
+		request->sched.attr = *attr;
 		if (stack.dfs_link.next == stack.dfs_link.prev)
 			return;
 		__list_del_entry(&stack.dfs_link);
 	}
 
+	last = NULL;
 	engine = request->engine;
-	spin_lock_irq(&engine->timeline->lock);
+	spin_lock_irq(&engine->timeline.lock);
 
 	/* Fifo and depth-first replacement ensure our deps execute before us */
 	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
-		struct i915_priotree *pt = dep->signaler;
+		struct i915_sched_node *node = dep->signaler;
 
 		INIT_LIST_HEAD(&dep->dfs_link);
 
-		engine = pt_lock_engine(pt, engine);
+		engine = sched_lock_engine(node, engine);
 
-		if (prio <= pt->priority)
+		if (prio <= node->attr.priority)
 			continue;
 
-		pt->priority = prio;
-		if (!list_empty(&pt->link)) {
-			__list_del_entry(&pt->link);
-			queue_request(engine, pt, prio);
+		node->attr.priority = prio;
+		if (!list_empty(&node->link)) {
+			if (last != engine) {
+				pl = lookup_priolist(engine, prio);
+				last = engine;
+			}
+			GEM_BUG_ON(pl->priority != prio);
+			list_move_tail(&node->link, &pl->requests);
 		}
-		submit_queue(engine, prio);
+
+		if (prio > engine->execlists.queue_priority &&
+		    i915_sw_fence_done(&sched_to_request(node)->submit))
+			__submit_queue(engine, prio);
 	}
 
-	spin_unlock_irq(&engine->timeline->lock);
+	spin_unlock_irq(&engine->timeline.lock);
 }
 
 static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
@@ -1190,7 +1349,7 @@ static struct intel_ring *
 execlists_context_pin(struct intel_engine_cs *engine,
 		      struct i915_gem_context *ctx)
 {
-	struct intel_context *ce = &ctx->engine[engine->id];
+	struct intel_context *ce = to_intel_context(ctx, engine);
 	void *vaddr;
 	int ret;
 
@@ -1224,6 +1383,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
 		i915_ggtt_offset(ce->ring->vma);
+	ce->lrc_reg_state[CTX_RING_HEAD+1] = ce->ring->head;
 
 	ce->state->obj->pin_global++;
 	i915_gem_context_get(ctx);
@@ -1242,7 +1402,7 @@ err:
 static void execlists_context_unpin(struct intel_engine_cs *engine,
 				    struct i915_gem_context *ctx)
 {
-	struct intel_context *ce = &ctx->engine[engine->id];
+	struct intel_context *ce = to_intel_context(ctx, engine);
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(ce->pin_count == 0);
@@ -1261,8 +1421,8 @@ static void execlists_context_unpin(struct intel_engine_cs *engine,
 
 static int execlists_request_alloc(struct i915_request *request)
 {
-	struct intel_engine_cs *engine = request->engine;
-	struct intel_context *ce = &request->ctx->engine[engine->id];
+	struct intel_context *ce =
+		to_intel_context(request->ctx, request->engine);
 	int ret;
 
 	GEM_BUG_ON(!ce->pin_count);
@@ -1522,6 +1682,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 		return -EINVAL;
 
 	switch (INTEL_GEN(engine->i915)) {
+	case 11:
+		return 0;
 	case 10:
 		wa_bb_fn[0] = gen10_init_indirectctx_bb;
 		wa_bb_fn[1] = NULL;
@@ -1574,14 +1736,6 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 	return ret;
 }
 
-static u8 gtiir[] = {
-	[RCS] = 0,
-	[BCS] = 0,
-	[VCS] = 1,
-	[VCS2] = 1,
-	[VECS] = 3,
-};
-
 static void enable_execlists(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -1641,6 +1795,8 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine)
 	if (ret)
 		return ret;
 
+	intel_whitelist_workarounds_apply(engine);
+
 	/* We need to disable the AsyncFlip performance optimisations in order
 	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
 	 * programmed to '1' on all products.
@@ -1651,7 +1807,7 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine)
 
 	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
 
-	return init_workarounds_ring(engine);
+	return 0;
 }
 
 static int gen9_init_render_ring(struct intel_engine_cs *engine)
@@ -1662,49 +1818,25 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
 	if (ret)
 		return ret;
 
-	return init_workarounds_ring(engine);
-}
-
-static void reset_irq(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int i;
-
-	GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
-
-	/*
-	 * Clear any pending interrupt state.
-	 *
-	 * We do it twice out of paranoia that some of the IIR are double
-	 * buffered, and if we only reset it once there may still be
-	 * an interrupt pending.
-	 */
-	for (i = 0; i < 2; i++) {
-		I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
-			   GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
-		POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
-	}
-	GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
-		   (GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift));
+	intel_whitelist_workarounds_apply(engine);
 
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+	return 0;
 }
 
 static void reset_common_ring(struct intel_engine_cs *engine,
 			      struct i915_request *request)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct intel_context *ce;
 	unsigned long flags;
+	u32 *regs;
 
-	GEM_TRACE("%s seqno=%x\n",
-		  engine->name, request ? request->global_seqno : 0);
+	GEM_TRACE("%s request global=%x, current=%d\n",
		  engine->name, request ? request->global_seqno : 0,
+		  intel_engine_get_seqno(engine));
 
 	/* See execlists_cancel_requests() for the irq/spinlock split. */
 	local_irq_save(flags);
 
-	reset_irq(engine);
-
 	/*
 	 * Catch up with any missed context-switch interrupts.
 	 *
@@ -1715,14 +1847,12 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	 * requests were completed.
 	 */
 	execlists_cancel_port_requests(execlists);
+	reset_irq(engine);
 
 	/* Push back any incomplete requests for replay after the reset. */
-	spin_lock(&engine->timeline->lock);
+	spin_lock(&engine->timeline.lock);
 	__unwind_incomplete_requests(engine);
-	spin_unlock(&engine->timeline->lock);
-
-	/* Mark all CS interrupts as complete */
-	execlists->active = 0;
+	spin_unlock(&engine->timeline.lock);
 
 	local_irq_restore(flags);
 
@@ -1748,14 +1878,24 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	 * future request will be after userspace has had the opportunity
	 * to recreate its own state.
 	 */
-	ce = &request->ctx->engine[engine->id];
-	execlists_init_reg_state(ce->lrc_reg_state,
-				 request->ctx, engine, ce->ring);
+	regs = to_intel_context(request->ctx, engine)->lrc_reg_state;
+	if (engine->default_state) {
+		void *defaults;
+
+		defaults = i915_gem_object_pin_map(engine->default_state,
+						   I915_MAP_WB);
+		if (!IS_ERR(defaults)) {
+			memcpy(regs, /* skip restoring the vanilla PPHWSP */
+			       defaults + LRC_STATE_PN * PAGE_SIZE,
+			       engine->context_size - PAGE_SIZE);
+			i915_gem_object_unpin_map(engine->default_state);
+		}
+	}
+	execlists_init_reg_state(regs, request->ctx, engine, request->ring);
 
 	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
-	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
-		i915_ggtt_offset(ce->ring->vma);
-	ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma);
+	regs[CTX_RING_HEAD + 1] = request->postfix;
 
 	request->ring->head = request->postfix;
 	intel_ring_update_space(request->ring);
@@ -1816,7 +1956,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
 		rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
 	}
 
-	cs = intel_ring_begin(rq, 4);
+	cs = intel_ring_begin(rq, 6);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
@@ -1845,6 +1985,9 @@ static int gen8_emit_bb_start(struct i915_request *rq,
 		(flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
 	*cs++ = lower_32_bits(offset);
 	*cs++ = upper_32_bits(offset);
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+	*cs++ = MI_NOOP;
 	intel_ring_advance(rq, cs);
 
 	return 0;
@@ -1987,7 +2130,7 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
 	cs = gen8_emit_ggtt_write(cs, request->global_seqno,
 				  intel_hws_seqno_address(request->engine));
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
 
@@ -2003,7 +2146,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 	cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno,
 				      intel_hws_seqno_address(request->engine));
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
 
@@ -2015,7 +2158,7 @@ static int gen8_init_rcs_context(struct i915_request *rq)
 {
 	int ret;
 
-	ret = intel_ring_workarounds_emit(rq);
+	ret = intel_ctx_workarounds_emit(rq);
 	if (ret)
 		return ret;
 
@@ -2075,11 +2218,13 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->unpark = NULL;
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
+	if (engine->i915->preempt_context)
+		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	engine->i915->caps.scheduler =
 		I915_SCHEDULER_CAP_ENABLED |
 		I915_SCHEDULER_CAP_PRIORITY;
-	if (engine->i915->preempt_context)
+	if (intel_engine_has_preemption(engine))
 		engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION;
 }
 
@@ -2118,7 +2263,20 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 static inline void
 logical_ring_default_irqs(struct intel_engine_cs *engine)
 {
-	unsigned shift = engine->irq_shift;
+	unsigned int shift = 0;
+
+	if (INTEL_GEN(engine->i915) < 11) {
+		const u8 irq_shifts[] = {
+			[RCS] = GEN8_RCS_IRQ_SHIFT,
+			[BCS] = GEN8_BCS_IRQ_SHIFT,
+			[VCS] = GEN8_VCS1_IRQ_SHIFT,
+			[VCS2] = GEN8_VCS2_IRQ_SHIFT,
+			[VECS] = GEN8_VECS_IRQ_SHIFT,
+		};
+
+		shift = irq_shifts[engine->id];
+	}
+
 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
 }
@@ -2174,9 +2332,13 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 	}
 
 	engine->execlists.preempt_complete_status = ~0u;
-	if (engine->i915->preempt_context)
+	if (engine->i915->preempt_context) {
+		struct intel_context *ce =
+			to_intel_context(engine->i915->preempt_context, engine);
+
 		engine->execlists.preempt_complete_status =
-			upper_32_bits(engine->i915->preempt_context->engine[engine->id].lrc_desc);
+			upper_32_bits(ce->lrc_desc);
+	}
 
 	return 0;
 
@@ -2430,8 +2592,10 @@ populate_lr_context(struct i915_gem_context *ctx,
 
 	defaults = i915_gem_object_pin_map(engine->default_state,
 					   I915_MAP_WB);
-	if (IS_ERR(defaults))
-		return PTR_ERR(defaults);
+	if (IS_ERR(defaults)) {
+		ret = PTR_ERR(defaults);
+		goto err_unpin_ctx;
+	}
 
 	memcpy(vaddr + start, defaults + start, engine->context_size);
 	i915_gem_object_unpin_map(engine->default_state);
@@ -2449,19 +2613,20 @@ populate_lr_context(struct i915_gem_context *ctx,
 					   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
 							      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
 
+err_unpin_ctx:
 	i915_gem_object_unpin_map(ctx_obj);
-
-	return 0;
+	return ret;
 }
 
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 					    struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_object *ctx_obj;
-	struct intel_context *ce = &ctx->engine[engine->id];
+	struct intel_context *ce = to_intel_context(ctx, engine);
 	struct i915_vma *vma;
 	uint32_t context_size;
 	struct intel_ring *ring;
+	struct i915_timeline *timeline;
 	int ret;
 
 	if (ce->state)
@@ -2477,8 +2642,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 
 	ctx_obj = i915_gem_object_create(ctx->i915, context_size);
 	if (IS_ERR(ctx_obj)) {
-		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n");
-		return PTR_ERR(ctx_obj);
+		ret = PTR_ERR(ctx_obj);
+		goto error_deref_obj;
 	}
 
 	vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL);
@@ -2487,7 +2652,14 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 		goto error_deref_obj;
 	}
 
-	ring = intel_engine_create_ring(engine, ctx->ring_size);
+	timeline = i915_timeline_create(ctx->i915, ctx->name);
+	if (IS_ERR(timeline)) {
+		ret = PTR_ERR(timeline);
+		goto error_deref_obj;
+	}
+
+	ring = intel_engine_create_ring(engine, timeline, ctx->ring_size);
+	i915_timeline_put(timeline);
 	if (IS_ERR(ring)) {
 		ret = PTR_ERR(ring);
 		goto error_deref_obj;
 	}
@@ -2529,7 +2701,8 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 	 */
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
 		for_each_engine(engine, dev_priv, id) {
-			struct intel_context *ce = &ctx->engine[engine->id];
+			struct intel_context *ce =
+				to_intel_context(ctx, engine);
 			u32 *reg;
 
 			if (!ce->state)
@@ -2551,3 +2724,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 		}
 	}
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/intel_lrc.c"
+#endif
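Note: the queue_priority comment added in __execlists_dequeue() above is the core of the revised preemption heuristic: the tracked value is INT_MIN while an ELSP slot (a "hole") is still free, and otherwise the priority of the last, lowest-priority request handed to the hardware; submit_queue() and need_preempt() only act when an incoming request beats that value. The stand-alone C sketch below models just that comparison in user space; the fake_* types and helper names are illustrative stand-ins for this page, not the driver's structures.

	#include <limits.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* Illustrative stand-ins for the execlist state (not i915 code). */
	struct fake_request {
		int prio;
	};

	struct fake_execlists {
		const struct fake_request *port[2]; /* ELSP-style submission ports */
		int queue_priority;                 /* heuristic described above */
	};

	/*
	 * Mirror of the added comment: while a port is free the "hole" is
	 * INT_MIN so any request kicks the tasklet; once both ports are busy,
	 * remember the priority of the lowest executing request (port[1] is
	 * assumed to hold the lower-priority of the two, as dequeue fills the
	 * ports in priority order).
	 */
	static void update_queue_priority(struct fake_execlists *el)
	{
		if (!el->port[0] || !el->port[1])
			el->queue_priority = INT_MIN;
		else
			el->queue_priority = el->port[1]->prio;
	}

	/* submit_queue()-style check: only kick for a strictly better request. */
	static bool should_kick(const struct fake_execlists *el, int new_prio)
	{
		return new_prio > el->queue_priority;
	}

	int main(void)
	{
		struct fake_execlists el = { { NULL, NULL }, INT_MIN };
		struct fake_request low = { .prio = 0 };
		struct fake_request high = { .prio = 1024 };

		update_queue_priority(&el);
		printf("idle,      prio 0    -> kick=%d\n", should_kick(&el, low.prio));

		el.port[0] = &low;
		el.port[1] = &low;
		update_queue_priority(&el);
		printf("both busy, prio 0    -> kick=%d\n", should_kick(&el, low.prio));
		printf("both busy, prio 1024 -> kick=%d\n", should_kick(&el, high.prio));
		return 0;
	}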