summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/intel_lrc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r--drivers/gpu/drm/i915/intel_lrc.c707
1 files changed, 441 insertions, 266 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6f972e6ec663..d36e25607435 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -208,8 +208,9 @@
/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
-
#define WA_TAIL_DWORDS 2
+#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
+#define PREEMPT_ID 0x1
static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
struct intel_engine_cs *engine);
@@ -243,8 +244,7 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enabl
return 0;
if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) &&
- USES_PPGTT(dev_priv) &&
- i915.use_mmio_flip >= 0)
+ USES_PPGTT(dev_priv))
return 1;
return 0;
@@ -279,17 +279,110 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<<GEN8_CTX_ID_WIDTH));
desc = ctx->desc_template; /* bits 0-11 */
- desc |= i915_ggtt_offset(ce->state) + LRC_PPHWSP_PN * PAGE_SIZE;
+ desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
/* bits 12-31 */
desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */
ce->lrc_desc = desc;
}
-uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
+static struct i915_priolist *
+lookup_priolist(struct intel_engine_cs *engine,
+ struct i915_priotree *pt,
+ int prio)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct i915_priolist *p;
+ struct rb_node **parent, *rb;
+ bool first = true;
+
+ if (unlikely(execlists->no_priolist))
+ prio = I915_PRIORITY_NORMAL;
+
+find_priolist:
+ /* most positive priority is scheduled first, equal priorities fifo */
+ rb = NULL;
+ parent = &execlists->queue.rb_node;
+ while (*parent) {
+ rb = *parent;
+ p = rb_entry(rb, typeof(*p), node);
+ if (prio > p->priority) {
+ parent = &rb->rb_left;
+ } else if (prio < p->priority) {
+ parent = &rb->rb_right;
+ first = false;
+ } else {
+ return p;
+ }
+ }
+
+ if (prio == I915_PRIORITY_NORMAL) {
+ p = &execlists->default_priolist;
+ } else {
+ p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
+ /* Convert an allocation failure to a priority bump */
+ if (unlikely(!p)) {
+ prio = I915_PRIORITY_NORMAL; /* recurses just once */
+
+ /* To maintain ordering with all rendering, after an
+ * allocation failure we have to disable all scheduling.
+ * Requests will then be executed in fifo, and schedule
+ * will ensure that dependencies are emitted in fifo.
+ * There will be still some reordering with existing
+ * requests, so if userspace lied about their
+ * dependencies that reordering may be visible.
+ */
+ execlists->no_priolist = true;
+ goto find_priolist;
+ }
+ }
+
+ p->priority = prio;
+ INIT_LIST_HEAD(&p->requests);
+ rb_link_node(&p->node, rb, parent);
+ rb_insert_color(&p->node, &execlists->queue);
+
+ if (first)
+ execlists->first = &p->node;
+
+ return ptr_pack_bits(p, first, 1);
+}
+
+static void unwind_wa_tail(struct drm_i915_gem_request *rq)
{
- return ctx->engine[engine->id].lrc_desc;
+ rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
+ assert_ring_tail_valid(rq->ring, rq->tail);
+}
+
+static void unwind_incomplete_requests(struct intel_engine_cs *engine)
+{
+ struct drm_i915_gem_request *rq, *rn;
+ struct i915_priolist *uninitialized_var(p);
+ int last_prio = I915_PRIORITY_INVALID;
+
+ lockdep_assert_held(&engine->timeline->lock);
+
+ list_for_each_entry_safe_reverse(rq, rn,
+ &engine->timeline->requests,
+ link) {
+ if (i915_gem_request_completed(rq))
+ return;
+
+ __i915_gem_request_unsubmit(rq);
+ unwind_wa_tail(rq);
+
+ GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID);
+ if (rq->priotree.priority != last_prio) {
+ p = lookup_priolist(engine,
+ &rq->priotree,
+ rq->priotree.priority);
+ p = ptr_mask_bits(p, 1);
+
+ last_prio = rq->priotree.priority;
+ }
+
+ list_add(&rq->priotree.link, &p->requests);
+ }
}
static inline void
@@ -336,14 +429,20 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
return ce->lrc_desc;
}
+static inline void elsp_write(u64 desc, u32 __iomem *elsp)
+{
+ writel(upper_32_bits(desc), elsp);
+ writel(lower_32_bits(desc), elsp);
+}
+
static void execlists_submit_ports(struct intel_engine_cs *engine)
{
- struct execlist_port *port = engine->execlist_port;
+ struct execlist_port *port = engine->execlists.port;
u32 __iomem *elsp =
engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
unsigned int n;
- for (n = ARRAY_SIZE(engine->execlist_port); n--; ) {
+ for (n = execlists_num_ports(&engine->execlists); n--; ) {
struct drm_i915_gem_request *rq;
unsigned int count;
u64 desc;
@@ -361,8 +460,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
desc = 0;
}
- writel(upper_32_bits(desc), elsp);
- writel(lower_32_bits(desc), elsp);
+ elsp_write(desc, elsp);
}
}
@@ -395,25 +493,43 @@ static void port_assign(struct execlist_port *port,
port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
}
+static void inject_preempt_context(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce =
+ &engine->i915->preempt_context->engine[engine->id];
+ u32 __iomem *elsp =
+ engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+ unsigned int n;
+
+ GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID);
+ GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES));
+
+ memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES);
+ ce->ring->tail += WA_TAIL_BYTES;
+ ce->ring->tail &= (ce->ring->size - 1);
+ ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail;
+
+ for (n = execlists_num_ports(&engine->execlists); --n; )
+ elsp_write(0, elsp);
+
+ elsp_write(ce->lrc_desc, elsp);
+}
+
+static bool can_preempt(struct intel_engine_cs *engine)
+{
+ return INTEL_INFO(engine->i915)->has_logical_ring_preemption;
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_request *last;
- struct execlist_port *port = engine->execlist_port;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct execlist_port *port = execlists->port;
+ const struct execlist_port * const last_port =
+ &execlists->port[execlists->port_mask];
+ struct drm_i915_gem_request *last = port_request(port);
struct rb_node *rb;
bool submit = false;
- last = port_request(port);
- if (last)
- /* WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
- * as we resubmit the request. See gen8_emit_breadcrumb()
- * for where we prepare the padding after the end of the
- * request.
- */
- last->tail = last->wa_tail;
-
- GEM_BUG_ON(port_isset(&port[1]));
-
/* Hardware submission is through 2 ports. Conceptually each port
* has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
* static for a context, and unique to each, so we only execute
@@ -436,9 +552,68 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
*/
spin_lock_irq(&engine->timeline->lock);
- rb = engine->execlist_first;
- GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
- while (rb) {
+ rb = execlists->first;
+ GEM_BUG_ON(rb_first(&execlists->queue) != rb);
+ if (!rb)
+ goto unlock;
+
+ if (last) {
+ /*
+ * Don't resubmit or switch until all outstanding
+ * preemptions (lite-restore) are seen. Then we
+ * know the next preemption status we see corresponds
+ * to this ELSP update.
+ */
+ if (port_count(&port[0]) > 1)
+ goto unlock;
+
+ if (can_preempt(engine) &&
+ rb_entry(rb, struct i915_priolist, node)->priority >
+ max(last->priotree.priority, 0)) {
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
+ inject_preempt_context(engine);
+ execlists_set_active(execlists,
+ EXECLISTS_ACTIVE_PREEMPT);
+ goto unlock;
+ } else {
+ /*
+ * In theory, we could coalesce more requests onto
+ * the second port (the first port is active, with
+ * no preemptions pending). However, that means we
+ * then have to deal with the possible lite-restore
+ * of the second port (as we submit the ELSP, there
+ * may be a context-switch) but also we may complete
+ * the resubmission before the context-switch. Ergo,
+ * coalescing onto the second port will cause a
+ * preemption event, but we cannot predict whether
+ * that will affect port[0] or port[1].
+ *
+ * If the second port is already active, we can wait
+ * until the next context-switch before contemplating
+ * new requests. The GPU will be busy and we should be
+ * able to resubmit the new ELSP before it idles,
+ * avoiding pipeline bubbles (momentary pauses where
+ * the driver is unable to keep up the supply of new
+ * work).
+ */
+ if (port_count(&port[1]))
+ goto unlock;
+
+ /* WaIdleLiteRestore:bdw,skl
+ * Apply the wa NOOPs to prevent
+ * ring:HEAD == req:TAIL as we resubmit the
+ * request. See gen8_emit_breadcrumb() for
+ * where we prepare the padding after the
+ * end of the request.
+ */
+ last->tail = last->wa_tail;
+ }
+ }
+
+ do {
struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
struct drm_i915_gem_request *rq, *rn;
@@ -460,7 +635,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* combine this request with the last, then we
* are done.
*/
- if (port != engine->execlist_port) {
+ if (port == last_port) {
__list_del_many(&p->requests,
&rq->priotree.link);
goto done;
@@ -485,38 +660,108 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (submit)
port_assign(port, last);
port++;
+
+ GEM_BUG_ON(port_isset(port));
}
INIT_LIST_HEAD(&rq->priotree.link);
- rq->priotree.priority = INT_MAX;
-
__i915_gem_request_submit(rq);
- trace_i915_gem_request_in(rq, port_index(port, engine));
+ trace_i915_gem_request_in(rq, port_index(port, execlists));
last = rq;
submit = true;
}
rb = rb_next(rb);
- rb_erase(&p->node, &engine->execlist_queue);
+ rb_erase(&p->node, &execlists->queue);
INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
- }
+ } while (rb);
done:
- engine->execlist_first = rb;
+ execlists->first = rb;
if (submit)
port_assign(port, last);
+unlock:
spin_unlock_irq(&engine->timeline->lock);
- if (submit)
+ if (submit) {
+ execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
execlists_submit_ports(engine);
+ }
+}
+
+static void
+execlist_cancel_port_requests(struct intel_engine_execlists *execlists)
+{
+ struct execlist_port *port = execlists->port;
+ unsigned int num_ports = execlists_num_ports(execlists);
+
+ while (num_ports-- && port_isset(port)) {
+ struct drm_i915_gem_request *rq = port_request(port);
+
+ GEM_BUG_ON(!execlists->active);
+ execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED);
+ i915_gem_request_put(rq);
+
+ memset(port, 0, sizeof(*port));
+ port++;
+ }
}
-static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
+static void execlists_cancel_requests(struct intel_engine_cs *engine)
{
- const struct execlist_port *port = engine->execlist_port;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct drm_i915_gem_request *rq, *rn;
+ struct rb_node *rb;
+ unsigned long flags;
- return port_count(&port[0]) + port_count(&port[1]) < 2;
+ spin_lock_irqsave(&engine->timeline->lock, flags);
+
+ /* Cancel the requests on the HW and clear the ELSP tracker. */
+ execlist_cancel_port_requests(execlists);
+
+ /* Mark all executing requests as skipped. */
+ list_for_each_entry(rq, &engine->timeline->requests, link) {
+ GEM_BUG_ON(!rq->global_seqno);
+ if (!i915_gem_request_completed(rq))
+ dma_fence_set_error(&rq->fence, -EIO);
+ }
+
+ /* Flush the queued requests to the timeline list (for retiring). */
+ rb = execlists->first;
+ while (rb) {
+ struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+
+ list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+ INIT_LIST_HEAD(&rq->priotree.link);
+
+ dma_fence_set_error(&rq->fence, -EIO);
+ __i915_gem_request_submit(rq);
+ }
+
+ rb = rb_next(rb);
+ rb_erase(&p->node, &execlists->queue);
+ INIT_LIST_HEAD(&p->requests);
+ if (p->priority != I915_PRIORITY_NORMAL)
+ kmem_cache_free(engine->i915->priorities, p);
+ }
+
+ /* Remaining _unready_ requests will be nop'ed when submitted */
+
+
+ execlists->queue = RB_ROOT;
+ execlists->first = NULL;
+ GEM_BUG_ON(port_isset(execlists->port));
+
+ /*
+ * The port is checked prior to scheduling a tasklet, but
+ * just in case we have suspended the tasklet to do the
+ * wedging make sure that when it wakes, it decides there
+ * is no work to do by clearing the irq_posted bit.
+ */
+ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+
+ spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
/*
@@ -525,8 +770,9 @@ static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
*/
static void intel_lrc_irq_handler(unsigned long data)
{
- struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
- struct execlist_port *port = engine->execlist_port;
+ struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct execlist_port * const port = execlists->port;
struct drm_i915_private *dev_priv = engine->i915;
/* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -538,19 +784,24 @@ static void intel_lrc_irq_handler(unsigned long data)
*/
GEM_BUG_ON(!dev_priv->gt.awake);
- intel_uncore_forcewake_get(dev_priv, engine->fw_domains);
+ intel_uncore_forcewake_get(dev_priv, execlists->fw_domains);
/* Prefer doing test_and_clear_bit() as a two stage operation to avoid
* imposing the cost of a locked atomic transaction when submitting a
* new request (outside of the context-switch interrupt).
*/
while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
- u32 __iomem *csb_mmio =
- dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
- u32 __iomem *buf =
- dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
+ /* The HWSP contains a (cacheable) mirror of the CSB */
+ const u32 *buf =
+ &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
unsigned int head, tail;
+ if (unlikely(execlists->csb_use_mmio)) {
+ buf = (u32 * __force)
+ (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+ execlists->csb_head = -1; /* force mmio read of CSB ptrs */
+ }
+
/* The write will be ordered by the uncached read (itself
* a memory barrier), so we do not need another in the form
* of a locked instruction. The race between the interrupt
@@ -562,9 +813,20 @@ static void intel_lrc_irq_handler(unsigned long data)
* is set and we do a new loop.
*/
__clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
- head = readl(csb_mmio);
- tail = GEN8_CSB_WRITE_PTR(head);
- head = GEN8_CSB_READ_PTR(head);
+ if (unlikely(execlists->csb_head == -1)) { /* following a reset */
+ head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+ tail = GEN8_CSB_WRITE_PTR(head);
+ head = GEN8_CSB_READ_PTR(head);
+ execlists->csb_head = head;
+ } else {
+ const int write_idx =
+ intel_hws_csb_write_index(dev_priv) -
+ I915_HWS_CSB_BUF0_INDEX;
+
+ head = execlists->csb_head;
+ tail = READ_ONCE(buf[write_idx]);
+ }
+
while (head != tail) {
struct drm_i915_gem_request *rq;
unsigned int status;
@@ -590,13 +852,35 @@ static void intel_lrc_irq_handler(unsigned long data)
* status notifier.
*/
- status = readl(buf + 2 * head);
+ status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
continue;
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE &&
+ buf[2*head + 1] == PREEMPT_ID) {
+ execlist_cancel_port_requests(execlists);
+
+ spin_lock_irq(&engine->timeline->lock);
+ unwind_incomplete_requests(engine);
+ spin_unlock_irq(&engine->timeline->lock);
+
+ GEM_BUG_ON(!execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_PREEMPT));
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_PREEMPT);
+ continue;
+ }
+
+ if (status & GEN8_CTX_STATUS_PREEMPTED &&
+ execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_PREEMPT))
+ continue;
+
+ GEM_BUG_ON(!execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_USER));
+
/* Check the context/desc id for this event matches */
- GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
- port->context_id);
+ GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
rq = port_unpack(port, &count);
GEM_BUG_ON(count == 0);
@@ -608,8 +892,7 @@ static void intel_lrc_irq_handler(unsigned long data)
trace_i915_gem_request_out(rq);
i915_gem_request_put(rq);
- port[0] = port[1];
- memset(&port[1], 0, sizeof(port[1]));
+ execlists_port_complete(execlists, port);
} else {
port_set(port, port_pack(rq, count));
}
@@ -617,80 +900,33 @@ static void intel_lrc_irq_handler(unsigned long data)
/* After the final element, the hw should be idle */
GEM_BUG_ON(port_count(port) == 0 &&
!(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+ if (port_count(port) == 0)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_USER);
}
- writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
- csb_mmio);
+ if (head != execlists->csb_head) {
+ execlists->csb_head = head;
+ writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+ dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+ }
}
- if (execlists_elsp_ready(engine))
+ if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
execlists_dequeue(engine);
- intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
+ intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
}
-static bool
-insert_request(struct intel_engine_cs *engine,
- struct i915_priotree *pt,
- int prio)
+static void insert_request(struct intel_engine_cs *engine,
+ struct i915_priotree *pt,
+ int prio)
{
- struct i915_priolist *p;
- struct rb_node **parent, *rb;
- bool first = true;
-
- if (unlikely(engine->no_priolist))
- prio = I915_PRIORITY_NORMAL;
-
-find_priolist:
- /* most positive priority is scheduled first, equal priorities fifo */
- rb = NULL;
- parent = &engine->execlist_queue.rb_node;
- while (*parent) {
- rb = *parent;
- p = rb_entry(rb, typeof(*p), node);
- if (prio > p->priority) {
- parent = &rb->rb_left;
- } else if (prio < p->priority) {
- parent = &rb->rb_right;
- first = false;
- } else {
- list_add_tail(&pt->link, &p->requests);
- return false;
- }
- }
-
- if (prio == I915_PRIORITY_NORMAL) {
- p = &engine->default_priolist;
- } else {
- p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
- /* Convert an allocation failure to a priority bump */
- if (unlikely(!p)) {
- prio = I915_PRIORITY_NORMAL; /* recurses just once */
-
- /* To maintain ordering with all rendering, after an
- * allocation failure we have to disable all scheduling.
- * Requests will then be executed in fifo, and schedule
- * will ensure that dependencies are emitted in fifo.
- * There will be still some reordering with existing
- * requests, so if userspace lied about their
- * dependencies that reordering may be visible.
- */
- engine->no_priolist = true;
- goto find_priolist;
- }
- }
+ struct i915_priolist *p = lookup_priolist(engine, pt, prio);
- p->priority = prio;
- rb_link_node(&p->node, rb, parent);
- rb_insert_color(&p->node, &engine->execlist_queue);
-
- INIT_LIST_HEAD(&p->requests);
- list_add_tail(&pt->link, &p->requests);
-
- if (first)
- engine->execlist_first = &p->node;
-
- return first;
+ list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
+ if (ptr_unmask_bits(p, 1))
+ tasklet_hi_schedule(&engine->execlists.irq_tasklet);
}
static void execlists_submit_request(struct drm_i915_gem_request *request)
@@ -701,24 +937,23 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->timeline->lock, flags);
- if (insert_request(engine,
- &request->priotree,
- request->priotree.priority)) {
- if (execlists_elsp_ready(engine))
- tasklet_hi_schedule(&engine->irq_tasklet);
- }
+ insert_request(engine, &request->priotree, request->priotree.priority);
- GEM_BUG_ON(!engine->execlist_first);
+ GEM_BUG_ON(!engine->execlists.first);
GEM_BUG_ON(list_empty(&request->priotree.link));
spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
+static struct drm_i915_gem_request *pt_to_request(struct i915_priotree *pt)
+{
+ return container_of(pt, struct drm_i915_gem_request, priotree);
+}
+
static struct intel_engine_cs *
pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
{
- struct intel_engine_cs *engine =
- container_of(pt, struct drm_i915_gem_request, priotree)->engine;
+ struct intel_engine_cs *engine = pt_to_request(pt)->engine;
GEM_BUG_ON(!locked);
@@ -737,6 +972,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
struct i915_dependency stack;
LIST_HEAD(dfs);
+ GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
+
if (prio <= READ_ONCE(request->priotree.priority))
return;
@@ -772,6 +1009,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
* engines.
*/
list_for_each_entry(p, &pt->signalers_list, signal_link) {
+ if (i915_gem_request_completed(pt_to_request(p->signaler)))
+ continue;
+
GEM_BUG_ON(p->signaler->priority < pt->priority);
if (prio > READ_ONCE(p->signaler->priority))
list_move_tail(&p->dfs_link, &dfs);
@@ -785,7 +1025,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
* execlists_submit_request()), we can set our own priority and skip
* acquiring the engine locks.
*/
- if (request->priotree.priority == INT_MIN) {
+ if (request->priotree.priority == I915_PRIORITY_INVALID) {
GEM_BUG_ON(!list_empty(&request->priotree.link));
request->priotree.priority = prio;
if (stack.dfs_link.next == stack.dfs_link.prev)
@@ -815,8 +1055,6 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
}
spin_unlock_irq(&engine->timeline->lock);
-
- /* XXX Do we need to preempt to make room for us and our deps? */
}
static struct intel_ring *
@@ -866,6 +1104,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
i915_ggtt_offset(ce->ring->vma);
ce->state->obj->mm.dirty = true;
+ ce->state->obj->pin_global++;
i915_gem_context_get(ctx);
out:
@@ -893,6 +1132,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine,
intel_ring_unpin(ce->ring);
+ ce->state->obj->pin_global--;
i915_gem_object_unpin_map(ce->state->obj);
i915_vma_unpin(ce->state);
@@ -914,27 +1154,14 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
*/
request->reserved_space += EXECLISTS_REQUEST_SIZE;
- if (i915.enable_guc_submission) {
- /*
- * Check that the GuC has space for the request before
- * going any further, as the i915_add_request() call
- * later on mustn't fail ...
- */
- ret = i915_guc_wq_reserve(request);
- if (ret)
- goto err;
- }
-
cs = intel_ring_begin(request, 0);
- if (IS_ERR(cs)) {
- ret = PTR_ERR(cs);
- goto err_unreserve;
- }
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
if (!ce->initialised) {
ret = engine->init_context(request);
if (ret)
- goto err_unreserve;
+ return ret;
ce->initialised = true;
}
@@ -948,12 +1175,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
request->reserved_space -= EXECLISTS_REQUEST_SIZE;
return 0;
-
-err_unreserve:
- if (i915.enable_guc_submission)
- i915_guc_wq_unreserve(request);
-err:
- return ret;
}
/*
@@ -1031,6 +1252,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
i915_ggtt_offset(engine->scratch) +
2 * CACHELINE_BYTES);
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
/* Pad to end of cacheline */
while ((unsigned long)batch % CACHELINE_BYTES)
*batch++ = MI_NOOP;
@@ -1044,26 +1267,10 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
return batch;
}
-/*
- * This batch is started immediately after indirect_ctx batch. Since we ensure
- * that indirect_ctx ends on a cacheline this batch is aligned automatically.
- *
- * The number of DWORDS written are returned using this field.
- *
- * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding
- * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant.
- */
-static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
- /* WaDisableCtxRestoreArbitration:bdw,chv */
- *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
- *batch++ = MI_BATCH_BUFFER_END;
-
- return batch;
-}
-
static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
batch = gen8_emit_flush_coherentl3_wa(engine, batch);
@@ -1109,6 +1316,8 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
*batch++ = 0;
}
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
/* Pad to end of cacheline */
while ((unsigned long)batch % CACHELINE_BYTES)
*batch++ = MI_NOOP;
@@ -1116,13 +1325,6 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
return batch;
}
-static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
- *batch++ = MI_BATCH_BUFFER_END;
-
- return batch;
-}
-
#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
@@ -1175,13 +1377,15 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
return -EINVAL;
switch (INTEL_GEN(engine->i915)) {
+ case 10:
+ return 0;
case 9:
wa_bb_fn[0] = gen9_init_indirectctx_bb;
- wa_bb_fn[1] = gen9_init_perctx_bb;
+ wa_bb_fn[1] = NULL;
break;
case 8:
wa_bb_fn[0] = gen8_init_indirectctx_bb;
- wa_bb_fn[1] = gen8_init_perctx_bb;
+ wa_bb_fn[1] = NULL;
break;
default:
MISSING_CASE(INTEL_GEN(engine->i915));
@@ -1208,7 +1412,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
ret = -EINVAL;
break;
}
- batch_ptr = wa_bb_fn[i](engine, batch_ptr);
+ if (wa_bb_fn[i])
+ batch_ptr = wa_bb_fn[i](engine, batch_ptr);
wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
}
@@ -1232,9 +1437,7 @@ static u8 gtiir[] = {
static int gen8_init_common_ring(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
- struct execlist_port *port = engine->execlist_port;
- unsigned int n;
- bool submit;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
int ret;
ret = intel_mocs_init_engine(engine);
@@ -1267,24 +1470,12 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+ execlists->csb_head = -1;
+ execlists->active = 0;
/* After a GPU reset, we may have requests to replay */
- submit = false;
- for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
- if (!port_isset(&port[n]))
- break;
-
- DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n",
- engine->name, n,
- port_request(&port[n])->global_seqno);
-
- /* Discard the current inflight count */
- port_set(&port[n], port_request(&port[n]));
- submit = true;
- }
-
- if (submit && !i915.enable_guc_submission)
- execlists_submit_ports(engine);
+ if (!i915_modparams.enable_guc_submission && execlists->first)
+ tasklet_schedule(&execlists->irq_tasklet);
return 0;
}
@@ -1325,9 +1516,11 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
static void reset_common_ring(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request)
{
- struct execlist_port *port = engine->execlist_port;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
struct intel_context *ce;
- unsigned int n;
+ unsigned long flags;
+
+ spin_lock_irqsave(&engine->timeline->lock, flags);
/*
* Catch up with any missed context-switch interrupts.
@@ -1338,20 +1531,12 @@ static void reset_common_ring(struct intel_engine_cs *engine,
* guessing the missed context-switch events by looking at what
* requests were completed.
*/
- if (!request) {
- for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
- i915_gem_request_put(port_request(&port[n]));
- memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
- return;
- }
+ execlist_cancel_port_requests(execlists);
- if (request->ctx != port_request(port)->ctx) {
- i915_gem_request_put(port_request(port));
- port[0] = port[1];
- memset(&port[1], 0, sizeof(port[1]));
- }
+ /* Push back any incomplete requests for replay after the reset. */
+ unwind_incomplete_requests(engine);
- GEM_BUG_ON(request->ctx != port_request(port)->ctx);
+ spin_unlock_irqrestore(&engine->timeline->lock, flags);
/* If the request was innocent, we leave the request in the ELSP
* and will try to replay it on restarting. The context image may
@@ -1363,7 +1548,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
* and have to at least restore the RING register in the context
* image back to the expected values to skip over the guilty request.
*/
- if (request->fence.error != -EIO)
+ if (!request || request->fence.error != -EIO)
return;
/* We want a simple context + ring to execute the breadcrumb update.
@@ -1386,10 +1571,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
intel_ring_update_space(request->ring);
/* Reset WaIdleLiteRestore:bdw,skl as well */
- request->tail =
- intel_ring_wrap(request->ring,
- request->wa_tail - WA_TAIL_DWORDS*sizeof(u32));
- assert_ring_tail_valid(request->ring, request->tail);
+ unwind_wa_tail(request);
}
static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
@@ -1448,13 +1630,31 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
if (IS_ERR(cs))
return PTR_ERR(cs);
+ /*
+ * WaDisableCtxRestoreArbitration:bdw,chv
+ *
+ * We don't need to perform MI_ARB_ENABLE as often as we do (in
+ * particular all the gen that do not need the w/a at all!), if we
+ * took care to make sure that on every switch into this context
+ * (both ordinary and for preemption) that arbitrartion was enabled
+ * we would be fine. However, there doesn't seem to be a downside to
+ * being paranoid and making sure it is set before each batch and
+ * every context-switch.
+ *
+ * Note that if we fail to enable arbitration before the request
+ * is complete, then we do not see the context-switch interrupt and
+ * the engine hangs (with RING_HEAD == RING_TAIL).
+ *
+ * That satisfies both the GPGPU w/a and our heavy-handed paranoia.
+ */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
/* FIXME(BDW): Address space and security selectors. */
*cs++ = MI_BATCH_BUFFER_START_GEN8 |
(flags & I915_DISPATCH_SECURE ? 0 : BIT(8)) |
(flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
*cs++ = lower_32_bits(offset);
*cs++ = upper_32_bits(offset);
- *cs++ = MI_NOOP;
intel_ring_advance(req, cs);
return 0;
@@ -1583,7 +1783,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
*/
static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs)
{
- *cs++ = MI_NOOP;
+ /* Ensure there's always at least one preemption point per-request. */
+ *cs++ = MI_ARB_CHECK;
*cs++ = MI_NOOP;
request->wa_tail = intel_ring_offset(request, cs);
}
@@ -1604,7 +1805,6 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
gen8_emit_wa_tail(request, cs);
}
-
static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
@@ -1632,7 +1832,6 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
gen8_emit_wa_tail(request, cs);
}
-
static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;
static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
@@ -1666,8 +1865,8 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
* Tasklet cannot be active at this point due intel_mark_active/idle
* so this is just for documentation.
*/
- if (WARN_ON(test_bit(TASKLET_STATE_SCHED, &engine->irq_tasklet.state)))
- tasklet_kill(&engine->irq_tasklet);
+ if (WARN_ON(test_bit(TASKLET_STATE_SCHED, &engine->execlists.irq_tasklet.state)))
+ tasklet_kill(&engine->execlists.irq_tasklet);
dev_priv = engine->i915;
@@ -1678,11 +1877,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
if (engine->cleanup)
engine->cleanup(engine);
- if (engine->status_page.vma) {
- i915_gem_object_unpin_map(engine->status_page.vma->obj);
- engine->status_page.vma = NULL;
- }
-
intel_engine_cleanup_common(engine);
lrc_destroy_wa_ctx(engine);
@@ -1694,8 +1888,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
static void execlists_set_default_submission(struct intel_engine_cs *engine)
{
engine->submit_request = execlists_submit_request;
+ engine->cancel_requests = execlists_cancel_requests;
engine->schedule = execlists_schedule;
- engine->irq_tasklet.func = intel_lrc_irq_handler;
+ engine->execlists.irq_tasklet.func = intel_lrc_irq_handler;
}
static void
@@ -1729,24 +1924,6 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
}
-static int
-lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma)
-{
- const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE;
- void *hws;
-
- /* The HWSP is part of the default context object in LRC mode. */
- hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
- if (IS_ERR(hws))
- return PTR_ERR(hws);
-
- engine->status_page.page_addr = hws + hws_offset;
- engine->status_page.ggtt_offset = i915_ggtt_offset(vma) + hws_offset;
- engine->status_page.vma = vma;
-
- return 0;
-}
-
static void
logical_ring_setup(struct intel_engine_cs *engine)
{
@@ -1770,32 +1947,23 @@ logical_ring_setup(struct intel_engine_cs *engine)
RING_CONTEXT_STATUS_BUF_BASE(engine),
FW_REG_READ);
- engine->fw_domains = fw_domains;
+ engine->execlists.fw_domains = fw_domains;
- tasklet_init(&engine->irq_tasklet,
+ tasklet_init(&engine->execlists.irq_tasklet,
intel_lrc_irq_handler, (unsigned long)engine);
logical_ring_default_vfuncs(engine);
logical_ring_default_irqs(engine);
}
-static int
-logical_ring_init(struct intel_engine_cs *engine)
+static int logical_ring_init(struct intel_engine_cs *engine)
{
- struct i915_gem_context *dctx = engine->i915->kernel_context;
int ret;
ret = intel_engine_init_common(engine);
if (ret)
goto error;
- /* And setup the hardware status page. */
- ret = lrc_setup_hws(engine, dctx->engine[engine->id].state);
- if (ret) {
- DRM_ERROR("Failed to set up hws %s: %d\n", engine->name, ret);
- goto error;
- }
-
return 0;
error:
@@ -1953,13 +2121,12 @@ static void execlists_init_reg_state(u32 *regs,
CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
if (rcs) {
- CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
+ struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
+
CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
RING_INDIRECT_CTX_OFFSET(base), 0);
-
- if (engine->wa_ctx.vma) {
- struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
+ if (wa_ctx->indirect_ctx.size) {
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
regs[CTX_RCS_INDIRECT_CTX + 1] =
@@ -1968,6 +2135,11 @@ static void execlists_init_reg_state(u32 *regs,
regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
intel_lr_indirect_ctx_offset(engine) << 6;
+ }
+
+ CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
+ if (wa_ctx->per_ctx.size) {
+ u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
regs[CTX_BB_PER_CTX_PTR + 1] =
(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
@@ -2052,8 +2224,11 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
- /* One extra page as the sharing data between driver and GuC */
- context_size += PAGE_SIZE * LRC_PPHWSP_PN;
+ /*
+ * Before the actual start of the context image, we insert a few pages
+ * for our own use and for sharing with the GuC.
+ */
+ context_size += LRC_HEADER_PAGES * PAGE_SIZE;
ctx_obj = i915_gem_object_create(ctx->i915, context_size);
if (IS_ERR(ctx_obj)) {