diff options
author | Jordan Crouse <jcrouse@codeaurora.org> | 2017-10-20 11:07:01 -0600 |
---|---|---|
committer | Rob Clark <robdclark@gmail.com> | 2017-10-28 11:01:38 -0400 |
commit | b1fc2839d2f92d09da90d1e09156a73ddaba8a93 (patch) | |
tree | 2cbc4001472d16c4290e7a4d8be6742d57c72d3c /drivers/gpu/drm/msm/adreno/a5xx_gpu.c | |
parent | 4d87fc32dfd20ad0d4ffd5204f6e5d41d281e128 (diff) |
drm/msm: Implement preemption for A5XX targets
Implement preemption for A5XX targets - this allows multiple
ringbuffers for different priorities with automatic preemption
of a lower priority ringbuffer if a higher one is ready.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
Diffstat (limited to 'drivers/gpu/drm/msm/adreno/a5xx_gpu.c')
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 176 |
1 files changed, 171 insertions, 5 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 32252f8ac30c..a1f4eeeb73e2 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -113,13 +113,65 @@ out: return ret; } +static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + uint32_t wptr; + unsigned long flags; + + spin_lock_irqsave(&ring->lock, flags); + + /* Copy the shadow to the actual register */ + ring->cur = ring->next; + + /* Make sure to wrap wptr if we need to */ + wptr = get_wptr(ring); + + spin_unlock_irqrestore(&ring->lock, flags); + + /* Make sure everything is posted before making a decision */ + mb(); + + /* Update HW if this is the current ring and we are not in preempt */ + if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu)) + gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr); +} + static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); struct msm_drm_private *priv = gpu->dev->dev_private; struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x02); + + /* Turn off protected mode to write to special registers */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 0); + + /* Set the save preemption record for the ring/command */ + OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2); + OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id])); + OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id])); + + /* Turn back on protected mode */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 1); + + /* Enable local preemption for finegrain preemption */ + OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x02); + + /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */ + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); + OUT_RING(ring, 0x02); + + /* Submit the commands */ for (i = 0; i < submit->nr_cmds; i++) { switch (submit->cmd[i].type) { case MSM_SUBMIT_CMD_IB_TARGET_BUF: @@ -137,16 +189,54 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } } + /* + * Write the render mode to NULL (0) to indicate to the CP that the IBs + * are done rendering - otherwise a lucky preemption would start + * replaying from the last checkpoint + */ + OUT_PKT7(ring, CP_SET_RENDER_MODE, 5); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + /* Turn off IB level preemptions */ + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); + OUT_RING(ring, 0x01); + + /* Write the fence to the scratch register */ OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1); OUT_RING(ring, submit->seqno); + /* + * Execute a CACHE_FLUSH_TS event. This will ensure that the + * timestamp is written to the memory and then triggers the interrupt + */ OUT_PKT7(ring, CP_EVENT_WRITE, 4); OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31)); OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, submit->seqno); - gpu->funcs->flush(gpu, ring); + /* Yield the floor on command completion */ + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); + /* + * If dword[2:1] are non zero, they specify an address for the CP to + * write the value of dword[3] to on preemption complete. Write 0 to + * skip the write + */ + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + /* Data value - not used if the address above is 0 */ + OUT_RING(ring, 0x01); + /* Set bit 0 to trigger an interrupt on preempt complete */ + OUT_RING(ring, 0x01); + + a5xx_flush(gpu, ring); + + /* Check to see if we need to start preemption */ + a5xx_preempt_trigger(gpu); } static const struct { @@ -297,6 +387,50 @@ static int a5xx_me_init(struct msm_gpu *gpu) return a5xx_idle(gpu, ring) ? 0 : -EINVAL; } +static int a5xx_preempt_start(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + struct msm_ringbuffer *ring = gpu->rb[0]; + + if (gpu->nr_rings == 1) + return 0; + + /* Turn off protected mode to write to special registers */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 0); + + /* Set the save preemption record for the ring/command */ + OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2); + OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id])); + OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id])); + + /* Turn back on protected mode */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 1); + + OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x00); + + OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1); + OUT_RING(ring, 0x01); + + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); + OUT_RING(ring, 0x01); + + /* Yield the floor on command completion */ + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x01); + OUT_RING(ring, 0x01); + + gpu->funcs->flush(gpu, ring); + + return a5xx_idle(gpu, ring) ? 0 : -EINVAL; +} + + static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, const struct firmware *fw, u64 *iova) { @@ -412,6 +546,7 @@ static int a5xx_zap_shader_init(struct msm_gpu *gpu) A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \ A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \ + A5XX_RBBM_INT_0_MASK_CP_SW | \ A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP) @@ -556,6 +691,8 @@ static int a5xx_hw_init(struct msm_gpu *gpu) if (ret) return ret; + a5xx_preempt_hw_init(gpu); + a5xx_gpmu_ucode_init(gpu); ret = a5xx_ucode_init(gpu); @@ -610,6 +747,9 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0); } + /* Last step - yield the ringbuffer */ + a5xx_preempt_start(gpu); + return 0; } @@ -640,6 +780,8 @@ static void a5xx_destroy(struct msm_gpu *gpu) DBG("%s", gpu->name); + a5xx_preempt_fini(gpu); + if (a5xx_gpu->pm4_bo) { if (a5xx_gpu->pm4_iova) msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace); @@ -677,6 +819,14 @@ static inline bool _a5xx_check_idle(struct msm_gpu *gpu) bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + + if (ring != a5xx_gpu->cur_ring) { + WARN(1, "Tried to idle a non-current ringbuffer\n"); + return false; + } + /* wait for CP to drain ringbuffer: */ if (!adreno_idle(gpu, ring)) return false; @@ -871,8 +1021,13 @@ static irqreturn_t a5xx_irq(struct msm_gpu *gpu) if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP) a5xx_gpmu_err_irq(gpu); - if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) + if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { + a5xx_preempt_trigger(gpu); msm_gpu_retire(gpu); + } + + if (status & A5XX_RBBM_INT_0_MASK_CP_SW) + a5xx_preempt_irq(gpu); return IRQ_HANDLED; } @@ -1002,6 +1157,14 @@ static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m) } #endif +static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + + return a5xx_gpu->cur_ring; +} + static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, @@ -1010,8 +1173,8 @@ static const struct adreno_gpu_funcs funcs = { .pm_resume = a5xx_pm_resume, .recover = a5xx_recover, .submit = a5xx_submit, - .flush = adreno_flush, - .active_ring = adreno_active_ring, + .flush = a5xx_flush, + .active_ring = a5xx_active_ring, .irq = a5xx_irq, .destroy = a5xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -1047,7 +1210,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) a5xx_gpu->lm_leakage = 0x4E001A; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4); if (ret) { a5xx_destroy(&(a5xx_gpu->base.base)); return ERR_PTR(ret); @@ -1056,5 +1219,8 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) if (gpu->aspace) msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler); + /* Set up the preemption specific bits and pieces for each ringbuffer */ + a5xx_preempt_init(gpu); + return gpu; } |