drm/i915/execlists: Pull the render flush into breadcrumb emission

In preparation for removing the manual EMIT_FLUSH prior to emitting the breadcrumb implement the flush inline with writing the breadcrumb for execlists. Using one command to both flush and write the breadcrumb is naturally a tiny bit faster than splitting it into two. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20181228153114.4948-1-chris@chris-wilson.co.uk
author: Chris Wilson <chris@chris-wilson.co.uk> 2018-12-28 15:31:13 +0000
committer: Chris Wilson <chris@chris-wilson.co.uk> 2018-12-28 16:36:55 +0000
commit: 6a6237293d0c02e0902b29a86e3e353e21f7bea6 (patch)
tree: 5f734b64daf73bc3c11450f1a0bfc8341d4e595d
parent: 95898ed6856eaa06331feeab75ec5f5a59af08b0 (diff)
3 files changed, 14 insertions, 6 deletions
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 1570dcbe249c..ab1c49b106f2 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -572,7 +572,8 @@ static void inject_preempt_context(struct work_struct *work)
 		if (engine->id == RCS) {
 			cs = gen8_emit_ggtt_write_rcs(cs,
 						      GUC_PREEMPT_FINISHED,
-						      addr);
+						      addr,
+						      PIPE_CONTROL_CS_STALL);
 		} else {
 			cs = gen8_emit_ggtt_write(cs,
 						  GUC_PREEMPT_FINISHED,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4762c1e5b9e7..2482dde6e56b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2061,10 +2061,18 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 	/* We're using qword write, seqno should be aligned to 8 bytes. */
 	BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
 
-	cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno,
-				      intel_hws_seqno_address(request->engine));
+	cs = gen8_emit_ggtt_write_rcs(cs,
+				      request->global_seqno,
+				      intel_hws_seqno_address(request->engine),
+				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+				      PIPE_CONTROL_DC_FLUSH_ENABLE |
+				      PIPE_CONTROL_FLUSH_ENABLE |
+				      PIPE_CONTROL_CS_STALL);
+
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c927bdfb1ed0..32606d795af3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -1003,7 +1003,7 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 }
 
 static inline u32 *
-gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
+gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
 {
 	/* We're using qword write, offset should be aligned to 8 bytes. */
 	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
@@ -1013,8 +1013,7 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
 	 * following the batch.
 	 */
 	*cs++ = GFX_OP_PIPE_CONTROL(6);
-	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
-		PIPE_CONTROL_QW_WRITE;
+	*cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
 	*cs++ = gtt_offset;
 	*cs++ = 0;
 	*cs++ = value;
author	Chris Wilson <chris@chris-wilson.co.uk>	2018-12-28 15:31:13 +0000
committer	Chris Wilson <chris@chris-wilson.co.uk>	2018-12-28 16:36:55 +0000
commit	6a6237293d0c02e0902b29a86e3e353e21f7bea6 (patch)
tree	5f734b64daf73bc3c11450f1a0bfc8341d4e595d
parent	95898ed6856eaa06331feeab75ec5f5a59af08b0 (diff)