summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--drivers/gpu/drm/i915/Kconfig6
-rw-r--r--drivers/gpu/drm/i915/Kconfig.profile13
-rw-r--r--drivers/gpu/drm/i915/i915_request.c27
3 files changed, 44 insertions, 2 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 148be8e1a090..f0556310b851 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -133,3 +133,9 @@ depends on DRM_I915
depends on EXPERT
source "drivers/gpu/drm/i915/Kconfig.debug"
endmenu
+
+menu "drm/i915 Profile Guided Optimisation"
+ visible if EXPERT
+ depends on DRM_I915
+ source "drivers/gpu/drm/i915/Kconfig.profile"
+endmenu
diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
new file mode 100644
index 000000000000..0e5db98da8f3
--- /dev/null
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -0,0 +1,13 @@
+config DRM_I915_SPIN_REQUEST
+ int
+ default 5 # microseconds
+ help
+ Before sleeping waiting for a request (GPU operation) to complete,
+ we may spend some time polling for its completion. As the IRQ may
+ take a non-negligible time to setup, we do a short spin first to
+ check if the request will complete in the time it would have taken
+ us to enable the interrupt.
+
+ May be 0 to disable the initial spin. In practice, we estimate
+ the cost of enabling the interrupt (if currently disabled) to be
+ a few microseconds.
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index b836721d3b13..b1f00b59bb95 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1340,8 +1340,31 @@ long i915_request_wait(struct i915_request *rq,
trace_i915_request_wait_begin(rq, flags);
- /* Optimistic short spin before touching IRQs */
- if (__i915_spin_request(rq, state, 5))
+ /*
+ * Optimistic spin before touching IRQs.
+ *
+ * We may use a rather large value here to offset the penalty of
+ * switching away from the active task. Frequently, the client will
+ * wait upon an old swapbuffer to throttle itself to remain within a
+ * frame of the gpu. If the client is running in lockstep with the gpu,
+ * then it should not be waiting long at all, and a sleep now will incur
+ * extra scheduler latency in producing the next frame. To try to
+ * avoid adding the cost of enabling/disabling the interrupt to the
+ * short wait, we first spin to see if the request would have completed
+ * in the time taken to setup the interrupt.
+ *
+ * We need upto 5us to enable the irq, and upto 20us to hide the
+ * scheduler latency of a context switch, ignoring the secondary
+ * impacts from a context switch such as cache eviction.
+ *
+ * The scheme used for low-latency IO is called "hybrid interrupt
+ * polling". The suggestion there is to sleep until just before you
+ * expect to be woken by the device interrupt and then poll for its
+ * completion. That requires having a good predictor for the request
+ * duration, which we currently lack.
+ */
+ if (CONFIG_DRM_I915_SPIN_REQUEST &&
+ __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST))
goto out;
/*