summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_drv.c
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2019-01-16 15:33:04 +0000
committer: Chris Wilson <chris@chris-wilson.co.uk> 2019-01-16 22:45:31 +0000
commit: 9f58892ea9962002399132fd3f40c6a273f8d9e1 (patch)
tree: 7721b59cd6f41c2d184e7247672f67131c7c3f65 /drivers/gpu/drm/i915/i915_drv.c
parent: 18bb2bccb5492fb5c36908191b8af77e54c58814 (diff)
drm/i915: Pull all the reset functionality together into i915_reset.c
Currently the code to reset the GPU and our state is spread widely across a few files. Pull the logic together into a common file.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190116153304.787-1-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_drv.c')
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.c 206
1 file changed, 1 insertion, 205 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index dafbbfadd1ad..f462a4d28af4 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -48,6 +48,7 @@
#include "i915_drv.h"
#include "i915_trace.h"
#include "i915_pmu.h"
+#include "i915_reset.h"
#include "i915_query.h"
#include "i915_vgpu.h"
#include "intel_drv.h"
@@ -2205,211 +2206,6 @@ static int i915_resume_switcheroo(struct drm_device *dev)
return i915_drm_resume(dev);
}
-/**
- * i915_reset - reset chip after a hang
- * @i915: #drm_i915_private to reset
- * @stalled_mask: mask of the stalled engines with the guilty requests
- * @reason: user error message for why we are resetting
- *
- * Reset the chip. Useful if a hang is detected. Marks the device as wedged
- * on failure.
- *
- * Caller must hold the struct_mutex.
- *
- * Procedure is fairly simple:
- * - reset the chip using the reset reg
- * - re-init context state
- * - re-init hardware status page
- * - re-init ring buffer
- * - re-init interrupt state
- * - re-init display
- */
-void i915_reset(struct drm_i915_private *i915,
- unsigned int stalled_mask,
- const char *reason)
-{
- struct i915_gpu_error *error = &i915->gpu_error;
- int ret;
- int i;
-
- GEM_TRACE("flags=%lx\n", error->flags);
-
- might_sleep();
- lockdep_assert_held(&i915->drm.struct_mutex);
- assert_rpm_wakelock_held(i915);
- GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
-
- if (!test_bit(I915_RESET_HANDOFF, &error->flags))
- return;
-
- /* Clear any previous failed attempts at recovery. Time to try again. */
- if (!i915_gem_unset_wedged(i915))
- goto wakeup;
-
- if (reason)
- dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
- error->reset_count++;
-
- ret = i915_gem_reset_prepare(i915);
- if (ret) {
- dev_err(i915->drm.dev, "GPU recovery failed\n");
- goto taint;
- }
-
- if (!intel_has_gpu_reset(i915)) {
- if (i915_modparams.reset)
- dev_err(i915->drm.dev, "GPU reset not supported\n");
- else
- DRM_DEBUG_DRIVER("GPU reset disabled\n");
- goto error;
- }
-
- for (i = 0; i < 3; i++) {
- ret = intel_gpu_reset(i915, ALL_ENGINES);
- if (ret == 0)
- break;
-
- msleep(100);
- }
- if (ret) {
- dev_err(i915->drm.dev, "Failed to reset chip\n");
- goto taint;
- }
-
- /* Ok, now get things going again... */
-
- /*
- * Everything depends on having the GTT running, so we need to start
- * there.
- */
- ret = i915_ggtt_enable_hw(i915);
- if (ret) {
- DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n",
- ret);
- goto error;
- }
-
- i915_gem_reset(i915, stalled_mask);
- intel_overlay_reset(i915);
-
- /*
- * Next we need to restore the context, but we don't use those
- * yet either...
- *
- * Ring buffer needs to be re-initialized in the KMS case, or if X
- * was running at the time of the reset (i.e. we weren't VT
- * switched away).
- */
- ret = i915_gem_init_hw(i915);
- if (ret) {
- DRM_ERROR("Failed to initialise HW following reset (%d)\n",
- ret);
- goto error;
- }
-
- i915_queue_hangcheck(i915);
-
-finish:
- i915_gem_reset_finish(i915);
-wakeup:
- clear_bit(I915_RESET_HANDOFF, &error->flags);
- wake_up_bit(&error->flags, I915_RESET_HANDOFF);
- return;
-
-taint:
- /*
- * History tells us that if we cannot reset the GPU now, we
- * never will. This then impacts everything that is run
- * subsequently. On failing the reset, we mark the driver
- * as wedged, preventing further execution on the GPU.
- * We also want to go one step further and add a taint to the
- * kernel so that any subsequent faults can be traced back to
- * this failure. This is important for CI, where if the
- * GPU/driver fails we would like to reboot and restart testing
- * rather than continue on into oblivion. For everyone else,
- * the system should still plod along, but they have been warned!
- */
- add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
-error:
- i915_gem_set_wedged(i915);
- i915_retire_requests(i915);
- goto finish;
-}
-
-static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv,
- struct intel_engine_cs *engine)
-{
- return intel_gpu_reset(dev_priv, intel_engine_flag(engine));
-}
-
-/**
- * i915_reset_engine - reset GPU engine to recover from a hang
- * @engine: engine to reset
- * @msg: reason for GPU reset; or NULL for no dev_notice()
- *
- * Reset a specific GPU engine. Useful if a hang is detected.
- * Returns zero on successful reset or otherwise an error code.
- *
- * Procedure is:
- * - identifies the request that caused the hang and it is dropped
- * - reset engine (which will force the engine to idle)
- * - re-init/configure engine
- */
-int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
-{
- struct i915_gpu_error *error = &engine->i915->gpu_error;
- struct i915_request *active_request;
- int ret;
-
- GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
- GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
-
- active_request = i915_gem_reset_prepare_engine(engine);
- if (IS_ERR_OR_NULL(active_request)) {
- /* Either the previous reset failed, or we pardon the reset. */
- ret = PTR_ERR(active_request);
- goto out;
- }
-
- if (msg)
- dev_notice(engine->i915->drm.dev,
- "Resetting %s for %s\n", engine->name, msg);
- error->reset_engine_count[engine->id]++;
-
- if (!engine->i915->guc.execbuf_client)
- ret = intel_gt_reset_engine(engine->i915, engine);
- else
- ret = intel_guc_reset_engine(&engine->i915->guc, engine);
- if (ret) {
- /* If we fail here, we expect to fallback to a global reset */
- DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
- engine->i915->guc.execbuf_client ? "GuC " : "",
- engine->name, ret);
- goto out;
- }
-
- /*
- * The request that caused the hang is stuck on elsp, we know the
- * active request and can drop it, adjust head to skip the offending
- * request to resume executing remaining requests in the queue.
- */
- i915_gem_reset_engine(engine, active_request, true);
-
- /*
- * The engine and its registers (and workarounds in case of render)
- * have been reset to their default values. Follow the init_ring
- * process to program RING_MODE, HWSP and re-enable submission.
- */
- ret = engine->init_hw(engine);
- if (ret)
- goto out;
-
-out:
- intel_engine_cancel_stop_cs(engine);
- i915_gem_reset_finish_engine(engine);
- return ret;
-}
-
static int i915_pm_prepare(struct device *kdev)
{
struct pci_dev *pdev = to_pci_dev(kdev);