drm/i915: Pull all the reset functionality together into i915_reset.c

Currently the code to reset the GPU and our state is spread widely across a few files. Pull the logic together into a common file. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Acked-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190116153304.787-1-chris@chris-wilson.co.uk
author: Chris Wilson <chris@chris-wilson.co.uk> 2019-01-16 15:33:04 +0000
committer: Chris Wilson <chris@chris-wilson.co.uk> 2019-01-16 22:45:31 +0000
commit: 9f58892ea9962002399132fd3f40c6a273f8d9e1 (patch)
tree: 7721b59cd6f41c2d184e7247672f67131c7c3f65 /drivers/gpu/drm/i915/i915_drv.c
parent: 18bb2bccb5492fb5c36908191b8af77e54c58814 (diff)
1 files changed, 1 insertions, 205 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index dafbbfadd1ad..f462a4d28af4 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -48,6 +48,7 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "i915_pmu.h"
+#include "i915_reset.h"
 #include "i915_query.h"
 #include "i915_vgpu.h"
 #include "intel_drv.h"
@@ -2205,211 +2206,6 @@ static int i915_resume_switcheroo(struct drm_device *dev)
 	return i915_drm_resume(dev);
 }
 
-/**
- * i915_reset - reset chip after a hang
- * @i915: #drm_i915_private to reset
- * @stalled_mask: mask of the stalled engines with the guilty requests
- * @reason: user error message for why we are resetting
- *
- * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
- * on failure.
- *
- * Caller must hold the struct_mutex.
- *
- * Procedure is fairly simple:
- *   - reset the chip using the reset reg
- *   - re-init context state
- *   - re-init hardware status page
- *   - re-init ring buffer
- *   - re-init interrupt state
- *   - re-init display
- */
-void i915_reset(struct drm_i915_private *i915,
-		unsigned int stalled_mask,
-		const char *reason)
-{
-	struct i915_gpu_error *error = &i915->gpu_error;
-	int ret;
-	int i;
-
-	GEM_TRACE("flags=%lx\n", error->flags);
-
-	might_sleep();
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	assert_rpm_wakelock_held(i915);
-	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
-
-	if (!test_bit(I915_RESET_HANDOFF, &error->flags))
-		return;
-
-	/* Clear any previous failed attempts at recovery. Time to try again. */
-	if (!i915_gem_unset_wedged(i915))
-		goto wakeup;
-
-	if (reason)
-		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
-	error->reset_count++;
-
-	ret = i915_gem_reset_prepare(i915);
-	if (ret) {
-		dev_err(i915->drm.dev, "GPU recovery failed\n");
-		goto taint;
-	}
-
-	if (!intel_has_gpu_reset(i915)) {
-		if (i915_modparams.reset)
-			dev_err(i915->drm.dev, "GPU reset not supported\n");
-		else
-			DRM_DEBUG_DRIVER("GPU reset disabled\n");
-		goto error;
-	}
-
-	for (i = 0; i < 3; i++) {
-		ret = intel_gpu_reset(i915, ALL_ENGINES);
-		if (ret == 0)
-			break;
-
-		msleep(100);
-	}
-	if (ret) {
-		dev_err(i915->drm.dev, "Failed to reset chip\n");
-		goto taint;
-	}
-
-	/* Ok, now get things going again... */
-
-	/*
-	 * Everything depends on having the GTT running, so we need to start
-	 * there.
-	 */
-	ret = i915_ggtt_enable_hw(i915);
-	if (ret) {
-		DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n",
-			  ret);
-		goto error;
-	}
-
-	i915_gem_reset(i915, stalled_mask);
-	intel_overlay_reset(i915);
-
-	/*
-	 * Next we need to restore the context, but we don't use those
-	 * yet either...
-	 *
-	 * Ring buffer needs to be re-initialized in the KMS case, or if X
-	 * was running at the time of the reset (i.e. we weren't VT
-	 * switched away).
-	 */
-	ret = i915_gem_init_hw(i915);
-	if (ret) {
-		DRM_ERROR("Failed to initialise HW following reset (%d)\n",
-			  ret);
-		goto error;
-	}
-
-	i915_queue_hangcheck(i915);
-
-finish:
-	i915_gem_reset_finish(i915);
-wakeup:
-	clear_bit(I915_RESET_HANDOFF, &error->flags);
-	wake_up_bit(&error->flags, I915_RESET_HANDOFF);
-	return;
-
-taint:
-	/*
-	 * History tells us that if we cannot reset the GPU now, we
-	 * never will. This then impacts everything that is run
-	 * subsequently. On failing the reset, we mark the driver
-	 * as wedged, preventing further execution on the GPU.
-	 * We also want to go one step further and add a taint to the
-	 * kernel so that any subsequent faults can be traced back to
-	 * this failure. This is important for CI, where if the
-	 * GPU/driver fails we would like to reboot and restart testing
-	 * rather than continue on into oblivion. For everyone else,
-	 * the system should still plod along, but they have been warned!
-	 */
-	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
-error:
-	i915_gem_set_wedged(i915);
-	i915_retire_requests(i915);
-	goto finish;
-}
-
-static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv,
-					struct intel_engine_cs *engine)
-{
-	return intel_gpu_reset(dev_priv, intel_engine_flag(engine));
-}
-
-/**
- * i915_reset_engine - reset GPU engine to recover from a hang
- * @engine: engine to reset
- * @msg: reason for GPU reset; or NULL for no dev_notice()
- *
- * Reset a specific GPU engine. Useful if a hang is detected.
- * Returns zero on successful reset or otherwise an error code.
- *
- * Procedure is:
- *  - identifies the request that caused the hang and it is dropped
- *  - reset engine (which will force the engine to idle)
- *  - re-init/configure engine
- */
-int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
-{
-	struct i915_gpu_error *error = &engine->i915->gpu_error;
-	struct i915_request *active_request;
-	int ret;
-
-	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
-	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
-
-	active_request = i915_gem_reset_prepare_engine(engine);
-	if (IS_ERR_OR_NULL(active_request)) {
-		/* Either the previous reset failed, or we pardon the reset. */
-		ret = PTR_ERR(active_request);
-		goto out;
-	}
-
-	if (msg)
-		dev_notice(engine->i915->drm.dev,
-			   "Resetting %s for %s\n", engine->name, msg);
-	error->reset_engine_count[engine->id]++;
-
-	if (!engine->i915->guc.execbuf_client)
-		ret = intel_gt_reset_engine(engine->i915, engine);
-	else
-		ret = intel_guc_reset_engine(&engine->i915->guc, engine);
-	if (ret) {
-		/* If we fail here, we expect to fallback to a global reset */
-		DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
-				 engine->i915->guc.execbuf_client ? "GuC " : "",
-				 engine->name, ret);
-		goto out;
-	}
-
-	/*
-	 * The request that caused the hang is stuck on elsp, we know the
-	 * active request and can drop it, adjust head to skip the offending
-	 * request to resume executing remaining requests in the queue.
-	 */
-	i915_gem_reset_engine(engine, active_request, true);
-
-	/*
-	 * The engine and its registers (and workarounds in case of render)
-	 * have been reset to their default values. Follow the init_ring
-	 * process to program RING_MODE, HWSP and re-enable submission.
-	 */
-	ret = engine->init_hw(engine);
-	if (ret)
-		goto out;
-
-out:
-	intel_engine_cancel_stop_cs(engine);
-	i915_gem_reset_finish_engine(engine);
-	return ret;
-}
-
 static int i915_pm_prepare(struct device *kdev)
 {
 	struct pci_dev *pdev = to_pci_dev(kdev);
author	Chris Wilson <chris@chris-wilson.co.uk>	2019-01-16 15:33:04 +0000
committer	Chris Wilson <chris@chris-wilson.co.uk>	2019-01-16 22:45:31 +0000
commit	9f58892ea9962002399132fd3f40c6a273f8d9e1 (patch)
tree	7721b59cd6f41c2d184e7247672f67131c7c3f65 /drivers/gpu/drm/i915/i915_drv.c
parent	18bb2bccb5492fb5c36908191b8af77e54c58814 (diff)