From e5eb3d63c6182d3f21fbfc836ded748d49d521f9 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 3 May 2012 14:48:16 +0200 Subject: [PATCH] drm/i915: add interface to simulate gpu hangs gpu reset is a very important piece of our infrastructure. Unfortunately we only really test it by actually hanging the gpu, which often has bad side-effects for the entire system. And the gpu hang handling code is one of the rather complicated pieces of code we have, consisting of - hang detection - error capture - actual gpu reset - reset of all the gem bookkeeping - reinitialization of the entire gpu This patch adds a debugfs file to selectively stop rings by ceasing to update the hw tail pointer, which will result in the gpu no longer updating its head pointer and eventually in the hangcheck firing. This way we can exercise the gpu hang code under controlled conditions without a dying gpu taking down the entire system. Patch motivated by me forgetting to properly reinitialize ppgtt after a gpu reset. Usage: echo $((1 << $ringnum)) > i915_ring_stop # stops one ring echo 0xffffffff > i915_ring_stop # stops all, future-proof version then run whatever testload is desired. i915_ring_stop automatically resets after a gpu hang is detected to avoid hanging the gpu too fast and declaring it wedged. v2: Incorporate feedback from Chris Wilson. v3: Add the missing cleanup. v4: Fix up inconsistent size of ring_stop_read vs _write, noticed by Eugeni Dodonov. 
Reviewed-by: Chris Wilson Reviewed-by: Eugeni Dodonov Signed-Off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 65 +++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.c | 2 + drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++ 4 files changed, 73 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ae68ac1c488..192b27e9046 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1585,6 +1585,64 @@ static const struct file_operations i915_wedged_fops = { .llseek = default_llseek, }; +static ssize_t +i915_ring_stop_read(struct file *filp, + char __user *ubuf, + size_t max, + loff_t *ppos) +{ + struct drm_device *dev = filp->private_data; + drm_i915_private_t *dev_priv = dev->dev_private; + char buf[20]; + int len; + + len = snprintf(buf, sizeof(buf), + "0x%08x\n", dev_priv->stop_rings); + + if (len > sizeof(buf)) + len = sizeof(buf); + + return simple_read_from_buffer(ubuf, max, ppos, buf, len); +} + +static ssize_t +i915_ring_stop_write(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + struct drm_device *dev = filp->private_data; + struct drm_i915_private *dev_priv = dev->dev_private; + char buf[20]; + int val = 0; + + if (cnt > 0) { + if (cnt > sizeof(buf) - 1) + return -EINVAL; + + if (copy_from_user(buf, ubuf, cnt)) + return -EFAULT; + buf[cnt] = 0; + + val = simple_strtoul(buf, NULL, 0); + } + + DRM_DEBUG_DRIVER("Stopping rings 0x%08x\n", val); + + mutex_lock(&dev->struct_mutex); + dev_priv->stop_rings = val; + mutex_unlock(&dev->struct_mutex); + + return cnt; +} + +static const struct file_operations i915_ring_stop_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = i915_ring_stop_read, + .write = i915_ring_stop_write, + .llseek = default_llseek, +}; static ssize_t i915_max_freq_read(struct file *filp, char __user *ubuf, @@ -1885,6 +1943,11 @@ int i915_debugfs_init(struct 
drm_minor *minor) &i915_cache_sharing_fops); if (ret) return ret; + ret = i915_debugfs_create(minor->debugfs_root, minor, + "i915_ring_stop", + &i915_ring_stop_fops); + if (ret) + return ret; return drm_debugfs_create_files(i915_debugfs_list, I915_DEBUGFS_ENTRIES, @@ -1903,6 +1966,8 @@ void i915_debugfs_cleanup(struct drm_minor *minor) 1, minor); drm_debugfs_remove_files((struct drm_info_list *) &i915_cache_sharing_fops, 1, minor); + drm_debugfs_remove_files((struct drm_info_list *) &i915_ring_stop_fops, + 1, minor); } #endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 8a98f9a1641..90a84f9de8e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -800,6 +800,8 @@ int i915_reset(struct drm_device *dev, u8 flags) if (!mutex_trylock(&dev->struct_mutex)) return -EBUSY; + dev_priv->stop_rings = 0; + i915_gem_reset(dev); ret = -ENODEV; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 00a20e985d2..090ec20293f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -412,6 +412,8 @@ typedef struct drm_i915_private { uint32_t last_instdone; uint32_t last_instdone1; + unsigned int stop_rings; + unsigned long cfb_size; unsigned int cfb_fb; enum plane cfb_plane; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 38096080a3d..3aabe8dfe5c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1210,7 +1210,11 @@ int intel_ring_begin(struct intel_ring_buffer *ring, void intel_ring_advance(struct intel_ring_buffer *ring) { + struct drm_i915_private *dev_priv = ring->dev->dev_private; + ring->tail &= ring->size - 1; + if (dev_priv->stop_rings & intel_ring_flag(ring)) + return; ring->write_tail(ring, ring->tail); } -- 2.41.0