diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2012-10-09 19:24:37 +0100 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2013-01-20 13:11:16 +0100 |
commit | d0a57789d5ec807fc218151b2fb2de4da30fbef5 (patch) | |
tree | 7725177c02c17a8df63ac146322b5d80411e60c7 /drivers | |
parent | 1f83fee08d625f8d0130f9fe5ef7b17c2e022f3c (diff) |
drm/i915: Only insert the mb() before updating the fence parameter
With a fence, we only need to insert a memory barrier around the actual
fence alteration for CPU accesses through the GTT. Performing the
barrier in flush-fence was inserting unnecessary and expensive barriers
for never fenced objects.
Note removing the barriers from flush-fence, which was effectively a
barrier before every direct access through the GTT, revealed that we
where missing a barrier before the first access through the GTT. Lack of
that barrier was sufficient to cause GPU hangs.
v2: Add a couple more comments to explain the new barriers
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 40 |
1 files changed, 30 insertions, 10 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2ca901194824..ce706555d011 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2611,9 +2611,22 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg, POSTING_READ(FENCE_REG_830_0 + reg * 4); } +inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) +{ + return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; +} + static void i915_gem_write_fence(struct drm_device *dev, int reg, struct drm_i915_gem_object *obj) { + struct drm_i915_private *dev_priv = dev->dev_private; + + /* Ensure that all CPU reads are completed before installing a fence + * and all writes before removing the fence. + */ + if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) + mb(); + switch (INTEL_INFO(dev)->gen) { case 7: case 6: @@ -2623,6 +2636,12 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg, case 2: i830_write_fence_reg(dev, reg, obj); break; default: BUG(); } + + /* And similarly be paranoid that no direct access to this region + * is reordered to before the fence is installed. + */ + if (i915_gem_object_needs_mb(obj)) + mb(); } static inline int fence_number(struct drm_i915_private *dev_priv, @@ -2652,7 +2671,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, } static int -i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) +i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) { if (obj->last_fenced_seqno) { int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); @@ -2662,12 +2681,6 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) obj->last_fenced_seqno = 0; } - /* Ensure that all CPU reads are completed before installing a fence - * and all writes before removing the fence. - */ - if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) - mb(); - obj->fenced_gpu_access = false; return 0; } @@ -2678,7 +2691,7 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj) struct drm_i915_private *dev_priv = obj->base.dev->dev_private; int ret; - ret = i915_gem_object_flush_fence(obj); + ret = i915_gem_object_wait_fence(obj); if (ret) return ret; @@ -2752,7 +2765,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) * will need to serialise the write to the associated fence register? */ if (obj->fence_dirty) { - ret = i915_gem_object_flush_fence(obj); + ret = i915_gem_object_wait_fence(obj); if (ret) return ret; } @@ -2773,7 +2786,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) if (reg->obj) { struct drm_i915_gem_object *old = reg->obj; - ret = i915_gem_object_flush_fence(old); + ret = i915_gem_object_wait_fence(old); if (ret) return ret; @@ -3068,6 +3081,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) i915_gem_object_flush_cpu_write_domain(obj); + /* Serialise direct access to this object with the barriers for + * coherent writes from the GPU, by effectively invalidating the + * GTT domain upon first access. + */ + if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) + mb(); + old_write_domain = obj->base.write_domain; old_read_domains = obj->base.read_domains; |