author     Chris Wilson <chris@chris-wilson.co.uk>    2017-08-11 12:11:16 +0100
committer  Chris Wilson <chris@chris-wilson.co.uk>    2017-08-15 15:46:57 +0100
commit     b8f55be64453ea77fc51bff6cd0d906d18ce1cd2 (patch)
tree       4027cbcc167720619217458cf7a7213913f6dc81 /drivers/gpu/drm/i915/i915_gem.c
parent     9c3a16c887f0f8f62813d841f028eabc153581f3 (diff)
drm/i915: Split obj->cache_coherent to track r/w
Another month, another story in the cache coherency saga. This time, we
come to the realisation that i915_gem_object_is_coherent() has been
reporting whether we can read from the target without requiring a cache
invalidate; but we were using it in places for testing whether we could
write into the object without requiring a cache flush. So split the
tracking into two, one to decide before reads, one after writes.
See commit e27ab73d17ef ("drm/i915: Mark CPU cache as dirty on every
transition for CPU writes") for the previous entry in this saga.
v2: Be verbose
v3: Remove unused function (i915_gem_object_is_coherent)
v4: Fix inverted coherency check prior to execbuf (from v2)
v5: Add comment for nasty code where we are optimising on gcc's behalf.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101109
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101555
Testcase: igt/kms_mmap_write_crc
Testcase: igt/kms_pwrite_crc
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Tested-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170811111116.10373-1-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
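In short, the single obj->cache_coherent flag becomes a small bitmask, and the read and write paths test different bits. The flag names below match those used in the diff, but the field and bit definitions are added in i915_gem_object.h, which is outside this per-file view, so the following is a sketch rather than the patch itself; the two helper functions are illustrative and hypothetical, not functions added by this commit.

```c
/* Sketch of the split tracking; the real definitions live in
 * i915_gem_object.h (not shown in this excerpt).
 */
#define I915_BO_CACHE_COHERENT_FOR_READ  (1u << 0) /* CPU reads need no invalidate */
#define I915_BO_CACHE_COHERENT_FOR_WRITE (1u << 1) /* CPU writes need no flush */

/* Before the CPU reads from the object: must we invalidate stale lines?
 * (Illustrative helper, not part of the patch.)
 */
static bool read_needs_clflush(const struct drm_i915_gem_object *obj)
{
	return !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
}

/* After the CPU writes to the object: must we flush before the GPU or the
 * display engine reads the data? This is the stricter of the two checks,
 * and it is the one the old single flag conflated with read coherency.
 * (Illustrative helper, not part of the patch.)
 */
static bool write_needs_clflush(const struct drm_i915_gem_object *obj)
{
	return !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
}
```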
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 25
1 file changed, 13 insertions, 12 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 000a764ee8d9..887fff281f4e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 	if (obj->cache_dirty)
 		return false;
 
-	if (!obj->cache_coherent)
+	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 		return true;
 
 	return obj->pin_display;
@@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 
 	if (needs_clflush &&
 	    (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
-	    !obj->cache_coherent)
+	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
 		drm_clflush_sg(pages);
 
 	__start_cpu_write(obj);
@@ -800,7 +800,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
+	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		ret = i915_gem_object_set_to_cpu_domain(obj, false);
 		if (ret)
 			goto err_unpin;
@@ -852,7 +853,8 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
+	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
 		if (ret)
 			goto err_unpin;
@@ -3673,8 +3675,7 @@ restart:
 	list_for_each_entry(vma, &obj->vma_list, obj_link)
 		vma->node.color = cache_level;
 
-	obj->cache_level = cache_level;
-	obj->cache_coherent = i915_gem_object_is_coherent(obj);
+	i915_gem_object_set_cache_coherency(obj, cache_level);
 	obj->cache_dirty = true; /* Always invalidate stale cachelines */
 
 	return 0;
@@ -4279,6 +4280,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
 {
 	struct drm_i915_gem_object *obj;
 	struct address_space *mapping;
+	unsigned int cache_level;
 	gfp_t mask;
 	int ret;
 
@@ -4317,7 +4319,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 
-	if (HAS_LLC(dev_priv)) {
+	if (HAS_LLC(dev_priv))
 		/* On some devices, we can have the GPU use the LLC (the CPU
 		 * cache) for about a 10% performance improvement
 		 * compared to uncached. Graphics requests other than
@@ -4330,12 +4332,11 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
 		 * However, we maintain the display planes as UC, and so
 		 * need to rebind when first used as such.
 		 */
-		obj->cache_level = I915_CACHE_LLC;
-	} else
-		obj->cache_level = I915_CACHE_NONE;
+		cache_level = I915_CACHE_LLC;
+	else
+		cache_level = I915_CACHE_NONE;
 
-	obj->cache_coherent = i915_gem_object_is_coherent(obj);
-	obj->cache_dirty = !obj->cache_coherent;
+	i915_gem_object_set_cache_coherency(obj, cache_level);
 
 	trace_i915_gem_object_create(obj);
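The diff replaces the open-coded cache_level/cache_coherent updates with a call to the new helper i915_gem_object_set_cache_coherency(). Its body is added outside i915_gem.c and therefore does not appear in this per-file view; based on the behaviour described in the commit message (snooped cache levels are coherent in both directions, LLC platforms are coherent for CPU reads even when the object is uncached, everything else needs explicit clflushes), the helper does roughly the following. This is a sketch under those assumptions, not the verbatim implementation.

```c
/* Sketch only: the real helper is added outside i915_gem.c and is not
 * shown in this excerpt.
 */
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
					 unsigned int cache_level)
{
	obj->cache_level = cache_level;

	if (cache_level != I915_CACHE_NONE)
		/* Snooped: both CPU reads and writes are coherent. */
		obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ |
				      I915_BO_CACHE_COHERENT_FOR_WRITE;
	else if (HAS_LLC(to_i915(obj->base.dev)))
		/* LLC platform: CPU reads see GPU writes without an
		 * invalidate, but CPU writes still need a flush before
		 * non-snooping consumers (e.g. scanout) can see them.
		 */
		obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
	else
		obj->cache_coherent = 0;

	/* If CPU writes are not automatically visible, treat the
	 * cachelines as dirty until they have been flushed.
	 */
	obj->cache_dirty =
		!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
}
```

This mirrors the pair of assignments removed from i915_gem_object_create(), where cache_dirty was previously initialised as !cache_coherent; with the split flags, only write coherency determines whether the object starts out dirty.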