summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gem.c
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2017-08-11 12:11:16 +0100
committerChris Wilson <chris@chris-wilson.co.uk>2017-08-15 15:46:57 +0100
commitb8f55be64453ea77fc51bff6cd0d906d18ce1cd2 (patch)
tree4027cbcc167720619217458cf7a7213913f6dc81 /drivers/gpu/drm/i915/i915_gem.c
parent9c3a16c887f0f8f62813d841f028eabc153581f3 (diff)
drm/i915: Split obj->cache_coherent to track r/w
Another month, another story in the cache coherency saga. This time, we come to the realisation that i915_gem_object_is_coherent() has been reporting whether we can read from the target without requiring a cache invalidate; but we were using it in places for testing whether we could write into the object without requiring a cache flush. So split the tracking into two, one to decide before reads, one after writes. See commit e27ab73d17ef ("drm/i915: Mark CPU cache as dirty on every transition for CPU writes") for the previous entry in this saga. v2: Be verbose v3: Remove unused function (i915_gem_object_is_coherent) v4: Fix inverted coherency check prior to execbuf (from v2) v5: Add comment for nasty code where we are optimising on gcc's behalf. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101109 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101555 Testcase: igt/kms_mmap_write_crc Testcase: igt/kms_pwrite_crc Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Cc: Dongwon Kim <dongwon.kim@intel.com> Cc: Matt Roper <matthew.d.roper@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Tested-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20170811111116.10373-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c25
1 files changed, 13 insertions, 12 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 000a764ee8d9..887fff281f4e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
if (obj->cache_dirty)
return false;
- if (!obj->cache_coherent)
+ if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
return true;
return obj->pin_display;
@@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
if (needs_clflush &&
(obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
- !obj->cache_coherent)
+ !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
drm_clflush_sg(pages);
__start_cpu_write(obj);
@@ -800,7 +800,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
+ !static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, false);
if (ret)
goto err_unpin;
@@ -852,7 +853,8 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
+ !static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, true);
if (ret)
goto err_unpin;
@@ -3673,8 +3675,7 @@ restart:
list_for_each_entry(vma, &obj->vma_list, obj_link)
vma->node.color = cache_level;
- obj->cache_level = cache_level;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
+ i915_gem_object_set_cache_coherency(obj, cache_level);
obj->cache_dirty = true; /* Always invalidate stale cachelines */
return 0;
@@ -4279,6 +4280,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
{
struct drm_i915_gem_object *obj;
struct address_space *mapping;
+ unsigned int cache_level;
gfp_t mask;
int ret;
@@ -4317,7 +4319,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
- if (HAS_LLC(dev_priv)) {
+ if (HAS_LLC(dev_priv))
/* On some devices, we can have the GPU use the LLC (the CPU
* cache) for about a 10% performance improvement
* compared to uncached. Graphics requests other than
@@ -4330,12 +4332,11 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
* However, we maintain the display planes as UC, and so
* need to rebind when first used as such.
*/
- obj->cache_level = I915_CACHE_LLC;
- } else
- obj->cache_level = I915_CACHE_NONE;
+ cache_level = I915_CACHE_LLC;
+ else
+ cache_level = I915_CACHE_NONE;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
trace_i915_gem_object_create(obj);