summaryrefslogtreecommitdiff
path: root/mm/rmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--mm/rmap.c71
1 files changed, 68 insertions, 3 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index ced14f1af6dc..c570f82e6827 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -605,6 +605,13 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
tlb_ubc->flush_required = true;
/*
+ * Ensure compiler does not re-order the setting of tlb_flush_batched
+ * before the PTE is cleared.
+ */
+ barrier();
+ mm->tlb_flush_batched = true;
+
+ /*
* If the PTE was dirty then it's best to assume it's writable. The
* caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
* before the page is queued for IO.
@@ -631,6 +638,35 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
return should_defer;
}
+
+/*
+ * Reclaim unmaps pages under the PTL but do not flush the TLB prior to
+ * releasing the PTL if TLB flushes are batched. It's possible for a parallel
+ * operation such as mprotect or munmap to race between reclaim unmapping
+ * the page and flushing the page. If this race occurs, it potentially allows
+ * access to data via a stale TLB entry. Tracking all mm's that have TLB
+ * batching in flight would be expensive during reclaim so instead track
+ * whether TLB batching occurred in the past and if so then do a flush here
+ * if required. This will cost one additional flush per reclaim cycle paid
+ * by the first operation at risk such as mprotect and mumap.
+ *
+ * This must be called under the PTL so that an access to tlb_flush_batched
+ * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
+ * via the PTL.
+ */
+void flush_tlb_batched_pending(struct mm_struct *mm)
+{
+ if (mm->tlb_flush_batched) {
+ flush_tlb_mm(mm);
+
+ /*
+ * Do not allow the compiler to re-order the clearing of
+ * tlb_flush_batched before the tlb is flushed.
+ */
+ barrier();
+ mm->tlb_flush_batched = false;
+ }
+}
#else
static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
{
@@ -851,11 +887,21 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
.address = address,
.flags = PVMW_SYNC,
};
+ unsigned long start = address, end;
int *cleaned = arg;
+ /*
+ * We have to assume the worse case ie pmd for invalidation. Note that
+ * the page can not be free from this function.
+ */
+ end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
+ mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
+
while (page_vma_mapped_walk(&pvmw)) {
+ unsigned long cstart, cend;
int ret = 0;
- address = pvmw.address;
+
+ cstart = address = pvmw.address;
if (pvmw.pte) {
pte_t entry;
pte_t *pte = pvmw.pte;
@@ -868,6 +914,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
entry = pte_wrprotect(entry);
entry = pte_mkclean(entry);
set_pte_at(vma->vm_mm, address, pte, entry);
+ cend = cstart + PAGE_SIZE;
ret = 1;
} else {
#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
@@ -882,6 +929,8 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
entry = pmd_wrprotect(entry);
entry = pmd_mkclean(entry);
set_pmd_at(vma->vm_mm, address, pmd, entry);
+ cstart &= PMD_MASK;
+ cend = cstart + PMD_SIZE;
ret = 1;
#else
/* unexpected pmd-mapped page? */
@@ -890,11 +939,13 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
}
if (ret) {
- mmu_notifier_invalidate_page(vma->vm_mm, address);
+ mmu_notifier_invalidate_range(vma->vm_mm, cstart, cend);
(*cleaned)++;
}
}
+ mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
+
return true;
}
@@ -1288,6 +1339,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
pte_t pteval;
struct page *subpage;
bool ret = true;
+ unsigned long start = address, end;
enum ttu_flags flags = (enum ttu_flags)arg;
/* munlock has nothing to gain from examining un-locked vmas */
@@ -1299,6 +1351,14 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
flags & TTU_MIGRATION, page);
}
+ /*
+ * We have to assume the worse case ie pmd for invalidation. Note that
+ * the page can not be free in this function as call of try_to_unmap()
+ * must hold a reference on the page.
+ */
+ end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
+ mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
+
while (page_vma_mapped_walk(&pvmw)) {
/*
* If the page is mlock()d, we cannot swap it out.
@@ -1409,6 +1469,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
WARN_ON_ONCE(1);
ret = false;
+ /* We have to invalidate as we cleared the pte */
page_vma_mapped_walk_done(&pvmw);
break;
}
@@ -1454,8 +1515,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
discard:
page_remove_rmap(subpage, PageHuge(page));
put_page(page);
- mmu_notifier_invalidate_page(mm, address);
+ mmu_notifier_invalidate_range(mm, address,
+ address + PAGE_SIZE);
}
+
+ mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
+
return ret;
}