diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/huge_memory.c | 87 | ||||
-rw-r--r-- | mm/migrate.c | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 43 | ||||
-rw-r--r-- | mm/vmstat.c | 3 | ||||
-rw-r--r-- | mm/z3fold.c | 9 | ||||
-rw-r--r-- | mm/zsmalloc.c | 2 |
6 files changed, 89 insertions, 57 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index fef4cf210cc7..f3c4f9d22821 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1568,8 +1568,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, deactivate_page(page); if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) { - orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd, - tlb->fullmm); + pmdp_invalidate(vma, addr, pmd); orig_pmd = pmd_mkold(orig_pmd); orig_pmd = pmd_mkclean(orig_pmd); @@ -1724,37 +1723,69 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, { struct mm_struct *mm = vma->vm_mm; spinlock_t *ptl; - int ret = 0; + pmd_t entry; + bool preserve_write; + int ret; ptl = __pmd_trans_huge_lock(pmd, vma); - if (ptl) { - pmd_t entry; - bool preserve_write = prot_numa && pmd_write(*pmd); - ret = 1; + if (!ptl) + return 0; - /* - * Avoid trapping faults against the zero page. The read-only - * data is likely to be read-cached on the local CPU and - * local/remote hits to the zero page are not interesting. - */ - if (prot_numa && is_huge_zero_pmd(*pmd)) { - spin_unlock(ptl); - return ret; - } + preserve_write = prot_numa && pmd_write(*pmd); + ret = 1; - if (!prot_numa || !pmd_protnone(*pmd)) { - entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd); - entry = pmd_modify(entry, newprot); - if (preserve_write) - entry = pmd_mk_savedwrite(entry); - ret = HPAGE_PMD_NR; - set_pmd_at(mm, addr, pmd, entry); - BUG_ON(vma_is_anonymous(vma) && !preserve_write && - pmd_write(entry)); - } - spin_unlock(ptl); - } + /* + * Avoid trapping faults against the zero page. The read-only + * data is likely to be read-cached on the local CPU and + * local/remote hits to the zero page are not interesting. + */ + if (prot_numa && is_huge_zero_pmd(*pmd)) + goto unlock; + + if (prot_numa && pmd_protnone(*pmd)) + goto unlock; + + /* + * In case prot_numa, we are under down_read(mmap_sem). It's critical + * to not clear pmd intermittently to avoid race with MADV_DONTNEED + * which is also under down_read(mmap_sem): + * + * CPU0: CPU1: + * change_huge_pmd(prot_numa=1) + * pmdp_huge_get_and_clear_notify() + * madvise_dontneed() + * zap_pmd_range() + * pmd_trans_huge(*pmd) == 0 (without ptl) + * // skip the pmd + * set_pmd_at(); + * // pmd is re-established + * + * The race makes MADV_DONTNEED miss the huge pmd and don't clear it + * which may break userspace. + * + * pmdp_invalidate() is required to make sure we don't miss + * dirty/young flags set by hardware. + */ + entry = *pmd; + pmdp_invalidate(vma, addr, pmd); + + /* + * Recover dirty/young flags. It relies on pmdp_invalidate to not + * corrupt them. + */ + if (pmd_dirty(*pmd)) + entry = pmd_mkdirty(entry); + if (pmd_young(*pmd)) + entry = pmd_mkyoung(entry); + entry = pmd_modify(entry, newprot); + if (preserve_write) + entry = pmd_mk_savedwrite(entry); + ret = HPAGE_PMD_NR; + set_pmd_at(mm, addr, pmd, entry); + BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry)); +unlock: + spin_unlock(ptl); return ret; } diff --git a/mm/migrate.c b/mm/migrate.c index ed97c2c14fa8..738f1d5f8350 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -184,9 +184,9 @@ void putback_movable_pages(struct list_head *l) unlock_page(page); put_page(page); } else { - putback_lru_page(page); dec_node_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); + putback_lru_page(page); } } } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f3d603cef2c0..07efbc3a8656 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1090,10 +1090,10 @@ static void free_pcppages_bulk(struct zone *zone, int count, { int migratetype = 0; int batch_free = 0; - unsigned long nr_scanned, flags; + unsigned long nr_scanned; bool isolated_pageblocks; - spin_lock_irqsave(&zone->lock, flags); + spin_lock(&zone->lock); isolated_pageblocks = has_isolate_pageblock(zone); nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); if (nr_scanned) @@ -1142,7 +1142,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, trace_mm_page_pcpu_drain(page, 0, mt); } while (--count && --batch_free && !list_empty(list)); } - spin_unlock_irqrestore(&zone->lock, flags); + spin_unlock(&zone->lock); } static void free_one_page(struct zone *zone, @@ -1150,9 +1150,8 @@ static void free_one_page(struct zone *zone, unsigned int order, int migratetype) { - unsigned long nr_scanned, flags; - spin_lock_irqsave(&zone->lock, flags); - __count_vm_events(PGFREE, 1 << order); + unsigned long nr_scanned; + spin_lock(&zone->lock); nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); if (nr_scanned) __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); @@ -1162,7 +1161,7 @@ static void free_one_page(struct zone *zone, migratetype = get_pfnblock_migratetype(page, pfn); } __free_one_page(page, pfn, zone, order, migratetype); - spin_unlock_irqrestore(&zone->lock, flags); + spin_unlock(&zone->lock); } static void __meminit __init_single_page(struct page *page, unsigned long pfn, @@ -1240,6 +1239,7 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end) static void __free_pages_ok(struct page *page, unsigned int order) { + unsigned long flags; int migratetype; unsigned long pfn = page_to_pfn(page); @@ -1247,7 +1247,10 @@ static void __free_pages_ok(struct page *page, unsigned int order) return; migratetype = get_pfnblock_migratetype(page, pfn); + local_irq_save(flags); + __count_vm_events(PGFREE, 1 << order); free_one_page(page_zone(page), page, pfn, order, migratetype); + local_irq_restore(flags); } static void __init __free_pages_boot_core(struct page *page, unsigned int order) @@ -2219,9 +2222,8 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, int migratetype, bool cold) { int i, alloced = 0; - unsigned long flags; - spin_lock_irqsave(&zone->lock, flags); + spin_lock(&zone->lock); for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype); if (unlikely(page == NULL)) @@ -2257,7 +2259,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, * pages added to the pcp list. */ __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); - spin_unlock_irqrestore(&zone->lock, flags); + spin_unlock(&zone->lock); return alloced; } @@ -2485,20 +2487,17 @@ void free_hot_cold_page(struct page *page, bool cold) { struct zone *zone = page_zone(page); struct per_cpu_pages *pcp; + unsigned long flags; unsigned long pfn = page_to_pfn(page); int migratetype; - if (in_interrupt()) { - __free_pages_ok(page, 0); - return; - } - if (!free_pcp_prepare(page)) return; migratetype = get_pfnblock_migratetype(page, pfn); set_pcppage_migratetype(page, migratetype); - preempt_disable(); + local_irq_save(flags); + __count_vm_event(PGFREE); /* * We only track unmovable, reclaimable and movable on pcp lists. @@ -2515,7 +2514,6 @@ void free_hot_cold_page(struct page *page, bool cold) migratetype = MIGRATE_MOVABLE; } - __count_vm_event(PGFREE); pcp = &this_cpu_ptr(zone->pageset)->pcp; if (!cold) list_add(&page->lru, &pcp->lists[migratetype]); @@ -2529,7 +2527,7 @@ void free_hot_cold_page(struct page *page, bool cold) } out: - preempt_enable(); + local_irq_restore(flags); } /* @@ -2654,8 +2652,6 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, { struct page *page; - VM_BUG_ON(in_interrupt()); - do { if (list_empty(list)) { pcp->count += rmqueue_bulk(zone, 0, @@ -2686,8 +2682,9 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, struct list_head *list; bool cold = ((gfp_flags & __GFP_COLD) != 0); struct page *page; + unsigned long flags; - preempt_disable(); + local_irq_save(flags); pcp = &this_cpu_ptr(zone->pageset)->pcp; list = &pcp->lists[migratetype]; page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list); @@ -2695,7 +2692,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); zone_statistics(preferred_zone, zone); } - preempt_enable(); + local_irq_restore(flags); return page; } @@ -2711,7 +2708,7 @@ struct page *rmqueue(struct zone *preferred_zone, unsigned long flags; struct page *page; - if (likely(order == 0) && !in_interrupt()) { + if (likely(order == 0)) { page = rmqueue_pcplist(preferred_zone, zone, order, gfp_flags, migratetype); goto out; diff --git a/mm/vmstat.c b/mm/vmstat.c index 809025ed97ea..5a4f5c5a31e8 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1768,8 +1768,7 @@ void __init init_mm_internals(void) { int ret __maybe_unused; - mm_percpu_wq = alloc_workqueue("mm_percpu_wq", - WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0); #ifdef CONFIG_SMP ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead", diff --git a/mm/z3fold.c b/mm/z3fold.c index f9492bccfd79..54f63c4a809a 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -185,6 +185,12 @@ static inline void z3fold_page_lock(struct z3fold_header *zhdr) spin_lock(&zhdr->page_lock); } +/* Try to lock a z3fold page */ +static inline int z3fold_page_trylock(struct z3fold_header *zhdr) +{ + return spin_trylock(&zhdr->page_lock); +} + /* Unlock a z3fold page */ static inline void z3fold_page_unlock(struct z3fold_header *zhdr) { @@ -385,7 +391,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, spin_lock(&pool->lock); zhdr = list_first_entry_or_null(&pool->unbuddied[i], struct z3fold_header, buddy); - if (!zhdr) { + if (!zhdr || !z3fold_page_trylock(zhdr)) { spin_unlock(&pool->lock); continue; } @@ -394,7 +400,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, spin_unlock(&pool->lock); page = virt_to_page(zhdr); - z3fold_page_lock(zhdr); if (zhdr->first_chunks == 0) { if (zhdr->middle_chunks != 0 && chunks >= zhdr->start_middle) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index b7ee9c34dbd6..d41edd28298b 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -276,7 +276,7 @@ struct zs_pool { struct zspage { struct { unsigned int fullness:FULLNESS_BITS; - unsigned int class:CLASS_BITS; + unsigned int class:CLASS_BITS + 1; unsigned int isolated:ISOLATED_BITS; unsigned int magic:MAGIC_VAL_BITS; }; |