summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/asm-alpha/barrier.h2
-rw-r--r--include/asm-alpha/rwsem.h5
-rw-r--r--include/asm-arm/tlb.h23
-rw-r--r--include/asm-arm26/tlb.h47
-rw-r--r--include/asm-generic/4level-fixup.h11
-rw-r--r--include/asm-generic/pgtable.h2
-rw-r--r--include/asm-generic/tlb.h23
-rw-r--r--include/asm-i386/mmzone.h6
-rw-r--r--include/asm-i386/pgtable.h3
-rw-r--r--include/asm-i386/rwsem.h5
-rw-r--r--include/asm-ia64/rwsem.h5
-rw-r--r--include/asm-ia64/tlb.h19
-rw-r--r--include/asm-m32r/mmzone.h6
-rw-r--r--include/asm-parisc/cacheflush.h35
-rw-r--r--include/asm-parisc/mmzone.h6
-rw-r--r--include/asm-parisc/tlbflush.h3
-rw-r--r--include/asm-ppc/rwsem.h5
-rw-r--r--include/asm-ppc64/mmzone.h3
-rw-r--r--include/asm-ppc64/pgtable.h4
-rw-r--r--include/asm-ppc64/rwsem.h5
-rw-r--r--include/asm-s390/rwsem.h5
-rw-r--r--include/asm-sh/rwsem.h5
-rw-r--r--include/asm-sparc64/rwsem.h5
-rw-r--r--include/asm-sparc64/tlb.h29
-rw-r--r--include/asm-um/pgtable.h2
-rw-r--r--include/asm-x86_64/rwsem.h5
-rw-r--r--include/linux/buffer_head.h6
-rw-r--r--include/linux/hugetlb.h2
-rw-r--r--include/linux/memory.h94
-rw-r--r--include/linux/memory_hotplug.h104
-rw-r--r--include/linux/mempolicy.h7
-rw-r--r--include/linux/mm.h150
-rw-r--r--include/linux/mmzone.h28
-rw-r--r--include/linux/rmap.h4
-rw-r--r--include/linux/rwsem-spinlock.h5
-rw-r--r--include/linux/sched.h65
-rw-r--r--include/linux/vmalloc.h8
37 files changed, 508 insertions, 234 deletions
diff --git a/include/asm-alpha/barrier.h b/include/asm-alpha/barrier.h
index 229c83fe77cb..681ff581afa5 100644
--- a/include/asm-alpha/barrier.h
+++ b/include/asm-alpha/barrier.h
@@ -1,6 +1,8 @@
#ifndef __BARRIER_H
#define __BARRIER_H
+#include <asm/compiler.h>
+
#define mb() \
__asm__ __volatile__("mb": : :"memory")
diff --git a/include/asm-alpha/rwsem.h b/include/asm-alpha/rwsem.h
index 8e058a67c9a4..fafdd4f7010a 100644
--- a/include/asm-alpha/rwsem.h
+++ b/include/asm-alpha/rwsem.h
@@ -262,5 +262,10 @@ static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
#endif
}
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return (sem->count != 0);
+}
+
#endif /* __KERNEL__ */
#endif /* _ALPHA_RWSEM_H */
diff --git a/include/asm-arm/tlb.h b/include/asm-arm/tlb.h
index 9bb325c54645..f49bfb78c221 100644
--- a/include/asm-arm/tlb.h
+++ b/include/asm-arm/tlb.h
@@ -27,11 +27,7 @@
*/
struct mmu_gather {
struct mm_struct *mm;
- unsigned int freed;
unsigned int fullmm;
-
- unsigned int flushes;
- unsigned int avoided_flushes;
};
DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -39,11 +35,9 @@ DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
static inline struct mmu_gather *
tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
{
- int cpu = smp_processor_id();
- struct mmu_gather *tlb = &per_cpu(mmu_gathers, cpu);
+ struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
tlb->mm = mm;
- tlb->freed = 0;
tlb->fullmm = full_mm_flush;
return tlb;
@@ -52,24 +46,13 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
static inline void
tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
{
- struct mm_struct *mm = tlb->mm;
- unsigned long freed = tlb->freed;
- int rss = get_mm_counter(mm, rss);
-
- if (rss < freed)
- freed = rss;
- add_mm_counter(mm, rss, -freed);
-
if (tlb->fullmm)
- flush_tlb_mm(mm);
+ flush_tlb_mm(tlb->mm);
/* keep the page table cache within bounds */
check_pgt_cache();
-}
-static inline unsigned int tlb_is_full_mm(struct mmu_gather *tlb)
-{
- return tlb->fullmm;
+ put_cpu_var(mmu_gathers);
}
#define tlb_remove_tlb_entry(tlb,ptep,address) do { } while (0)
diff --git a/include/asm-arm26/tlb.h b/include/asm-arm26/tlb.h
index 1316352a58f3..08ddd85b8d35 100644
--- a/include/asm-arm26/tlb.h
+++ b/include/asm-arm26/tlb.h
@@ -10,24 +10,20 @@
*/
struct mmu_gather {
struct mm_struct *mm;
- unsigned int freed;
- unsigned int fullmm;
-
- unsigned int flushes;
- unsigned int avoided_flushes;
+ unsigned int need_flush;
+ unsigned int fullmm;
};
-extern struct mmu_gather mmu_gathers[NR_CPUS];
+DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
static inline struct mmu_gather *
tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
{
- int cpu = smp_processor_id();
- struct mmu_gather *tlb = &mmu_gathers[cpu];
+ struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
tlb->mm = mm;
- tlb->freed = 0;
- tlb->fullmm = full_mm_flush;
+ tlb->need_flush = 0;
+ tlb->fullmm = full_mm_flush;
return tlb;
}
@@ -35,30 +31,13 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
static inline void
tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
{
- struct mm_struct *mm = tlb->mm;
- unsigned long freed = tlb->freed;
- int rss = get_mm_counter(mm, rss);
-
- if (rss < freed)
- freed = rss;
- add_mm_counter(mm, rss, -freed);
-
- if (freed) {
- flush_tlb_mm(mm);
- tlb->flushes++;
- } else {
- tlb->avoided_flushes++;
- }
+ if (tlb->need_flush)
+ flush_tlb_mm(tlb->mm);
/* keep the page table cache within bounds */
check_pgt_cache();
-}
-
-static inline unsigned int
-tlb_is_full_mm(struct mmu_gather *tlb)
-{
- return tlb->fullmm;
+ put_cpu_var(mmu_gathers);
}
#define tlb_remove_tlb_entry(tlb,ptep,address) do { } while (0)
@@ -71,7 +50,13 @@ tlb_is_full_mm(struct mmu_gather *tlb)
} while (0)
#define tlb_end_vma(tlb,vma) do { } while (0)
-#define tlb_remove_page(tlb,page) free_page_and_swap_cache(page)
+static inline void
+tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+ tlb->need_flush = 1;
+ free_page_and_swap_cache(page);
+}
+
#define pte_free_tlb(tlb,ptep) pte_free(ptep)
#define pmd_free_tlb(tlb,pmdp) pmd_free(pmdp)
diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h
index c20ec257ecc0..68c6fea994d9 100644
--- a/include/asm-generic/4level-fixup.h
+++ b/include/asm-generic/4level-fixup.h
@@ -10,14 +10,9 @@
#define pud_t pgd_t
-#define pmd_alloc(mm, pud, address) \
-({ pmd_t *ret; \
- if (pgd_none(*pud)) \
- ret = __pmd_alloc(mm, pud, address); \
- else \
- ret = pmd_offset(pud, address); \
- ret; \
-})
+#define pmd_alloc(mm, pud, address) \
+ ((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \
+ NULL: pmd_offset(pud, address))
#define pud_alloc(mm, pgd, address) (pgd)
#define pud_offset(pgd, start) (pgd)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index ff28c8b31f58..7dca30a26c53 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -8,7 +8,7 @@
* - update the page tables
* - inform the TLB about the new one
*
- * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock.
+ * We hold the mm semaphore for reading, and the pte lock.
*
* Note: the old pte is known to not be writable, so we don't need to
* worry about dirty bits etc getting lost.
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 7d0298347ee7..cdd4145243cd 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -35,16 +35,13 @@
#endif
/* struct mmu_gather is an opaque type used by the mm code for passing around
- * any data needed by arch specific code for tlb_remove_page. This structure
- * can be per-CPU or per-MM as the page table lock is held for the duration of
- * TLB shootdown.
+ * any data needed by arch specific code for tlb_remove_page.
*/
struct mmu_gather {
struct mm_struct *mm;
unsigned int nr; /* set to ~0U means fast mode */
unsigned int need_flush;/* Really unmapped some ptes? */
unsigned int fullmm; /* non-zero means full mm flush */
- unsigned long freed;
struct page * pages[FREE_PTE_NR];
};
@@ -57,7 +54,7 @@ DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
static inline struct mmu_gather *
tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
{
- struct mmu_gather *tlb = &per_cpu(mmu_gathers, smp_processor_id());
+ struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
tlb->mm = mm;
@@ -65,7 +62,6 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
tlb->nr = num_online_cpus() > 1 ? 0U : ~0U;
tlb->fullmm = full_mm_flush;
- tlb->freed = 0;
return tlb;
}
@@ -85,28 +81,17 @@ tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
/* tlb_finish_mmu
* Called at the end of the shootdown operation to free up any resources
- * that were required. The page table lock is still held at this point.
+ * that were required.
*/
static inline void
tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
{
- int freed = tlb->freed;
- struct mm_struct *mm = tlb->mm;
- int rss = get_mm_counter(mm, rss);
-
- if (rss < freed)
- freed = rss;
- add_mm_counter(mm, rss, -freed);
tlb_flush_mmu(tlb, start, end);
/* keep the page table cache within bounds */
check_pgt_cache();
-}
-static inline unsigned int
-tlb_is_full_mm(struct mmu_gather *tlb)
-{
- return tlb->fullmm;
+ put_cpu_var(mmu_gathers);
}
/* tlb_remove_page
diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h
index 348fe3a4879d..620a90641ea8 100644
--- a/include/asm-i386/mmzone.h
+++ b/include/asm-i386/mmzone.h
@@ -88,12 +88,6 @@ static inline int pfn_to_nid(unsigned long pfn)
__pgdat->node_start_pfn + __pgdat->node_spanned_pages; \
})
-#define local_mapnr(kvaddr) \
-({ \
- unsigned long __pfn = __pa(kvaddr) >> PAGE_SHIFT; \
- (__pfn - node_start_pfn(pfn_to_nid(__pfn))); \
-})
-
/* XXX: FIXME -- wli */
#define kern_addr_valid(kaddr) (0)
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index d101ac414f07..0e3ec809352d 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -203,7 +203,8 @@ extern unsigned long pg0[];
#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
-#define pmd_none(x) (!pmd_val(x))
+/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
+#define pmd_none(x) (!(unsigned long)pmd_val(x))
#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
diff --git a/include/asm-i386/rwsem.h b/include/asm-i386/rwsem.h
index 7625a675852f..be4ab859238e 100644
--- a/include/asm-i386/rwsem.h
+++ b/include/asm-i386/rwsem.h
@@ -284,5 +284,10 @@ LOCK_PREFIX "xadd %0,(%2)"
return tmp+delta;
}
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return (sem->count != 0);
+}
+
#endif /* __KERNEL__ */
#endif /* _I386_RWSEM_H */
diff --git a/include/asm-ia64/rwsem.h b/include/asm-ia64/rwsem.h
index e18b5ab0cb75..1327c91ea39c 100644
--- a/include/asm-ia64/rwsem.h
+++ b/include/asm-ia64/rwsem.h
@@ -186,4 +186,9 @@ __downgrade_write (struct rw_semaphore *sem)
#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count))
#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return (sem->count != 0);
+}
+
#endif /* _ASM_IA64_RWSEM_H */
diff --git a/include/asm-ia64/tlb.h b/include/asm-ia64/tlb.h
index 3a9a6d1be75c..834370b9dea1 100644
--- a/include/asm-ia64/tlb.h
+++ b/include/asm-ia64/tlb.h
@@ -60,7 +60,6 @@ struct mmu_gather {
unsigned int nr; /* == ~0U => fast mode */
unsigned char fullmm; /* non-zero means full mm flush */
unsigned char need_flush; /* really unmapped some PTEs? */
- unsigned long freed; /* number of pages freed */
unsigned long start_addr;
unsigned long end_addr;
struct page *pages[FREE_PTE_NR];
@@ -129,7 +128,7 @@ ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long e
static inline struct mmu_gather *
tlb_gather_mmu (struct mm_struct *mm, unsigned int full_mm_flush)
{
- struct mmu_gather *tlb = &__get_cpu_var(mmu_gathers);
+ struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
tlb->mm = mm;
/*
@@ -147,25 +146,17 @@ tlb_gather_mmu (struct mm_struct *mm, unsigned int full_mm_flush)
*/
tlb->nr = (num_online_cpus() == 1) ? ~0U : 0;
tlb->fullmm = full_mm_flush;
- tlb->freed = 0;
tlb->start_addr = ~0UL;
return tlb;
}
/*
* Called at the end of the shootdown operation to free up any resources that were
- * collected. The page table lock is still held at this point.
+ * collected.
*/
static inline void
tlb_finish_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
{
- unsigned long freed = tlb->freed;
- struct mm_struct *mm = tlb->mm;
- unsigned long rss = get_mm_counter(mm, rss);
-
- if (rss < freed)
- freed = rss;
- add_mm_counter(mm, rss, -freed);
/*
* Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
* tlb->end_addr.
@@ -174,12 +165,8 @@ tlb_finish_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
/* keep the page table cache within bounds */
check_pgt_cache();
-}
-static inline unsigned int
-tlb_is_full_mm(struct mmu_gather *tlb)
-{
- return tlb->fullmm;
+ put_cpu_var(mmu_gathers);
}
/*
diff --git a/include/asm-m32r/mmzone.h b/include/asm-m32r/mmzone.h
index d58878ec899e..adc7970a77ec 100644
--- a/include/asm-m32r/mmzone.h
+++ b/include/asm-m32r/mmzone.h
@@ -21,12 +21,6 @@ extern struct pglist_data *node_data[];
__pgdat->node_start_pfn + __pgdat->node_spanned_pages - 1; \
})
-#define local_mapnr(kvaddr) \
-({ \
- unsigned long __pfn = __pa(kvaddr) >> PAGE_SHIFT; \
- (__pfn - node_start_pfn(pfn_to_nid(__pfn))); \
-})
-
#define pfn_to_page(pfn) \
({ \
unsigned long __pfn = pfn; \
diff --git a/include/asm-parisc/cacheflush.h b/include/asm-parisc/cacheflush.h
index aa592d8c0e39..1bc3c83ee74b 100644
--- a/include/asm-parisc/cacheflush.h
+++ b/include/asm-parisc/cacheflush.h
@@ -100,30 +100,34 @@ static inline void flush_cache_range(struct vm_area_struct *vma,
/* Simple function to work out if we have an existing address translation
* for a user space vma. */
-static inline pte_t *__translation_exists(struct mm_struct *mm,
- unsigned long addr)
+static inline int translation_exists(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long pfn)
{
- pgd_t *pgd = pgd_offset(mm, addr);
+ pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
pmd_t *pmd;
- pte_t *pte;
+ pte_t pte;
if(pgd_none(*pgd))
- return NULL;
+ return 0;
pmd = pmd_offset(pgd, addr);
if(pmd_none(*pmd) || pmd_bad(*pmd))
- return NULL;
+ return 0;
- pte = pte_offset_map(pmd, addr);
+ /* We cannot take the pte lock here: flush_cache_page is usually
+ * called with pte lock already held. Whereas flush_dcache_page
+ * takes flush_dcache_mmap_lock, which is lower in the hierarchy:
+ * the vma itself is secure, but the pte might come or go racily.
+ */
+ pte = *pte_offset_map(pmd, addr);
+ /* But pte_unmap() does nothing on this architecture */
- /* The PA flush mappings show up as pte_none, but they're
- * valid none the less */
- if(pte_none(*pte) && ((pte_val(*pte) & _PAGE_FLUSH) == 0))
- return NULL;
- return pte;
-}
-#define translation_exists(vma, addr) __translation_exists((vma)->vm_mm, addr)
+ /* Filter out coincidental file entries and swap entries */
+ if (!(pte_val(pte) & (_PAGE_FLUSH|_PAGE_PRESENT)))
+ return 0;
+ return pte_pfn(pte) == pfn;
+}
/* Private function to flush a page from the cache of a non-current
* process. cr25 contains the Page Directory of the current user
@@ -175,9 +179,8 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long
{
BUG_ON(!vma->vm_mm->context);
- if(likely(translation_exists(vma, vmaddr)))
+ if (likely(translation_exists(vma, vmaddr, pfn)))
__flush_cache_page(vma, vmaddr);
}
#endif
-
diff --git a/include/asm-parisc/mmzone.h b/include/asm-parisc/mmzone.h
index 595d3dce120a..ae039f4fd711 100644
--- a/include/asm-parisc/mmzone.h
+++ b/include/asm-parisc/mmzone.h
@@ -27,12 +27,6 @@ extern struct node_map_data node_data[];
})
#define node_localnr(pfn, nid) ((pfn) - node_start_pfn(nid))
-#define local_mapnr(kvaddr) \
-({ \
- unsigned long __pfn = __pa(kvaddr) >> PAGE_SHIFT; \
- (__pfn - node_start_pfn(pfn_to_nid(__pfn))); \
-})
-
#define pfn_to_page(pfn) \
({ \
unsigned long __pfn = (pfn); \
diff --git a/include/asm-parisc/tlbflush.h b/include/asm-parisc/tlbflush.h
index 84af4ab1fe51..e97aa8d1eff5 100644
--- a/include/asm-parisc/tlbflush.h
+++ b/include/asm-parisc/tlbflush.h
@@ -88,7 +88,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
if (npages >= 512) /* 2MB of space: arbitrary, should be tuned */
flush_tlb_all();
else {
-
+ preempt_disable();
mtsp(vma->vm_mm->context,1);
purge_tlb_start();
if (split_tlb) {
@@ -102,6 +102,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
pdtlb(start);
start += PAGE_SIZE;
}
+ preempt_enable();
}
purge_tlb_end();
}
diff --git a/include/asm-ppc/rwsem.h b/include/asm-ppc/rwsem.h
index 3e738f483c11..3501ea72f88c 100644
--- a/include/asm-ppc/rwsem.h
+++ b/include/asm-ppc/rwsem.h
@@ -168,5 +168,10 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
return atomic_add_return(delta, (atomic_t *)(&sem->count));
}
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return (sem->count != 0);
+}
+
#endif /* __KERNEL__ */
#endif /* _PPC_RWSEM_XADD_H */
diff --git a/include/asm-ppc64/mmzone.h b/include/asm-ppc64/mmzone.h
index ed473f4b0152..80a708e7093a 100644
--- a/include/asm-ppc64/mmzone.h
+++ b/include/asm-ppc64/mmzone.h
@@ -67,9 +67,6 @@ static inline int pa_to_nid(unsigned long pa)
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
#define node_end_pfn(nid) (NODE_DATA(nid)->node_end_pfn)
-#define local_mapnr(kvaddr) \
- ( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr))
-
#ifdef CONFIG_DISCONTIGMEM
/*
diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h
index c83679c9d2b0..2eb1778a3a15 100644
--- a/include/asm-ppc64/pgtable.h
+++ b/include/asm-ppc64/pgtable.h
@@ -478,10 +478,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
#define __HAVE_ARCH_PTE_SAME
#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
+#define pte_ERROR(e) \
+ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
#define pmd_ERROR(e) \
printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
#define pud_ERROR(e) \
- printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pud_val(e))
+ printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
#define pgd_ERROR(e) \
printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
diff --git a/include/asm-ppc64/rwsem.h b/include/asm-ppc64/rwsem.h
index bd5c2f093575..7a647fae3765 100644
--- a/include/asm-ppc64/rwsem.h
+++ b/include/asm-ppc64/rwsem.h
@@ -163,5 +163,10 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
return atomic_add_return(delta, (atomic_t *)(&sem->count));
}
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return (sem->count != 0);
+}
+
#endif /* __KERNEL__ */
#endif /* _PPC_RWSEM_XADD_H */
diff --git a/include/asm-s390/rwsem.h b/include/asm-s390/rwsem.h
index 8c0cebbfc034..0422a085dd56 100644
--- a/include/asm-s390/rwsem.h
+++ b/include/asm-s390/rwsem.h
@@ -351,5 +351,10 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
return new;
}
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return (sem->count != 0);
+}
+
#endif /* __KERNEL__ */
#endif /* _S390_RWSEM_H */
diff --git a/include/asm-sh/rwsem.h b/include/asm-sh/rwsem.h
index 1be4337f5259..0262d3d1e5e0 100644
--- a/include/asm-sh/rwsem.h
+++ b/include/asm-sh/rwsem.h
@@ -166,5 +166,10 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
return atomic_add_return(delta, (atomic_t *)(&sem->count));
}
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return (sem->count != 0);
+}
+
#endif /* __KERNEL__ */
#endif /* _ASM_SH_RWSEM_H */
diff --git a/include/asm-sparc64/rwsem.h b/include/asm-sparc64/rwsem.h
index 4568ee4022df..cef5e8270421 100644
--- a/include/asm-sparc64/rwsem.h
+++ b/include/asm-sparc64/rwsem.h
@@ -56,6 +56,11 @@ static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
atomic_add(delta, (atomic_t *)(&sem->count));
}
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return (sem->count != 0);
+}
+
#endif /* __KERNEL__ */
#endif /* _SPARC64_RWSEM_H */
diff --git a/include/asm-sparc64/tlb.h b/include/asm-sparc64/tlb.h
index 9baf57db01d2..66138d959df5 100644
--- a/include/asm-sparc64/tlb.h
+++ b/include/asm-sparc64/tlb.h
@@ -25,9 +25,8 @@ struct mmu_gather {
struct mm_struct *mm;
unsigned int pages_nr;
unsigned int need_flush;
- unsigned int tlb_frozen;
+ unsigned int fullmm;
unsigned int tlb_nr;
- unsigned long freed;
unsigned long vaddrs[TLB_BATCH_NR];
struct page *pages[FREE_PTE_NR];
};
@@ -44,14 +43,13 @@ extern void flush_tlb_pending(void);
static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
{
- struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
+ struct mmu_gather *mp = &get_cpu_var(mmu_gathers);
BUG_ON(mp->tlb_nr);
mp->mm = mm;
mp->pages_nr = num_online_cpus() > 1 ? 0U : ~0U;
- mp->tlb_frozen = full_mm_flush;
- mp->freed = 0;
+ mp->fullmm = full_mm_flush;
return mp;
}
@@ -78,30 +76,19 @@ extern void smp_flush_tlb_mm(struct mm_struct *mm);
static inline void tlb_finish_mmu(struct mmu_gather *mp, unsigned long start, unsigned long end)
{
- unsigned long freed = mp->freed;
- struct mm_struct *mm = mp->mm;
- unsigned long rss = get_mm_counter(mm, rss);
-
- if (rss < freed)
- freed = rss;
- add_mm_counter(mm, rss, -freed);
-
tlb_flush_mmu(mp);
- if (mp->tlb_frozen) {
- if (CTX_VALID(mm->context))
- do_flush_tlb_mm(mm);
- mp->tlb_frozen = 0;
+ if (mp->fullmm) {
+ if (CTX_VALID(mp->mm->context))
+ do_flush_tlb_mm(mp->mm);
+ mp->fullmm = 0;
} else
flush_tlb_pending();
/* keep the page table cache within bounds */
check_pgt_cache();
-}
-static inline unsigned int tlb_is_full_mm(struct mmu_gather *mp)
-{
- return mp->tlb_frozen;
+ put_cpu_var(mmu_gathers);
}
static inline void tlb_remove_page(struct mmu_gather *mp, struct page *page)
diff --git a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h
index 616d02b57ea9..ac64eb955868 100644
--- a/include/asm-um/pgtable.h
+++ b/include/asm-um/pgtable.h
@@ -138,7 +138,7 @@ extern unsigned long pg0[1024];
#define pte_clear(mm,addr,xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEWPAGE))
-#define pmd_none(x) (!(pmd_val(x) & ~_PAGE_NEWPAGE))
+#define pmd_none(x) (!((unsigned long)pmd_val(x) & ~_PAGE_NEWPAGE))
#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0)
diff --git a/include/asm-x86_64/rwsem.h b/include/asm-x86_64/rwsem.h
index c002175b6e82..46077e9c1910 100644
--- a/include/asm-x86_64/rwsem.h
+++ b/include/asm-x86_64/rwsem.h
@@ -274,5 +274,10 @@ LOCK_PREFIX "xaddl %0,(%2)"
return tmp+delta;
}
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return (sem->count != 0);
+}
+
#endif /* __KERNEL__ */
#endif /* _X8664_RWSEM_H */
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 88af42f5e04a..c937d6e65502 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -126,8 +126,8 @@ BUFFER_FNS(Eopnotsupp, eopnotsupp)
/* If we *know* page->private refers to buffer_heads */
#define page_buffers(page) \
({ \
- BUG_ON(!PagePrivate(page)); \
- ((struct buffer_head *)(page)->private); \
+ BUG_ON(!PagePrivate(page)); \
+ ((struct buffer_head *)page_private(page)); \
})
#define page_has_buffers(page) PagePrivate(page)
@@ -219,7 +219,7 @@ static inline void attach_page_buffers(struct page *page,
{
page_cache_get(page);
SetPagePrivate(page);
- page->private = (unsigned long)head;
+ set_page_private(page, (unsigned long)head);
}
static inline void get_bh(struct buffer_head *bh)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d664330d900e..0cea162b08c0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -16,7 +16,6 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
-void zap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
int hugetlb_report_meminfo(char *);
@@ -87,7 +86,6 @@ static inline unsigned long hugetlb_total_pages(void)
#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL)
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; })
-#define zap_hugepage_range(vma, start, len) BUG()
#define unmap_hugepage_range(vma, start, end) BUG()
#define is_hugepage_mem_enough(size) 0
#define hugetlb_report_meminfo(buf) 0
diff --git a/include/linux/memory.h b/include/linux/memory.h
new file mode 100644
index 000000000000..0def328ab5cf
--- /dev/null
+++ b/include/linux/memory.h
@@ -0,0 +1,94 @@
+/*
+ * include/linux/memory.h - generic memory definition
+ *
+ * This is mainly for topological representation. We define the
+ * basic "struct memory_block" here, which can be embedded in per-arch
+ * definitions or NUMA information.
+ *
+ * Basic handling of the devices is done in drivers/base/memory.c
+ * and system devices are handled in drivers/base/sys.c.
+ *
+ * Memory block are exported via sysfs in the class/memory/devices/
+ * directory.
+ *
+ */
+#ifndef _LINUX_MEMORY_H_
+#define _LINUX_MEMORY_H_
+
+#include <linux/sysdev.h>
+#include <linux/node.h>
+#include <linux/compiler.h>
+
+#include <asm/semaphore.h>
+
+struct memory_block {
+ unsigned long phys_index;
+ unsigned long state;
+ /*
+ * This serializes all state change requests. It isn't
+ * held during creation because the control files are
+ * created long after the critical areas during
+ * initialization.
+ */
+ struct semaphore state_sem;
+ int phys_device; /* to which fru does this belong? */
+ void *hw; /* optional pointer to fw/hw data */
+ int (*phys_callback)(struct memory_block *);
+ struct sys_device sysdev;
+};
+
+/* These states are exposed to userspace as text strings in sysfs */
+#define MEM_ONLINE (1<<0) /* exposed to userspace */
+#define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */
+#define MEM_OFFLINE (1<<2) /* exposed to userspace */
+
+/*
+ * All of these states are currently kernel-internal for notifying
+ * kernel components and architectures.
+ *
+ * For MEM_MAPPING_INVALID, all notifier chains with priority >0
+ * are called before pfn_to_page() becomes invalid. The priority=0
+ * entry is reserved for the function that actually makes
+ * pfn_to_page() stop working. Any notifiers that want to be called
+ * after that should have priority <0.
+ */
+#define MEM_MAPPING_INVALID (1<<3)
+
+#ifndef CONFIG_MEMORY_HOTPLUG
+static inline int memory_dev_init(void)
+{
+ return 0;
+}
+static inline int register_memory_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+static inline void unregister_memory_notifier(struct notifier_block *nb)
+{
+}
+#else
+extern int register_memory(struct memory_block *, struct mem_section *section, struct node *);
+extern int register_new_memory(struct mem_section *);
+extern int unregister_memory_section(struct mem_section *);
+extern int memory_dev_init(void);
+extern int register_memory_notifier(struct notifier_block *nb);
+extern void unregister_memory_notifier(struct notifier_block *nb);
+
+#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
+
+extern int invalidate_phys_mapping(unsigned long, unsigned long);
+struct notifier_block;
+
+extern int register_memory_notifier(struct notifier_block *nb);
+extern void unregister_memory_notifier(struct notifier_block *nb);
+
+extern struct sysdev_class memory_sysdev_class;
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+#define hotplug_memory_notifier(fn, pri) { \
+ static struct notifier_block fn##_mem_nb = \
+ { .notifier_call = fn, .priority = pri }; \
+ register_memory_notifier(&fn##_mem_nb); \
+}
+
+#endif /* _LINUX_MEMORY_H_ */
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
new file mode 100644
index 000000000000..01f03bc06eff
--- /dev/null
+++ b/include/linux/memory_hotplug.h
@@ -0,0 +1,104 @@
+#ifndef __LINUX_MEMORY_HOTPLUG_H
+#define __LINUX_MEMORY_HOTPLUG_H
+
+#include <linux/mmzone.h>
+#include <linux/spinlock.h>
+#include <linux/mmzone.h>
+#include <linux/notifier.h>
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * pgdat resizing functions
+ */
+static inline
+void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
+{
+ spin_lock_irqsave(&pgdat->node_size_lock, *flags);
+}
+static inline
+void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
+{
+ spin_unlock_irqrestore(&pgdat->node_size_lock, *flags);
+}
+static inline
+void pgdat_resize_init(struct pglist_data *pgdat)
+{
+ spin_lock_init(&pgdat->node_size_lock);
+}
+/*
+ * Zone resizing functions
+ */
+static inline unsigned zone_span_seqbegin(struct zone *zone)
+{
+ return read_seqbegin(&zone->span_seqlock);
+}
+static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
+{
+ return read_seqretry(&zone->span_seqlock, iv);
+}
+static inline void zone_span_writelock(struct zone *zone)
+{
+ write_seqlock(&zone->span_seqlock);
+}
+static inline void zone_span_writeunlock(struct zone *zone)
+{
+ write_sequnlock(&zone->span_seqlock);
+}
+static inline void zone_seqlock_init(struct zone *zone)
+{
+ seqlock_init(&zone->span_seqlock);
+}
+extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
+extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
+extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
+/* need some defines for these for archs that don't support it */
+extern void online_page(struct page *page);
+/* VM interface that may be used by firmware interface */
+extern int add_memory(u64 start, u64 size);
+extern int remove_memory(u64 start, u64 size);
+extern int online_pages(unsigned long, unsigned long);
+
+/* reasonably generic interface to expand the physical pages in a zone */
+extern int __add_pages(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages);
+#else /* ! CONFIG_MEMORY_HOTPLUG */
+/*
+ * Stub functions for when hotplug is off
+ */
+static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
+static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
+static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
+
+static inline unsigned zone_span_seqbegin(struct zone *zone)
+{
+ return 0;
+}
+static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
+{
+ return 0;
+}
+static inline void zone_span_writelock(struct zone *zone) {}
+static inline void zone_span_writeunlock(struct zone *zone) {}
+static inline void zone_seqlock_init(struct zone *zone) {}
+
+static inline int mhp_notimplemented(const char *func)
+{
+ printk(KERN_WARNING "%s() called, with CONFIG_MEMORY_HOTPLUG disabled\n", func);
+ dump_stack();
+ return -ENOSYS;
+}
+
+static inline int __add_pages(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages)
+{
+ return mhp_notimplemented(__FUNCTION__);
+}
+#endif /* ! CONFIG_MEMORY_HOTPLUG */
+static inline int __remove_pages(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages)
+{
+ printk(KERN_WARNING "%s() called, not yet supported\n", __FUNCTION__);
+ dump_stack();
+ return -ENOSYS;
+}
+#endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 58385ee1c0ac..7af8cb836e78 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -27,10 +27,10 @@
#include <linux/config.h>
#include <linux/mmzone.h>
-#include <linux/bitmap.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
+#include <linux/nodemask.h>
struct vm_area_struct;
@@ -47,8 +47,7 @@ struct vm_area_struct;
* Locking policy for interlave:
* In process context there is no locking because only the process accesses
* its own state. All vma manipulation is somewhat protected by a down_read on
- * mmap_sem. For allocating in the interleave policy the page_table_lock
- * must be also aquired to protect il_next.
+ * mmap_sem.
*
* Freeing policy:
* When policy is MPOL_BIND v.zonelist is kmalloc'ed and must be kfree'd.
@@ -63,7 +62,7 @@ struct mempolicy {
union {
struct zonelist *zonelist; /* bind */
short preferred_node; /* preferred */
- DECLARE_BITMAP(nodes, MAX_NUMNODES); /* interleave */
+ nodemask_t nodes; /* interleave */
/* undefined for default */
} v;
};
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e1649578fb0c..5c1fb0a2e806 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -157,7 +157,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */
#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
-#define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */
+#define VM_RESERVED 0x00080000 /* Pages managed in a special way */
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
@@ -226,13 +226,18 @@ struct page {
* to show when page is mapped
* & limit reverse map searches.
*/
- unsigned long private; /* Mapping-private opaque data:
+ union {
+ unsigned long private; /* Mapping-private opaque data:
* usually used for buffer_heads
* if PagePrivate set; used for
* swp_entry_t if PageSwapCache
* When page is free, this indicates
* order in the buddy system.
*/
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+ spinlock_t ptl;
+#endif
+ } u;
struct address_space *mapping; /* If low bit clear, points to
* inode address_space, or NULL.
* If page mapped as anonymous
@@ -260,6 +265,9 @@ struct page {
#endif /* WANT_PAGE_VIRTUAL */
};
+#define page_private(page) ((page)->u.private)
+#define set_page_private(page, v) ((page)->u.private = (v))
+
/*
* FIXME: take this include out, include page-flags.h in
* files which need it (119 of them)
@@ -311,17 +319,17 @@ extern void FASTCALL(__page_cache_release(struct page *));
#ifdef CONFIG_HUGETLB_PAGE
-static inline int page_count(struct page *p)
+static inline int page_count(struct page *page)
{
- if (PageCompound(p))
- p = (struct page *)p->private;
- return atomic_read(&(p)->_count) + 1;
+ if (PageCompound(page))
+ page = (struct page *)page_private(page);
+ return atomic_read(&page->_count) + 1;
}
static inline void get_page(struct page *page)
{
if (unlikely(PageCompound(page)))
- page = (struct page *)page->private;
+ page = (struct page *)page_private(page);
atomic_inc(&page->_count);
}
@@ -338,7 +346,7 @@ static inline void get_page(struct page *page)
static inline void put_page(struct page *page)
{
- if (!PageReserved(page) && put_page_testzero(page))
+ if (put_page_testzero(page))
__page_cache_release(page);
}
@@ -587,7 +595,7 @@ static inline int PageAnon(struct page *page)
static inline pgoff_t page_index(struct page *page)
{
if (unlikely(PageSwapCache(page)))
- return page->private;
+ return page_private(page);
return page->index;
}
@@ -682,7 +690,7 @@ struct zap_details {
unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long size, struct zap_details *);
-unsigned long unmap_vmas(struct mmu_gather **tlb, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlb,
struct vm_area_struct *start_vma, unsigned long start_addr,
unsigned long end_addr, unsigned long *nr_accounted,
struct zap_details *);
@@ -704,10 +712,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
}
extern int vmtruncate(struct inode * inode, loff_t offset);
-extern pud_t *FASTCALL(__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
-extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address));
-extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
-extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
@@ -723,6 +727,7 @@ void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
+void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);
int __set_page_dirty_buffers(struct page *page);
int __set_page_dirty_nobuffers(struct page *page);
@@ -759,38 +764,83 @@ struct shrinker;
extern struct shrinker *set_shrinker(int, shrinker_t);
extern void remove_shrinker(struct shrinker *shrinker);
-/*
- * On a two-level or three-level page table, this ends up being trivial. Thus
- * the inlining and the symmetry break with pte_alloc_map() that does all
- * of this out-of-line.
- */
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
+int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+
/*
* The following ifdef needed to get the 4level-fixup.h header to work.
* Remove it when 4level-fixup.h has been removed.
*/
-#ifdef CONFIG_MMU
-#ifndef __ARCH_HAS_4LEVEL_HACK
+#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
- if (pgd_none(*pgd))
- return __pud_alloc(mm, pgd, address);
- return pud_offset(pgd, address);
+ return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
+ NULL: pud_offset(pgd, address);
}
static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
- if (pud_none(*pud))
- return __pmd_alloc(mm, pud, address);
- return pmd_offset(pud, address);
+ return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
+ NULL: pmd_offset(pud, address);
}
-#endif
-#endif /* CONFIG_MMU */
+#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
+
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+/*
+ * We tuck a spinlock to guard each pagetable page into its struct page,
+ * at page->private, with BUILD_BUG_ON to make sure that this will not
+ * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
+ * When freeing, reset page->mapping so free_pages_check won't complain.
+ */
+#define __pte_lockptr(page) &((page)->u.ptl)
+#define pte_lock_init(_page) do { \
+ spin_lock_init(__pte_lockptr(_page)); \
+} while (0)
+#define pte_lock_deinit(page) ((page)->mapping = NULL)
+#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
+#else
+/*
+ * We use mm->page_table_lock to guard all pagetable pages of the mm.
+ */
+#define pte_lock_init(page) do {} while (0)
+#define pte_lock_deinit(page) do {} while (0)
+#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;})
+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+
+#define pte_offset_map_lock(mm, pmd, address, ptlp) \
+({ \
+ spinlock_t *__ptl = pte_lockptr(mm, pmd); \
+ pte_t *__pte = pte_offset_map(pmd, address); \
+ *(ptlp) = __ptl; \
+ spin_lock(__ptl); \
+ __pte; \
+})
+
+#define pte_unmap_unlock(pte, ptl) do { \
+ spin_unlock(ptl); \
+ pte_unmap(pte); \
+} while (0)
+
+#define pte_alloc_map(mm, pmd, address) \
+ ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+ NULL: pte_offset_map(pmd, address))
+
+#define pte_alloc_map_lock(mm, pmd, address, ptlp) \
+ ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+ NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
+
+#define pte_alloc_kernel(pmd, address) \
+ ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+ NULL: pte_offset_kernel(pmd, address))
extern void free_area_init(unsigned long * zones_size);
extern void free_area_init_node(int nid, pg_data_t *pgdat,
unsigned long * zones_size, unsigned long zone_start_pfn,
unsigned long *zholes_size);
extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
+extern void setup_per_zone_pages_min(void);
extern void mem_init(void);
extern void show_mem(void);
extern void si_meminfo(struct sysinfo * val);
@@ -834,6 +884,7 @@ extern int split_vma(struct mm_struct *,
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
struct rb_node **, struct rb_node *);
+extern void unlink_file_vma(struct vm_area_struct *);
extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
unsigned long addr, unsigned long len, pgoff_t pgoff);
extern void exit_mmap(struct mm_struct *);
@@ -894,7 +945,8 @@ void handle_ra_miss(struct address_space *mapping,
unsigned long max_sane_readahead(unsigned long nr);
/* Do stack extension */
-extern int expand_stack(struct vm_area_struct * vma, unsigned long address);
+extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
+extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
@@ -917,40 +969,28 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
}
-extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
+struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
+struct page *vmalloc_to_page(void *addr);
+unsigned long vmalloc_to_pfn(void *addr);
+int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
+ unsigned long pfn, unsigned long size, pgprot_t);
-extern struct page * vmalloc_to_page(void *addr);
-extern unsigned long vmalloc_to_pfn(void *addr);
-extern struct page * follow_page(struct mm_struct *mm, unsigned long address,
- int write);
-extern int check_user_page_readable(struct mm_struct *mm, unsigned long address);
-int remap_pfn_range(struct vm_area_struct *, unsigned long,
- unsigned long, unsigned long, pgprot_t);
+struct page *follow_page(struct mm_struct *, unsigned long address,
+ unsigned int foll_flags);
+#define FOLL_WRITE 0x01 /* check pte is writable */
+#define FOLL_TOUCH 0x02 /* mark page accessed */
+#define FOLL_GET 0x04 /* do get_page on page */
+#define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */
#ifdef CONFIG_PROC_FS
-void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
+void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
#else
-static inline void __vm_stat_account(struct mm_struct *mm,
+static inline void vm_stat_account(struct mm_struct *mm,
unsigned long flags, struct file *file, long pages)
{
}
#endif /* CONFIG_PROC_FS */
-static inline void vm_stat_account(struct vm_area_struct *vma)
-{
- __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
- vma_pages(vma));
-}
-
-static inline void vm_stat_unaccount(struct vm_area_struct *vma)
-{
- __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
- -vma_pages(vma));
-}
-
-/* update per process rss and vm hiwater data */
-extern void update_mem_hiwater(struct task_struct *tsk);
-
#ifndef CONFIG_DEBUG_PAGEALLOC
static inline void
kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7519eb4191e7..f5fa3082fd6a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -12,6 +12,7 @@
#include <linux/threads.h>
#include <linux/numa.h>
#include <linux/init.h>
+#include <linux/seqlock.h>
#include <asm/atomic.h>
/* Free memory management - zoned buddy allocator. */
@@ -137,6 +138,10 @@ struct zone {
* free areas of different sizes
*/
spinlock_t lock;
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /* see spanned/present_pages for more description */
+ seqlock_t span_seqlock;
+#endif
struct free_area free_area[MAX_ORDER];
@@ -220,6 +225,16 @@ struct zone {
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
unsigned long zone_start_pfn;
+ /*
+ * zone_start_pfn, spanned_pages and present_pages are all
+ * protected by span_seqlock. It is a seqlock because it has
+ * to be read outside of zone->lock, and it is done in the main
+ * allocator path. But, it is written quite infrequently.
+ *
+ * The lock is declared along with zone->lock because it is
+ * frequently read in proximity to zone->lock. It's good to
+ * give them a chance of being in the same cacheline.
+ */
unsigned long spanned_pages; /* total size, including holes */
unsigned long present_pages; /* amount of memory (excluding holes) */
@@ -273,6 +288,16 @@ typedef struct pglist_data {
struct page *node_mem_map;
#endif
struct bootmem_data *bdata;
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /*
+ * Must be held any time you expect node_start_pfn, node_present_pages
+ * or node_spanned_pages stay constant. Holding this will also
+ * guarantee that any pfn_valid() stays that way.
+ *
+ * Nests above zone->lock and zone->size_seqlock.
+ */
+ spinlock_t node_size_lock;
+#endif
unsigned long node_start_pfn;
unsigned long node_present_pages; /* total number of physical pages */
unsigned long node_spanned_pages; /* total size of physical page
@@ -293,6 +318,8 @@ typedef struct pglist_data {
#endif
#define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr))
+#include <linux/memory_hotplug.h>
+
extern struct pglist_data *pgdat_list;
void __get_zone_counts(unsigned long *active, unsigned long *inactive,
@@ -509,6 +536,7 @@ static inline struct mem_section *__nr_to_section(unsigned long nr)
return NULL;
return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
}
+extern int __section_nr(struct mem_section* ms);
/*
* We use the lower bits of the mem_map pointer to store
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index e80fb7ee6efd..35b30e6c8cf8 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -95,8 +95,8 @@ int try_to_unmap(struct page *);
/*
* Called from mm/filemap_xip.c to unmap empty zero page
*/
-pte_t *page_check_address(struct page *, struct mm_struct *, unsigned long);
-
+pte_t *page_check_address(struct page *, struct mm_struct *,
+ unsigned long, spinlock_t **);
/*
* Used by swapoff to help locate where page is expected in vma.
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index b52a2af25f1f..f30f805080ae 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -61,5 +61,10 @@ extern void FASTCALL(__up_read(struct rw_semaphore *sem));
extern void FASTCALL(__up_write(struct rw_semaphore *sem));
extern void FASTCALL(__downgrade_write(struct rw_semaphore *sem));
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return (sem->activity != 0);
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_RWSEM_SPINLOCK_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 27519df0f987..1c30bc308ef1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -249,6 +249,36 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
extern void arch_unmap_area(struct mm_struct *, unsigned long);
extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+/*
+ * The mm counters are not protected by its page_table_lock,
+ * so must be incremented atomically.
+ */
+#ifdef ATOMIC64_INIT
+#define set_mm_counter(mm, member, value) atomic64_set(&(mm)->_##member, value)
+#define get_mm_counter(mm, member) ((unsigned long)atomic64_read(&(mm)->_##member))
+#define add_mm_counter(mm, member, value) atomic64_add(value, &(mm)->_##member)
+#define inc_mm_counter(mm, member) atomic64_inc(&(mm)->_##member)
+#define dec_mm_counter(mm, member) atomic64_dec(&(mm)->_##member)
+typedef atomic64_t mm_counter_t;
+#else /* !ATOMIC64_INIT */
+/*
+ * The counters wrap back to 0 at 2^32 * PAGE_SIZE,
+ * that is, at 16TB if using 4kB page size.
+ */
+#define set_mm_counter(mm, member, value) atomic_set(&(mm)->_##member, value)
+#define get_mm_counter(mm, member) ((unsigned long)atomic_read(&(mm)->_##member))
+#define add_mm_counter(mm, member, value) atomic_add(value, &(mm)->_##member)
+#define inc_mm_counter(mm, member) atomic_inc(&(mm)->_##member)
+#define dec_mm_counter(mm, member) atomic_dec(&(mm)->_##member)
+typedef atomic_t mm_counter_t;
+#endif /* !ATOMIC64_INIT */
+
+#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+/*
+ * The mm counters are protected by its page_table_lock,
+ * so can be incremented directly.
+ */
#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
#define get_mm_counter(mm, member) ((mm)->_##member)
#define add_mm_counter(mm, member, value) (mm)->_##member += (value)
@@ -256,6 +286,20 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
#define dec_mm_counter(mm, member) (mm)->_##member--
typedef unsigned long mm_counter_t;
+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+
+#define get_mm_rss(mm) \
+ (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
+#define update_hiwater_rss(mm) do { \
+ unsigned long _rss = get_mm_rss(mm); \
+ if ((mm)->hiwater_rss < _rss) \
+ (mm)->hiwater_rss = _rss; \
+} while (0)
+#define update_hiwater_vm(mm) do { \
+ if ((mm)->hiwater_vm < (mm)->total_vm) \
+ (mm)->hiwater_vm = (mm)->total_vm; \
+} while (0)
+
struct mm_struct {
struct vm_area_struct * mmap; /* list of VMAs */
struct rb_root mm_rb;
@@ -279,15 +323,20 @@ struct mm_struct {
* by mmlist_lock
*/
+ /* Special counters, in some configurations protected by the
+ * page_table_lock, in other configurations by being atomic.
+ */
+ mm_counter_t _file_rss;
+ mm_counter_t _anon_rss;
+
+ unsigned long hiwater_rss; /* High-watermark of RSS usage */
+ unsigned long hiwater_vm; /* High-water virtual memory usage */
+
+ unsigned long total_vm, locked_vm, shared_vm, exec_vm;
+ unsigned long stack_vm, reserved_vm, def_flags, nr_ptes;
unsigned long start_code, end_code, start_data, end_data;
unsigned long start_brk, brk, start_stack;
unsigned long arg_start, arg_end, env_start, env_end;
- unsigned long total_vm, locked_vm, shared_vm;
- unsigned long exec_vm, stack_vm, reserved_vm, def_flags, nr_ptes;
-
- /* Special counters protected by the page_table_lock */
- mm_counter_t _rss;
- mm_counter_t _anon_rss;
unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
@@ -308,11 +357,7 @@ struct mm_struct {
/* aio bits */
rwlock_t ioctx_list_lock;
struct kioctx *ioctx_list;
-
struct kioctx default_kioctx;
-
- unsigned long hiwater_rss; /* High-water RSS usage */
- unsigned long hiwater_vm; /* High-water virtual memory usage */
};
struct sighand_struct {
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 3701a0673d2c..1d5577b2b752 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -32,10 +32,14 @@ struct vm_struct {
* Highlevel APIs for driver use
*/
extern void *vmalloc(unsigned long size);
+extern void *vmalloc_node(unsigned long size, int node);
extern void *vmalloc_exec(unsigned long size);
extern void *vmalloc_32(unsigned long size);
extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
-extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot);
+extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
+ pgprot_t prot);
+extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask,
+ pgprot_t prot, int node);
extern void vfree(void *addr);
extern void *vmap(struct page **pages, unsigned int count,
@@ -48,6 +52,8 @@ extern void vunmap(void *addr);
extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags);
extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
unsigned long start, unsigned long end);
+extern struct vm_struct *get_vm_area_node(unsigned long size,
+ unsigned long flags, int node);
extern struct vm_struct *remove_vm_area(void *addr);
extern struct vm_struct *__remove_vm_area(void *addr);
extern int map_vm_area(struct vm_struct *area, pgprot_t prot,