Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--  mm/memory-failure.c | 489
 1 file changed, 365 insertions(+), 124 deletions(-)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 85ad98c00fd9..eefd823deb67 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -56,6 +56,7 @@
#include <linux/kfifo.h>
#include <linux/ratelimit.h>
#include <linux/page-isolation.h>
+#include <linux/pagewalk.h>
#include "internal.h"
#include "ras/ras_event.h"
@@ -65,6 +66,19 @@ int sysctl_memory_failure_recovery __read_mostly = 1;
atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
+/*
+ * Disable the zone's pcp lists while trying to dissolve the huge page or to
+ * take the page off the buddy freelist, so that the error page cannot be
+ * served from a per-cpu list in the meantime.
+ */
+static bool __page_handle_poison(struct page *page)
+{
+ bool ret;
+
+ zone_pcp_disable(page_zone(page));
+ ret = dissolve_free_huge_page(page);
+ if (!ret)
+ ret = take_page_off_buddy(page);
+ zone_pcp_enable(page_zone(page));
+
+ return ret;
+}
+
static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, bool release)
{
if (hugepage_or_freepage) {
@@ -72,7 +86,7 @@ static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, boo
* Doing this check for free pages is also fine since dissolve_free_huge_page
* returns 0 for non-hugetlb pages as well.
*/
- if (dissolve_free_huge_page(page) || !take_page_off_buddy(page))
+ if (!__page_handle_poison(page))
/*
* We could fail to take off the target page from buddy
* for example due to racy page allocation, but that's
@@ -554,6 +568,148 @@ static void collect_procs(struct page *page, struct list_head *tokill,
collect_procs_file(page, tokill, force_early);
}
+struct hwp_walk {
+ struct to_kill tk;
+ unsigned long pfn;
+ int flags;
+};
+
+static void set_to_kill(struct to_kill *tk, unsigned long addr, short shift)
+{
+ tk->addr = addr;
+ tk->size_shift = shift;
+}
+
+static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
+ unsigned long poisoned_pfn, struct to_kill *tk)
+{
+ unsigned long pfn = 0;
+
+ if (pte_present(pte)) {
+ pfn = pte_pfn(pte);
+ } else {
+ swp_entry_t swp = pte_to_swp_entry(pte);
+
+ if (is_hwpoison_entry(swp))
+ pfn = hwpoison_entry_to_pfn(swp);
+ }
+
+ if (!pfn || pfn != poisoned_pfn)
+ return 0;
+
+ set_to_kill(tk, addr, shift);
+ return 1;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
+ struct hwp_walk *hwp)
+{
+ pmd_t pmd = *pmdp;
+ unsigned long pfn;
+ unsigned long hwpoison_vaddr;
+
+ if (!pmd_present(pmd))
+ return 0;
+ pfn = pmd_pfn(pmd);
+ if (pfn <= hwp->pfn && hwp->pfn < pfn + HPAGE_PMD_NR) {
+ hwpoison_vaddr = addr + ((hwp->pfn - pfn) << PAGE_SHIFT);
+ set_to_kill(&hwp->tk, hwpoison_vaddr, PAGE_SHIFT);
+ return 1;
+ }
+ return 0;
+}
+#else
+static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
+ struct hwp_walk *hwp)
+{
+ return 0;
+}
+#endif
+
+static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
+ int ret = 0;
+ pte_t *ptep;
+ spinlock_t *ptl;
+
+ ptl = pmd_trans_huge_lock(pmdp, walk->vma);
+ if (ptl) {
+ ret = check_hwpoisoned_pmd_entry(pmdp, addr, hwp);
+ spin_unlock(ptl);
+ goto out;
+ }
+
+ if (pmd_trans_unstable(pmdp))
+ goto out;
+
+ ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp, addr, &ptl);
+ for (; addr != end; ptep++, addr += PAGE_SIZE) {
+ ret = check_hwpoisoned_entry(*ptep, addr, PAGE_SHIFT,
+ hwp->pfn, &hwp->tk);
+ if (ret == 1)
+ break;
+ }
+ pte_unmap_unlock(ptep - 1, ptl);
+out:
+ cond_resched();
+ return ret;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
+ unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
+ pte_t pte = huge_ptep_get(ptep);
+ struct hstate *h = hstate_vma(walk->vma);
+
+ return check_hwpoisoned_entry(pte, addr, huge_page_shift(h),
+ hwp->pfn, &hwp->tk);
+}
+#else
+#define hwpoison_hugetlb_range NULL
+#endif
+
+static struct mm_walk_ops hwp_walk_ops = {
+ .pmd_entry = hwpoison_pte_range,
+ .hugetlb_entry = hwpoison_hugetlb_range,
+};
+
+/*
+ * Sends SIGBUS to the current process with error info.
+ *
+ * This function is intended to handle "Action Required" MCEs on already
+ * hardware poisoned pages. They could happen, for example, when
+ * memory_failure() failed to unmap the error page at the first call, or
+ * when multiple local machine checks happened on different CPUs.
+ *
+ * The MCE handler currently has no easy access to the error virtual address,
+ * so this function walks the page tables to find it. The returned virtual
+ * address is correct in most cases, but it could be wrong when the
+ * application process has multiple entries mapping the error page.
+ */
+static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
+ int flags)
+{
+ int ret;
+ struct hwp_walk priv = {
+ .pfn = pfn,
+ };
+ priv.tk.tsk = p;
+
+ mmap_read_lock(p->mm);
+ ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
+ (void *)&priv);
+ if (ret == 1 && priv.tk.addr)
+ kill_proc(&priv.tk, pfn, flags);
+ mmap_read_unlock(p->mm);
+ return ret ? -EFAULT : -EHWPOISON;
+}
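
For context, the path served by kill_accessing_process() can be exercised from userspace. The following is a minimal demo sketch, not part of this patch: it assumes CONFIG_MEMORY_FAILURE=y and CAP_SYS_ADMIN so that madvise(MADV_HWPOISON) can inject the error, and it shows the SIGBUS whose siginfo carries the error virtual address that the page-table walk above recovers.

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

static void sigbus_handler(int sig, siginfo_t *si, void *uc)
{
	/*
	 * si_addr is the poisoned virtual address; si_code is
	 * BUS_MCEERR_AR or BUS_MCEERR_AO depending on how the error
	 * was consumed.
	 */
	fprintf(stderr, "SIGBUS: code=%d addr=%p\n", si->si_code, si->si_addr);
	_exit(0);
}

int main(void)
{
	struct sigaction sa = {
		.sa_sigaction = sigbus_handler,
		.sa_flags = SA_SIGINFO,
	};
	long psz = sysconf(_SC_PAGESIZE);
	char *p;

	sigaction(SIGBUS, &sa, NULL);
	p = mmap(NULL, psz, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;
	p[0] = 1;				/* fault the page in */
	if (madvise(p, psz, MADV_HWPOISON))	/* inject the error */
		perror("madvise(MADV_HWPOISON)");
	p[0] = 2;		/* access to the poisoned page raises SIGBUS */
	return 1;
}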
+
static const char *action_name[] = {
[MF_IGNORED] = "Ignored",
[MF_FAILED] = "Failed",
@@ -658,6 +814,7 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
*/
static int me_kernel(struct page *p, unsigned long pfn)
{
+ unlock_page(p);
return MF_IGNORED;
}
@@ -667,6 +824,7 @@ static int me_kernel(struct page *p, unsigned long pfn)
static int me_unknown(struct page *p, unsigned long pfn)
{
pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
+ unlock_page(p);
return MF_FAILED;
}
@@ -675,6 +833,7 @@ static int me_unknown(struct page *p, unsigned long pfn)
*/
static int me_pagecache_clean(struct page *p, unsigned long pfn)
{
+ int ret;
struct address_space *mapping;
delete_from_lru_cache(p);
@@ -683,8 +842,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
* For anonymous pages we're done; the only reference left
* should be the one m_f() holds.
*/
- if (PageAnon(p))
- return MF_RECOVERED;
+ if (PageAnon(p)) {
+ ret = MF_RECOVERED;
+ goto out;
+ }
/*
* Now truncate the page in the page cache. This is really
@@ -698,7 +859,8 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
/*
* Page has been torn down in the meantime
*/
- return MF_FAILED;
+ ret = MF_FAILED;
+ goto out;
}
/*
@@ -706,7 +868,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
*
* Open: to take i_mutex or not for this? Right now we don't.
*/
- return truncate_error_page(p, pfn, mapping);
+ ret = truncate_error_page(p, pfn, mapping);
+out:
+ unlock_page(p);
+ return ret;
}
/*
@@ -782,24 +947,26 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
*/
static int me_swapcache_dirty(struct page *p, unsigned long pfn)
{
+ int ret;
+
ClearPageDirty(p);
/* Trigger EIO in shmem: */
ClearPageUptodate(p);
- if (!delete_from_lru_cache(p))
- return MF_DELAYED;
- else
- return MF_FAILED;
+ ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
+ unlock_page(p);
+ return ret;
}
static int me_swapcache_clean(struct page *p, unsigned long pfn)
{
+ int ret;
+
delete_from_swap_cache(p);
- if (!delete_from_lru_cache(p))
- return MF_RECOVERED;
- else
- return MF_FAILED;
+ ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
+ unlock_page(p);
+ return ret;
}
/*
@@ -820,6 +987,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
mapping = page_mapping(hpage);
if (mapping) {
res = truncate_error_page(hpage, pfn, mapping);
+ unlock_page(hpage);
} else {
res = MF_FAILED;
unlock_page(hpage);
@@ -830,11 +998,10 @@ static int me_huge_page(struct page *p, unsigned long pfn)
*/
if (PageAnon(hpage))
put_page(hpage);
- if (!dissolve_free_huge_page(p) && take_page_off_buddy(p)) {
+ if (__page_handle_poison(p)) {
page_ref_inc(p);
res = MF_RECOVERED;
}
- lock_page(hpage);
}
return res;
@@ -866,6 +1033,8 @@ static struct page_state {
unsigned long mask;
unsigned long res;
enum mf_action_page_type type;
+
+ /* Callback ->action() has to unlock the relevant page inside it. */
int (*action)(struct page *p, unsigned long pfn);
} error_states[] = {
{ reserved, reserved, MF_MSG_KERNEL, me_kernel },
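
The comment in this hunk spells out a new locking contract: every error_states[].action() callback must now drop the page lock itself before returning its MF_* result, instead of leaving that to the caller. A hypothetical handler following the new convention would be shaped like this (illustrative only; me_example() is not in the patch):

static int me_example(struct page *p, unsigned long pfn)
{
	int ret = MF_RECOVERED;

	/* ... handle the error page while it is still locked ... */

	unlock_page(p);		/* the callback, not page_action(), unlocks */
	return ret;
}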
@@ -929,6 +1098,7 @@ static int page_action(struct page_state *ps, struct page *p,
int result;
int count;
+ /* page p should be unlocked after returning from ps->action(). */
result = ps->action(p, pfn);
count = page_count(p) - 1;
@@ -949,18 +1119,36 @@ static int page_action(struct page_state *ps, struct page *p,
return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
}
-/**
- * __get_hwpoison_page() - Get refcount for memory error handling:
- * @page: raw error page (hit by memory error)
- *
- * Return: return 0 if failed to grab the refcount, otherwise true (some
- * non-zero value.)
+/*
+ * Return true if the page type of the given page is supported by the
+ * hwpoison mechanism (though handling may still fail), otherwise false.
+ * This function does not return true for hugetlb or device memory pages,
+ * so it is assumed to be called only in contexts where such pages never occur.
*/
+static inline bool HWPoisonHandlable(struct page *page)
+{
+ return PageLRU(page) || __PageMovable(page);
+}
+
static int __get_hwpoison_page(struct page *page)
{
struct page *head = compound_head(page);
+ int ret = 0;
+ bool hugetlb = false;
+
+ ret = get_hwpoison_huge_page(head, &hugetlb);
+ if (hugetlb)
+ return ret;
+
+ /*
+ * This check prevents calling get_page_unless_zero() for any
+ * unsupported type of page, in order to reduce the risk of
+ * unexpected races caused by taking a page refcount.
+ */
+ if (!HWPoisonHandlable(head))
+ return 0;
- if (!PageHuge(head) && PageTransHuge(head)) {
+ if (PageTransHuge(head)) {
/*
* Non anonymous thp exists only in allocation/free time. We
* can't handle such a case correctly, so let's give it up.
@@ -986,15 +1174,6 @@ static int __get_hwpoison_page(struct page *page)
return 0;
}
-/*
- * Safely get reference count of an arbitrary page.
- *
- * Returns 0 for a free page, 1 for an in-use page,
- * -EIO for a page-type we cannot handle and -EBUSY if we raced with an
- * allocation.
- * We only incremented refcount in case the page was already in-use and it
- * is a known type we can handle.
- */
static int get_any_page(struct page *p, unsigned long flags)
{
int ret = 0, pass = 0;
@@ -1004,50 +1183,77 @@ static int get_any_page(struct page *p, unsigned long flags)
count_increased = true;
try_again:
- if (!count_increased && !__get_hwpoison_page(p)) {
- if (page_count(p)) {
- /* We raced with an allocation, retry. */
- if (pass++ < 3)
- goto try_again;
- ret = -EBUSY;
- } else if (!PageHuge(p) && !is_free_buddy_page(p)) {
- /* We raced with put_page, retry. */
+ if (!count_increased) {
+ ret = __get_hwpoison_page(p);
+ if (!ret) {
+ if (page_count(p)) {
+ /* We raced with an allocation, retry. */
+ if (pass++ < 3)
+ goto try_again;
+ ret = -EBUSY;
+ } else if (!PageHuge(p) && !is_free_buddy_page(p)) {
+ /* We raced with put_page, retry. */
+ if (pass++ < 3)
+ goto try_again;
+ ret = -EIO;
+ }
+ goto out;
+ } else if (ret == -EBUSY) {
+ /* We raced with freeing huge page to buddy, retry. */
if (pass++ < 3)
goto try_again;
- ret = -EIO;
+ goto out;
}
+ }
+
+ if (PageHuge(p) || HWPoisonHandlable(p)) {
+ ret = 1;
} else {
- if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
- ret = 1;
- } else {
- /*
- * A page we cannot handle. Check whether we can turn
- * it into something we can handle.
- */
- if (pass++ < 3) {
- put_page(p);
- shake_page(p, 1);
- count_increased = false;
- goto try_again;
- }
+ /*
+ * A page we cannot handle. Check whether we can turn
+ * it into something we can handle.
+ */
+ if (pass++ < 3) {
put_page(p);
- ret = -EIO;
+ shake_page(p, 1);
+ count_increased = false;
+ goto try_again;
}
+ put_page(p);
+ ret = -EIO;
}
-
+out:
return ret;
}
-static int get_hwpoison_page(struct page *p, unsigned long flags,
- enum mf_flags ctxt)
+/**
+ * get_hwpoison_page() - Get refcount for memory error handling
+ * @p: Raw error page (hit by memory error)
+ * @flags: Flags controlling behavior of error handling
+ *
+ * get_hwpoison_page() takes a page refcount on an error page in order to
+ * handle the memory error on it, after checking that the error page is in
+ * a well-defined state (defined as a page type on which we can successfully
+ * handle the memory error, such as LRU pages and hugetlb pages).
+ *
+ * Memory error handling can be triggered at any time on any type of page,
+ * so it's prone to race with the typical memory management lifecycle (like
+ * allocation and free). To avoid such races, get_hwpoison_page() takes
+ * extra care with the error page's state (as done in __get_hwpoison_page())
+ * and has some retry logic in get_any_page().
+ *
+ * Return: 0 on failure,
+ * 1 on success for in-use pages in a well-defined state,
+ * -EIO for pages on which we can not handle memory errors,
+ * -EBUSY when get_hwpoison_page() has raced with page lifecycle
+ * operations like allocation and free.
+ */
+static int get_hwpoison_page(struct page *p, unsigned long flags)
{
int ret;
zone_pcp_disable(page_zone(p));
- if (ctxt == MF_SOFT_OFFLINE)
- ret = get_any_page(p, flags);
- else
- ret = __get_hwpoison_page(p);
+ ret = get_any_page(p, flags);
zone_pcp_enable(page_zone(p));
return ret;
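
Given the return-value contract documented above, callers are expected to branch on all four outcomes. A sketch of the expected usage, with handle_in_use_page() and handle_free_page() as hypothetical stand-ins for the real handling seen later in memory_failure():

static int handle_error_page(struct page *p, unsigned long flags)
{
	int ret = get_hwpoison_page(p, flags);

	if (ret == 1)		/* refcount taken; page in a handlable state */
		return handle_in_use_page(p);
	if (ret == 0)		/* free page; no refcount taken */
		return handle_free_page(p);
	return ret;		/* -EIO or -EBUSY: unsupported type or race */
}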
@@ -1060,10 +1266,10 @@ static int get_hwpoison_page(struct page *p, unsigned long flags,
static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
int flags, struct page **hpagep)
{
- enum ttu_flags ttu = TTU_IGNORE_MLOCK;
+ enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC;
struct address_space *mapping;
LIST_HEAD(tokill);
- bool unmap_success = true;
+ bool unmap_success;
int kill = 1, forcekill;
struct page *hpage = *hpagep;
bool mlocked = PageMlocked(hpage);
@@ -1126,7 +1332,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
if (!PageHuge(hpage)) {
- unmap_success = try_to_unmap(hpage, ttu);
+ try_to_unmap(hpage, ttu);
} else {
if (!PageAnon(hpage)) {
/*
@@ -1134,21 +1340,20 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
* could potentially call huge_pmd_unshare. Because of
* this, take semaphore in write mode here and set
* TTU_RMAP_LOCKED to indicate we have taken the lock
- * at this higer level.
+ * at this higher level.
*/
mapping = hugetlb_page_mapping_lock_write(hpage);
if (mapping) {
- unmap_success = try_to_unmap(hpage,
- ttu|TTU_RMAP_LOCKED);
+ try_to_unmap(hpage, ttu|TTU_RMAP_LOCKED);
i_mmap_unlock_write(mapping);
- } else {
+ } else
pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
- unmap_success = false;
- }
} else {
- unmap_success = try_to_unmap(hpage, ttu);
+ try_to_unmap(hpage, ttu);
}
}
+
+ unmap_success = !page_mapped(hpage);
if (!unmap_success)
pr_err("Memory failure: %#lx: failed to unmap page (mapcount=%d)\n",
pfn, page_mapcount(hpage));
@@ -1228,32 +1433,41 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
if (TestSetPageHWPoison(head)) {
pr_err("Memory failure: %#lx: already hardware poisoned\n",
pfn);
- return 0;
+ res = -EHWPOISON;
+ if (flags & MF_ACTION_REQUIRED)
+ res = kill_accessing_process(current, page_to_pfn(head), flags);
+ return res;
}
num_poisoned_pages_inc();
- if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
- /*
- * Check "filter hit" and "race with other subpage."
- */
- lock_page(head);
- if (PageHWPoison(head)) {
- if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
- || (p != head && TestSetPageHWPoison(head))) {
- num_poisoned_pages_dec();
- unlock_page(head);
- return 0;
+ if (!(flags & MF_COUNT_INCREASED)) {
+ res = get_hwpoison_page(p, flags);
+ if (!res) {
+ /*
+ * Check "filter hit" and "race with other subpage."
+ */
+ lock_page(head);
+ if (PageHWPoison(head)) {
+ if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
+ || (p != head && TestSetPageHWPoison(head))) {
+ num_poisoned_pages_dec();
+ unlock_page(head);
+ return 0;
+ }
}
+ unlock_page(head);
+ res = MF_FAILED;
+ if (__page_handle_poison(p)) {
+ page_ref_inc(p);
+ res = MF_RECOVERED;
+ }
+ action_result(pfn, MF_MSG_FREE_HUGE, res);
+ return res == MF_RECOVERED ? 0 : -EBUSY;
+ } else if (res < 0) {
+ action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
+ return -EBUSY;
}
- unlock_page(head);
- res = MF_FAILED;
- if (!dissolve_free_huge_page(p) && take_page_off_buddy(p)) {
- page_ref_inc(p);
- res = MF_RECOVERED;
- }
- action_result(pfn, MF_MSG_FREE_HUGE, res);
- return res == MF_RECOVERED ? 0 : -EBUSY;
}
lock_page(head);
@@ -1288,7 +1502,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
goto out;
}
- res = identify_page_state(pfn, p, page_flags);
+ return identify_page_state(pfn, p, page_flags);
out:
unlock_page(head);
return res;
@@ -1404,9 +1618,10 @@ int memory_failure(unsigned long pfn, int flags)
struct page *hpage;
struct page *orig_head;
struct dev_pagemap *pgmap;
- int res;
+ int res = 0;
unsigned long page_flags;
bool retry = true;
+ static DEFINE_MUTEX(mf_mutex);
if (!sysctl_memory_failure_recovery)
panic("Memory failure on page %lx", pfn);
@@ -1424,13 +1639,21 @@ int memory_failure(unsigned long pfn, int flags)
return -ENXIO;
}
+ mutex_lock(&mf_mutex);
+
try_again:
- if (PageHuge(p))
- return memory_failure_hugetlb(pfn, flags);
+ if (PageHuge(p)) {
+ res = memory_failure_hugetlb(pfn, flags);
+ goto unlock_mutex;
+ }
+
if (TestSetPageHWPoison(p)) {
pr_err("Memory failure: %#lx: already hardware poisoned\n",
pfn);
- return 0;
+ res = -EHWPOISON;
+ if (flags & MF_ACTION_REQUIRED)
+ res = kill_accessing_process(current, pfn, flags);
+ goto unlock_mutex;
}
orig_head = hpage = compound_head(p);
@@ -1447,33 +1670,42 @@ try_again:
* In fact it's dangerous to directly bump up page count from 0,
* that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
*/
- if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
- if (is_free_buddy_page(p)) {
- if (take_page_off_buddy(p)) {
- page_ref_inc(p);
- res = MF_RECOVERED;
- } else {
- /* We lost the race, try again */
- if (retry) {
- ClearPageHWPoison(p);
- num_poisoned_pages_dec();
- retry = false;
- goto try_again;
+ if (!(flags & MF_COUNT_INCREASED)) {
+ res = get_hwpoison_page(p, flags);
+ if (!res) {
+ if (is_free_buddy_page(p)) {
+ if (take_page_off_buddy(p)) {
+ page_ref_inc(p);
+ res = MF_RECOVERED;
+ } else {
+ /* We lost the race, try again */
+ if (retry) {
+ ClearPageHWPoison(p);
+ num_poisoned_pages_dec();
+ retry = false;
+ goto try_again;
+ }
+ res = MF_FAILED;
}
- res = MF_FAILED;
+ action_result(pfn, MF_MSG_BUDDY, res);
+ res = res == MF_RECOVERED ? 0 : -EBUSY;
+ } else {
+ action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
+ res = -EBUSY;
}
- action_result(pfn, MF_MSG_BUDDY, res);
- return res == MF_RECOVERED ? 0 : -EBUSY;
- } else {
- action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
- return -EBUSY;
+ goto unlock_mutex;
+ } else if (res < 0) {
+ action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
+ res = -EBUSY;
+ goto unlock_mutex;
}
}
if (PageTransHuge(hpage)) {
if (try_to_split_thp_page(p, "Memory Failure") < 0) {
action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
- return -EBUSY;
+ res = -EBUSY;
+ goto unlock_mutex;
}
VM_BUG_ON_PAGE(!page_count(p), p);
}
@@ -1497,7 +1729,7 @@ try_again:
if (PageCompound(p) && compound_head(p) != orig_head) {
action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
res = -EBUSY;
- goto out;
+ goto unlock_page;
}
/*
@@ -1517,17 +1749,22 @@ try_again:
num_poisoned_pages_dec();
unlock_page(p);
put_page(p);
- return 0;
+ goto unlock_mutex;
}
if (hwpoison_filter(p)) {
if (TestClearPageHWPoison(p))
num_poisoned_pages_dec();
unlock_page(p);
put_page(p);
- return 0;
+ goto unlock_mutex;
}
- if (!PageTransTail(p) && !PageLRU(p))
+ /*
+ * __munlock_pagevec() may clear a writeback page's LRU flag without
+ * the page lock. We need to wait for writeback completion for this
+ * page, or it may trigger a vfs BUG while evicting the inode.
+ */
+ if (!PageTransTail(p) && !PageLRU(p) && !PageWriteback(p))
goto identify_page_state;
/*
@@ -1543,7 +1780,7 @@ try_again:
if (!hwpoison_user_mappings(p, pfn, flags, &p)) {
action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
res = -EBUSY;
- goto out;
+ goto unlock_page;
}
/*
@@ -1552,13 +1789,17 @@ try_again:
if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
res = -EBUSY;
- goto out;
+ goto unlock_page;
}
identify_page_state:
res = identify_page_state(pfn, p, page_flags);
-out:
+ mutex_unlock(&mf_mutex);
+ return res;
+unlock_page:
unlock_page(p);
+unlock_mutex:
+ mutex_unlock(&mf_mutex);
return res;
}
EXPORT_SYMBOL_GPL(memory_failure);
@@ -1735,7 +1976,7 @@ int unpoison_memory(unsigned long pfn)
return 0;
}
- if (!get_hwpoison_page(p, flags, 0)) {
+ if (!get_hwpoison_page(p, flags)) {
if (TestClearPageHWPoison(p))
num_poisoned_pages_dec();
unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
@@ -1951,7 +2192,7 @@ int soft_offline_page(unsigned long pfn, int flags)
retry:
get_online_mems();
- ret = get_hwpoison_page(page, flags, MF_SOFT_OFFLINE);
+ ret = get_hwpoison_page(page, flags);
put_online_mems();
if (ret > 0) {
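
As with the hard-poisoning path, soft offline is reachable from userspace, which makes the unified get_hwpoison_page() call above easy to exercise. A minimal sketch, not part of this patch (assumes CAP_SYS_ADMIN and CONFIG_MEMORY_FAILURE=y): madvise(MADV_SOFT_OFFLINE) migrates the page's contents to a new page and offlines the old one, without killing anything.

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, psz, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	p[0] = 1;	/* populate the page so there is something to migrate */
	/* contents move to a new page; the old physical page is offlined */
	if (madvise(p, psz, MADV_SOFT_OFFLINE))
		perror("madvise(MADV_SOFT_OFFLINE)");
	return 0;
}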