summaryrefslogtreecommitdiff
path: root/fs/nfs/write.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs/write.c')
-rw-r--r--fs/nfs/write.c339
1 files changed, 285 insertions, 54 deletions
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3ee5af4e738e..5e2f10304548 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_commit_ops;
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
static const struct nfs_rw_ops nfs_rw_write_ops;
+static void nfs_clear_request_commit(struct nfs_page *req);
static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
@@ -91,8 +92,15 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}
+/*
+ * nfs_page_find_head_request_locked - find head request associated with @page
+ *
+ * must be called while holding the inode lock.
+ *
+ * returns matching head request with reference held, or NULL if not found.
+ */
static struct nfs_page *
-nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
+nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
{
struct nfs_page *req = NULL;
@@ -104,25 +112,33 @@ nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
/* Linearly search the commit list for the correct req */
list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
if (freq->wb_page == page) {
- req = freq;
+ req = freq->wb_head;
break;
}
}
}
- if (req)
+ if (req) {
+ WARN_ON_ONCE(req->wb_head != req);
+
kref_get(&req->wb_kref);
+ }
return req;
}
-static struct nfs_page *nfs_page_find_request(struct page *page)
+/*
+ * nfs_page_find_head_request - find head request associated with @page
+ *
+ * returns matching head request with reference held, or NULL if not found.
+ */
+static struct nfs_page *nfs_page_find_head_request(struct page *page)
{
struct inode *inode = page_file_mapping(page)->host;
struct nfs_page *req = NULL;
spin_lock(&inode->i_lock);
- req = nfs_page_find_request_locked(NFS_I(inode), page);
+ req = nfs_page_find_head_request_locked(NFS_I(inode), page);
spin_unlock(&inode->i_lock);
return req;
}
@@ -274,36 +290,246 @@ static void nfs_end_page_writeback(struct nfs_page *req)
clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}
-static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
+
+/* nfs_page_group_clear_bits
+ * @req - an nfs request
+ * clears all page group related bits from @req
+ */
+static void
+nfs_page_group_clear_bits(struct nfs_page *req)
+{
+ clear_bit(PG_TEARDOWN, &req->wb_flags);
+ clear_bit(PG_UNLOCKPAGE, &req->wb_flags);
+ clear_bit(PG_UPTODATE, &req->wb_flags);
+ clear_bit(PG_WB_END, &req->wb_flags);
+ clear_bit(PG_REMOVE, &req->wb_flags);
+}
+
+
+/*
+ * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req
+ *
+ * this is a helper function for nfs_lock_and_join_requests
+ *
+ * @inode - inode associated with request page group, must be holding inode lock
+ * @head - head request of page group, must be holding head lock
+ * @req - request that couldn't lock and needs to wait on the req bit lock
+ * @nonblock - if true, don't actually wait
+ *
+ * NOTE: this must be called holding page_group bit lock and inode spin lock
+ * and BOTH will be released before returning.
+ *
+ * returns 0 on success, < 0 on error.
+ */
+static int
+nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
+ struct nfs_page *req, bool nonblock)
+ __releases(&inode->i_lock)
+{
+ struct nfs_page *tmp;
+ int ret;
+
+ /* relinquish all the locks successfully grabbed this run */
+ for (tmp = head ; tmp != req; tmp = tmp->wb_this_page)
+ nfs_unlock_request(tmp);
+
+ WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
+
+ /* grab a ref on the request that will be waited on */
+ kref_get(&req->wb_kref);
+
+ nfs_page_group_unlock(head);
+ spin_unlock(&inode->i_lock);
+
+ /* release ref from nfs_page_find_head_request_locked */
+ nfs_release_request(head);
+
+ if (!nonblock)
+ ret = nfs_wait_on_request(req);
+ else
+ ret = -EAGAIN;
+ nfs_release_request(req);
+
+ return ret;
+}
+
+/*
+ * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
+ *
+ * @destroy_list - request list (using wb_this_page) terminated by @old_head
+ * @old_head - the old head of the list
+ *
+ * All subrequests must be locked and removed from all lists, so at this point
+ * they are only "active" in this function, and possibly in nfs_wait_on_request
+ * with a reference held by some other context.
+ */
+static void
+nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
+ struct nfs_page *old_head)
+{
+ while (destroy_list) {
+ struct nfs_page *subreq = destroy_list;
+
+ destroy_list = (subreq->wb_this_page == old_head) ?
+ NULL : subreq->wb_this_page;
+
+ WARN_ON_ONCE(old_head != subreq->wb_head);
+
+ /* make sure old group is not used */
+ subreq->wb_head = subreq;
+ subreq->wb_this_page = subreq;
+
+ nfs_clear_request_commit(subreq);
+
+ /* subreq is now totally disconnected from page group or any
+ * write / commit lists. last chance to wake any waiters */
+ nfs_unlock_request(subreq);
+
+ if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {
+ /* release ref on old head request */
+ nfs_release_request(old_head);
+
+ nfs_page_group_clear_bits(subreq);
+
+ /* release the PG_INODE_REF reference */
+ if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
+ nfs_release_request(subreq);
+ else
+ WARN_ON_ONCE(1);
+ } else {
+ WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
+ /* zombie requests have already released the last
+ * reference and were waiting on the rest of the
+ * group to complete. Since it's no longer part of a
+ * group, simply free the request */
+ nfs_page_group_clear_bits(subreq);
+ nfs_free_request(subreq);
+ }
+ }
+}
+
+/*
+ * nfs_lock_and_join_requests - join all subreqs to the head req and return
+ * a locked reference, cancelling any pending
+ * operations for this page.
+ *
+ * @page - the page used to lookup the "page group" of nfs_page structures
+ * @nonblock - if true, don't block waiting for request locks
+ *
+ * This function joins all sub requests to the head request by first
+ * locking all requests in the group, cancelling any pending operations
+ * and finally updating the head request to cover the whole range covered by
+ * the (former) group. All subrequests are removed from any write or commit
+ * lists, unlinked from the group and destroyed.
+ *
+ * Returns a locked, referenced pointer to the head request - which after
+ * this call is guaranteed to be the only request associated with the page.
+ * Returns NULL if no requests are found for @page, or a ERR_PTR if an
+ * error was encountered.
+ */
+static struct nfs_page *
+nfs_lock_and_join_requests(struct page *page, bool nonblock)
{
struct inode *inode = page_file_mapping(page)->host;
- struct nfs_page *req;
+ struct nfs_page *head, *subreq;
+ struct nfs_page *destroy_list = NULL;
+ unsigned int total_bytes;
int ret;
+try_again:
+ total_bytes = 0;
+
+ WARN_ON_ONCE(destroy_list);
+
spin_lock(&inode->i_lock);
- for (;;) {
- req = nfs_page_find_request_locked(NFS_I(inode), page);
- if (req == NULL)
- break;
- if (nfs_lock_request(req))
- break;
- /* Note: If we hold the page lock, as is the case in nfs_writepage,
- * then the call to nfs_lock_request() will always
- * succeed provided that someone hasn't already marked the
- * request as dirty (in which case we don't care).
- */
+
+ /*
+ * A reference is taken only on the head request which acts as a
+ * reference to the whole page group - the group will not be destroyed
+ * until the head reference is released.
+ */
+ head = nfs_page_find_head_request_locked(NFS_I(inode), page);
+
+ if (!head) {
spin_unlock(&inode->i_lock);
- if (!nonblock)
- ret = nfs_wait_on_request(req);
- else
- ret = -EAGAIN;
- nfs_release_request(req);
- if (ret != 0)
+ return NULL;
+ }
+
+ /* lock each request in the page group */
+ nfs_page_group_lock(head);
+ subreq = head;
+ do {
+ /*
+ * Subrequests are always contiguous, non overlapping
+ * and in order. If not, it's a programming error.
+ */
+ WARN_ON_ONCE(subreq->wb_offset !=
+ (head->wb_offset + total_bytes));
+
+ /* keep track of how many bytes this group covers */
+ total_bytes += subreq->wb_bytes;
+
+ if (!nfs_lock_request(subreq)) {
+ /* releases page group bit lock and
+ * inode spin lock and all references */
+ ret = nfs_unroll_locks_and_wait(inode, head,
+ subreq, nonblock);
+
+ if (ret == 0)
+ goto try_again;
+
return ERR_PTR(ret);
- spin_lock(&inode->i_lock);
+ }
+
+ subreq = subreq->wb_this_page;
+ } while (subreq != head);
+
+ /* Now that all requests are locked, make sure they aren't on any list.
+ * Commit list removal accounting is done after locks are dropped */
+ subreq = head;
+ do {
+ nfs_list_remove_request(subreq);
+ subreq = subreq->wb_this_page;
+ } while (subreq != head);
+
+ /* unlink subrequests from head, destroy them later */
+ if (head->wb_this_page != head) {
+ /* destroy list will be terminated by head */
+ destroy_list = head->wb_this_page;
+ head->wb_this_page = head;
+
+ /* change head request to cover whole range that
+ * the former page group covered */
+ head->wb_bytes = total_bytes;
}
+
+ /*
+ * prepare head request to be added to new pgio descriptor
+ */
+ nfs_page_group_clear_bits(head);
+
+ /*
+ * some part of the group was still on the inode list - otherwise
+ * the group wouldn't be involved in async write.
+ * grab a reference for the head request, iff it needs one.
+ */
+ if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
+ kref_get(&head->wb_kref);
+
+ nfs_page_group_unlock(head);
+
+ /* drop lock to clear_request_commit the head req and clean up
+ * requests on destroy list */
spin_unlock(&inode->i_lock);
- return req;
+
+ nfs_destroy_unlinked_subrequests(destroy_list, head);
+
+ /* clean up commit list state */
+ nfs_clear_request_commit(head);
+
+ /* still holds ref on head from nfs_page_find_head_request_locked
+ * and still has lock on head from lock loop */
+ return head;
}
/*
@@ -316,7 +542,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req;
int ret = 0;
- req = nfs_find_and_lock_request(page, nonblock);
+ req = nfs_lock_and_join_requests(page, nonblock);
if (!req)
goto out;
ret = PTR_ERR(req);
@@ -448,7 +674,9 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
set_page_private(req->wb_page, (unsigned long)req);
}
nfsi->npages++;
- set_bit(PG_INODE_REF, &req->wb_flags);
+ /* this a head request for a page group - mark it as having an
+ * extra reference so sub groups can follow suit */
+ WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
kref_get(&req->wb_kref);
spin_unlock(&inode->i_lock);
}
@@ -474,7 +702,9 @@ static void nfs_inode_remove_request(struct nfs_page *req)
nfsi->npages--;
spin_unlock(&inode->i_lock);
}
- nfs_release_request(req);
+
+ if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
+ nfs_release_request(req);
}
static void
@@ -638,7 +868,6 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_commit_info cinfo;
unsigned long bytes = 0;
- bool do_destroy;
if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
goto out;
@@ -668,7 +897,6 @@ remove_req:
next:
nfs_unlock_request(req);
nfs_end_page_writeback(req);
- do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
nfs_release_request(req);
}
out:
@@ -769,7 +997,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
spin_lock(&inode->i_lock);
for (;;) {
- req = nfs_page_find_request_locked(NFS_I(inode), page);
+ req = nfs_page_find_head_request_locked(NFS_I(inode), page);
if (req == NULL)
goto out_unlock;
@@ -877,7 +1105,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
* dropped page.
*/
do {
- req = nfs_page_find_request(page);
+ req = nfs_page_find_head_request(page);
if (req == NULL)
return 0;
l_ctx = req->wb_lock_context;
@@ -934,12 +1162,14 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
if (nfs_have_delegated_attributes(inode))
goto out;
- if (nfsi->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE))
+ if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
return false;
smp_rmb();
if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
return false;
out:
+ if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+ return false;
return PageUptodate(page) != 0;
}
@@ -1567,27 +1797,28 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
struct nfs_page *req;
int ret = 0;
- for (;;) {
- wait_on_page_writeback(page);
- req = nfs_page_find_request(page);
- if (req == NULL)
- break;
- if (nfs_lock_request(req)) {
- nfs_clear_request_commit(req);
- nfs_inode_remove_request(req);
- /*
- * In case nfs_inode_remove_request has marked the
- * page as being dirty
- */
- cancel_dirty_page(page, PAGE_CACHE_SIZE);
- nfs_unlock_and_release_request(req);
- break;
- }
- ret = nfs_wait_on_request(req);
- nfs_release_request(req);
- if (ret < 0)
- break;
+ wait_on_page_writeback(page);
+
+ /* blocking call to cancel all requests and join to a single (head)
+ * request */
+ req = nfs_lock_and_join_requests(page, false);
+
+ if (IS_ERR(req)) {
+ ret = PTR_ERR(req);
+ } else if (req) {
+ /* all requests from this page have been cancelled by
+ * nfs_lock_and_join_requests, so just remove the head
+ * request from the inode / page_private pointer and
+ * release it */
+ nfs_inode_remove_request(req);
+ /*
+ * In case nfs_inode_remove_request has marked the
+ * page as being dirty
+ */
+ cancel_dirty_page(page, PAGE_CACHE_SIZE);
+ nfs_unlock_and_release_request(req);
}
+
return ret;
}