From 5d422301f97b821301efcdb6fc9d1a83a5c102d6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Mar 2013 16:57:48 -0400 Subject: NFSv4: Fail I/O if the state recovery fails irrevocably If state recovery fails with an ESTALE or a ENOENT, then we shouldn't keep retrying. Instead, mark the stateid as being invalid and fail the I/O with an EIO error. For other operations such as POSIX and BSD file locking, truncate etc, fail with an EBADF to indicate that this file descriptor is no longer valid. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6ace365c6334..fec1c5bb4863 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -699,6 +699,8 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) list_for_each_entry(state, &nfsi->open_states, inode_states) { if (state->owner != owner) continue; + if (!nfs4_valid_open_stateid(state)) + continue; if (atomic_inc_not_zero(&state->count)) return state; } @@ -1286,14 +1288,17 @@ static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_s return 1; } -void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state) +int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state) { struct nfs_client *clp = server->nfs_client; + if (!nfs4_valid_open_stateid(state)) + return -EBADF; nfs4_state_mark_reclaim_nograce(clp, state); dprintk("%s: scheduling stateid recovery for server %s\n", __func__, clp->cl_hostname); nfs4_schedule_state_manager(clp); + return 0; } EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); @@ -1323,6 +1328,11 @@ void nfs_inode_find_state_and_recover(struct inode *inode, nfs4_schedule_state_manager(clp); } +static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error) +{ + set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags); +} + static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) { @@ -1398,6 +1408,8 @@ restart: list_for_each_entry(state, &sp->so_states, open_states) { if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) continue; + if (!nfs4_valid_open_stateid(state)) + continue; if (state->state == 0) continue; atomic_inc(&state->count); @@ -1430,10 +1442,7 @@ restart: * Open state on this file cannot be recovered * All we can do is revert to using the zero stateid. */ - memset(&state->stateid, 0, - sizeof(state->stateid)); - /* Mark the file as being 'closed' */ - state->state = 0; + nfs4_state_mark_recovery_failed(state, status); break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: -- cgit v1.2.3 From c58c844187df61ef7cc103d0abb5dd6198bcfcd6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 Mar 2013 19:45:14 -0400 Subject: NFS: Don't accept more reads/writes if the open context recovery failed If the state recovery failed, we want to ensure that the application doesn't try to use the same file descriptor for more reads or writes. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index fec1c5bb4863..8db102c7add6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1328,9 +1328,25 @@ void nfs_inode_find_state_and_recover(struct inode *inode, nfs4_schedule_state_manager(clp); } +static void nfs4_state_mark_open_context_bad(struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *ctx; + + spin_lock(&inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + if (ctx->state != state) + continue; + set_bit(NFS_CONTEXT_BAD, &ctx->flags); + } + spin_unlock(&inode->i_lock); +} + static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error) { set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags); + nfs4_state_mark_open_context_bad(state); } -- cgit v1.2.3 From 5521abfdcf4d67c3441d4414f29e1acd7cc43380 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 16 Mar 2013 20:54:34 -0400 Subject: NFSv4: Resend the READ/WRITE RPC call if a stateid change causes an error Adds logic to ensure that if the server returns a BAD_STATEID, or other state related error, then we check if the stateid has already changed. If it has, then rather than start state recovery, we should just resend the failed RPC call with the new stateid. Allow nfs4_select_rw_stateid to notify that the stateid is unstable by having it return -EWOULDBLOCK if an RPC is underway that might change the stateid. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 8db102c7add6..4e95bd72f480 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -989,13 +989,14 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) return 0; } -static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, +static int nfs4_copy_lock_stateid(nfs4_stateid *dst, + struct nfs4_state *state, const struct nfs_lockowner *lockowner) { struct nfs4_lock_state *lsp; fl_owner_t fl_owner; pid_t fl_pid; - bool ret = false; + int ret = -ENOENT; if (lockowner == NULL) @@ -1010,7 +1011,10 @@ static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { nfs4_stateid_copy(dst, &lsp->ls_stateid); - ret = true; + ret = 0; + smp_rmb(); + if (!list_empty(&lsp->ls_seqid.list)) + ret = -EWOULDBLOCK; } spin_unlock(&state->state_lock); nfs4_put_lock_state(lsp); @@ -1018,28 +1022,38 @@ out: return ret; } -static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) +static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) { + int ret; int seq; do { seq = read_seqbegin(&state->seqlock); nfs4_stateid_copy(dst, &state->stateid); + ret = 0; + smp_rmb(); + if (!list_empty(&state->owner->so_seqid.list)) + ret = -EWOULDBLOCK; } while (read_seqretry(&state->seqlock, seq)); + return ret; } /* * Byte-range lock aware utility to initialize the stateid of read/write * requests. */ -void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, +int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, fmode_t fmode, const struct nfs_lockowner *lockowner) { + int ret = 0; if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) - return; - if (nfs4_copy_lock_stateid(dst, state, lockowner)) - return; - nfs4_copy_open_stateid(dst, state); + goto out; + ret = nfs4_copy_lock_stateid(dst, state, lockowner); + if (ret != -ENOENT) + goto out; + ret = nfs4_copy_open_stateid(dst, state); +out: + return ret; } struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask) -- cgit v1.2.3 From 3b66486c4c7136f8d4bbe1306d581fadc6bce4c7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Mar 2013 15:31:15 -0400 Subject: NFSv4.1: Select the "most recent locking state" for read/write/setattr stateids Follow the practice described in section 8.2.2 of RFC5661: When sending a read/write or setattr stateid, set the seqid field to zero in order to signal that the NFS server should apply the most recent locking state. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 4e95bd72f480..685b1e953ed8 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1053,6 +1053,8 @@ int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, goto out; ret = nfs4_copy_open_stateid(dst, state); out: + if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41)) + dst->seqid = 0; return ret; } -- cgit v1.2.3 From 91876b13b8b3cbff5cca8476d4b4eebe5f6038cc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 28 Mar 2013 14:01:33 -0400 Subject: NFSv4: Fix another reboot recovery race If the open_context for the file is not yet fully initialised, then open recovery cannot succeed, and since nfs4_state_find_open_context returns an ENOENT, we end up treating the file as being irrecoverable. What we really want to do, is just defer the recovery until later. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 685b1e953ed8..b924bdd69494 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1476,6 +1476,8 @@ restart: */ nfs4_state_mark_recovery_failed(state, status); break; + case -EAGAIN: + ssleep(1); case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_BAD_STATEID: -- cgit v1.2.3 From 4edaa308888b4bd629fa025cc6d5b2bf1a2a51db Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 16 Mar 2013 15:56:20 -0400 Subject: NFS: Use "krb5i" to establish NFSv4 state whenever possible Currently our client uses AUTH_UNIX for state management on Kerberos NFS mounts in some cases. For example, if the first mount of a server specifies "sec=sys," the SETCLIENTID operation is performed with AUTH_UNIX. Subsequent mounts using stronger security flavors can not change the flavor used for lease establishment. This might be less security than an administrator was expecting. Dave Noveck's migration issues draft recommends the use of an integrity-protecting security flavor for the SETCLIENTID operation. Let's ignore the mount's sec= setting and use krb5i as the default security flavor for SETCLIENTID. If our client can't establish a GSS context (eg. because it doesn't have a keytab or the server doesn't support Kerberos) we fall back to using AUTH_NULL. For an operation that requires a machine credential (which never represents a particular user) AUTH_NULL is as secure as AUTH_UNIX. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 37 +++++-------------------------------- 1 file changed, 5 insertions(+), 32 deletions(-) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index b924bdd69494..a30f51eb048f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1866,26 +1866,13 @@ int nfs4_discover_server_trunking(struct nfs_client *clp, { const struct nfs4_state_recovery_ops *ops = clp->cl_mvops->reboot_recovery_ops; - rpc_authflavor_t *flavors, flav, save; struct rpc_clnt *clnt; struct rpc_cred *cred; - int i, len, status; + int i, status; dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname); - len = NFS_MAX_SECFLAVORS; - flavors = kcalloc(len, sizeof(*flavors), GFP_KERNEL); - if (flavors == NULL) { - status = -ENOMEM; - goto out; - } - len = rpcauth_list_flavors(flavors, len); - if (len < 0) { - status = len; - goto out_free; - } clnt = clp->cl_rpcclient; - save = clnt->cl_auth->au_flavor; i = 0; mutex_lock(&nfs_clid_init_mutex); @@ -1900,12 +1887,6 @@ again: switch (status) { case 0: break; - - case -EACCES: - if (clp->cl_machine_cred == NULL) - break; - /* Handle case where the user hasn't set up machine creds */ - nfs4_clear_machine_cred(clp); case -NFS4ERR_DELAY: case -ETIMEDOUT: case -EAGAIN: @@ -1914,17 +1895,12 @@ again: dprintk("NFS: %s after status %d, retrying\n", __func__, status); goto again; - + case -EACCES: + if (i++) + break; case -NFS4ERR_CLID_INUSE: case -NFS4ERR_WRONGSEC: - status = -EPERM; - if (i >= len) - break; - - flav = flavors[i++]; - if (flav == save) - flav = flavors[i++]; - clnt = rpc_clone_client_set_auth(clnt, flav); + clnt = rpc_clone_client_set_auth(clnt, RPC_AUTH_NULL); if (IS_ERR(clnt)) { status = PTR_ERR(clnt); break; @@ -1944,9 +1920,6 @@ again: out_unlock: mutex_unlock(&nfs_clid_init_mutex); -out_free: - kfree(flavors); -out: dprintk("NFS: %s: status = %d\n", __func__, status); return status; } -- cgit v1.2.3 From ea33e6c3e79d23ef693b7ed3171ef90dddfbfc15 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Apr 2013 13:22:50 -0400 Subject: NFSv4: Fix issues in nfs4_discover_server_trunking - Ensure that we exit with ENOENT if the call to ops->get_clid_cred() fails. - Handle the case where ops->detect_trunking() exits with an unexpected error, and return EIO. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a30f51eb048f..209df6976a5b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1876,8 +1876,8 @@ int nfs4_discover_server_trunking(struct nfs_client *clp, i = 0; mutex_lock(&nfs_clid_init_mutex); - status = -ENOENT; again: + status = -ENOENT; cred = ops->get_clid_cred(clp); if (cred == NULL) goto out_unlock; @@ -1916,6 +1916,11 @@ again: case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery * in nfs4_exchange_id */ status = -EKEYEXPIRED; + break; + default: + pr_warn("NFS: %s unhandled error %d. Exiting with error EIO\n", + __func__, status); + status = -EIO; } out_unlock: -- cgit v1.2.3 From 845cbceb22c67030df76552892ad4935669bf2e5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Apr 2013 15:37:04 -0400 Subject: NFSv4: Don't clear the machine cred when client establish returns EACCES The expected behaviour is that the client will decide at mount time whether or not to use a krb5i machine cred, or AUTH_NULL. Signed-off-by: Trond Myklebust Cc: Chuck Lever Cc: Bryan Schumaker --- fs/nfs/nfs4state.c | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 209df6976a5b..c54600962bc6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -154,18 +154,6 @@ struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) return cred; } -static void nfs4_clear_machine_cred(struct nfs_client *clp) -{ - struct rpc_cred *cred; - - spin_lock(&clp->cl_lock); - cred = clp->cl_machine_cred; - clp->cl_machine_cred = NULL; - spin_unlock(&clp->cl_lock); - if (cred != NULL) - put_rpccred(cred); -} - static struct rpc_cred * nfs4_get_renew_cred_server_locked(struct nfs_server *server) { @@ -1768,10 +1756,6 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); return -EPERM; case -EACCES: - if (clp->cl_machine_cred == NULL) - return -EACCES; - /* Handle case where the user hasn't set up machine creds */ - nfs4_clear_machine_cred(clp); case -NFS4ERR_DELAY: case -ETIMEDOUT: case -EAGAIN: -- cgit v1.2.3 From bc7a05ca5156915a5aada26d64ee035fdd5e5d25 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Apr 2013 17:50:28 -0400 Subject: NFSv4: Handle timeouts correctly when probing for lease validity When we send a RENEW or SEQUENCE operation in order to probe if the lease is still valid, we want it to be able to time out since the lease we are probing is likely to time out too. Currently, because we use soft mount semantics for these RPC calls, the return value is EIO, which causes the state manager to exit with an "unhandled error" message. This patch changes the call semantics, so that the RPC layer returns ETIMEDOUT instead of EIO. We then have the state manager default to a simple retry instead of exiting. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index b924bdd69494..1eb17285c99a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1739,6 +1739,10 @@ static int nfs4_check_lease(struct nfs_client *clp) } status = ops->renew_lease(clp, cred); put_rpccred(cred); + if (status == -ETIMEDOUT) { + set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); + return 0; + } out: return nfs4_recovery_handle_error(clp, status); } -- cgit v1.2.3 From 92b40e93849e29f9ca661de6442bb66282738bf7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 20 Apr 2013 01:25:45 -0400 Subject: NFSv4: Use the open stateid if the delegation has the wrong mode Fix nfs4_select_rw_stateid() so that it chooses the open stateid (or an all-zero stateid) if the delegation does not match the selected read/write mode. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1eb17285c99a..b7796950eceb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1024,12 +1024,16 @@ out: static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) { + const nfs4_stateid *src; int ret; int seq; do { + src = &zero_stateid; seq = read_seqbegin(&state->seqlock); - nfs4_stateid_copy(dst, &state->stateid); + if (test_bit(NFS_OPEN_STATE, &state->flags)) + src = &state->open_stateid; + nfs4_stateid_copy(dst, src); ret = 0; smp_rmb(); if (!list_empty(&state->owner->so_seqid.list)) -- cgit v1.2.3 From 79d852bf5e7691dc78cc6322ecd1860c50940785 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 22 Apr 2013 15:42:48 -0400 Subject: NFS: Retry SETCLIENTID with AUTH_SYS instead of AUTH_NONE Recently I changed the SETCLIENTID code to use AUTH_GSS(krb5i), and then retry with AUTH_NONE if that didn't work. This was to enable Kerberos NFS mounts to work without forcing Linux NFS clients to have a keytab on hand. Rick Macklem reports that the FreeBSD server accepts AUTH_NONE only for NULL operations (thus certainly not for SETCLIENTID). Falling back to AUTH_NONE means our proposed 3.10 NFS client will not interoperate with FreeBSD servers over NFSv4 unless Kerberos is fully configured on both ends. If the Linux client falls back to using AUTH_SYS instead for SETCLIENTID, all should work fine as long as the NFS server is configured to allow AUTH_SYS for SETCLIENTID. This may still prevent access to Kerberos-only FreeBSD servers by Linux clients with no keytab. Rick is of the opinion that the security settings the server applies to its pseudo-fs should also apply to the SETCLIENTID operation. Linux and Solaris NFS servers do not place that limitation on SETCLIENTID. The security settings for the server's pseudo-fs are determined automatically as the union of security flavors allowed on real exports, as recommended by RFC 3530bis; and the flavors allowed for SETCLIENTID are all flavors supported by the respective server implementation. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c54600962bc6..a75e712490f4 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1884,7 +1884,7 @@ again: break; case -NFS4ERR_CLID_INUSE: case -NFS4ERR_WRONGSEC: - clnt = rpc_clone_client_set_auth(clnt, RPC_AUTH_NULL); + clnt = rpc_clone_client_set_auth(clnt, RPC_AUTH_UNIX); if (IS_ERR(clnt)) { status = PTR_ERR(clnt); break; -- cgit v1.2.3