From b92dccf65bab3b6b7deb79ff3321dc256eb0f53b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:03 -0500 Subject: NFS: Fix a busy inodes issue... The nfs_open_context may live longer than the file descriptor that spawned it, so it needs to carry a reference to the vfsmount. If not, then generic_shutdown_super() may end up being called before reads and writes have been flushed out. Make a couple of functions static while we're at it... Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b4dc6e2e10c9..1161725d75e1 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -78,6 +78,7 @@ struct nfs_access_entry { struct nfs4_state; struct nfs_open_context { atomic_t count; + struct vfsmount *vfsmnt; struct dentry *dentry; struct rpc_cred *cred; struct nfs4_state *state; @@ -311,12 +312,9 @@ extern void nfs_begin_attr_update(struct inode *); extern void nfs_end_attr_update(struct inode *); extern void nfs_begin_data_update(struct inode *); extern void nfs_end_data_update(struct inode *); -extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); -extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx); extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); -extern void nfs_file_clear_open_context(struct file *filp); /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ extern u32 root_nfs_parse_addr(char *name); /*__init*/ -- cgit v1.2.3 From 7bab377fcb495ee2e5a1cd69d235f8d84c76e3af Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:06 -0500 Subject: lockd: Don't expose the process pid to the NLM server Instead we use the nlm_lockowner->pid. Signed-off-by: Trond Myklebust --- include/linux/lockd/xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index d7a5cc4cfa97..bb0a0f1caa91 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -28,6 +28,7 @@ struct nlm_lock { int len; /* length of "caller" */ struct nfs_fh fh; struct xdr_netobj oh; + u32 svid; struct file_lock fl; }; -- cgit v1.2.3 From 24c5d9d7ea5a64fb5f157d17aa2c67a3300f8a08 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:08 -0500 Subject: SUNRPC: Run rpci->queue_timeout on the rpciod workqueue instead of generic Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 8b25629accd8..a390c9b8a01e 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -276,6 +276,7 @@ void rpc_show_tasks(void); #endif int rpc_init_mempool(void); void rpc_destroy_mempool(void); +extern struct workqueue_struct *rpciod_workqueue; static inline void rpc_exit(struct rpc_task *task, int status) { -- cgit v1.2.3 From 24bd68f46b1ad08d69bf32779f860df867780a7a Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Mon, 20 Mar 2006 13:44:11 -0500 Subject: NFS: Code comments update in NFS read_cache_mtime is no longer used in nfs_inode. This patch removes references of read_cache_mtime in the code comments. Signed-off-by: Goldwyn Rodrigues Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1161725d75e1..b71da4d4b137 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -119,8 +119,7 @@ struct nfs_inode { unsigned long cache_validity; /* bit mask */ /* - * read_cache_jiffies is when we started read-caching this inode, - * and read_cache_mtime is the mtime of the inode at that time. + * read_cache_jiffies is when we started read-caching this inode. * attrtimeo is for how long the cached information is assumed * to be valid. A successful attribute revalidation doubles * attrtimeo (up to acregmax/acdirmax), a failure resets it to @@ -129,11 +128,6 @@ struct nfs_inode { * We need to revalidate the cached attrs for this inode if * * jiffies - read_cache_jiffies > attrtimeo - * - * and invalidate any cached data/flush out any dirty pages if - * we find that - * - * mtime != read_cache_mtime */ unsigned long read_cache_jiffies; unsigned long attrtimeo; -- cgit v1.2.3 From b4629fe2f094b719847f31be1ee5ab38300038b2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:12 -0500 Subject: VFS: New /proc file /proc/self/mountstats Create a new file under /proc/self, called mountstats, where mounted file systems can export information (configuration options, performance counters, and so on). Use a mechanism similar to /proc/mounts and s_ops->show_options. This mechanism does not violate namespace security, and is safe to use while other processes are unmounting file systems. Thanks to Mike Waychison for his review and comments. Test-plan: Test concurrent mount/unmount operations while cat'ing /proc/self/mountstats. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 128d0082522c..be21e860a9f2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1086,6 +1086,7 @@ struct super_operations { void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct vfsmount *); + int (*show_stats)(struct seq_file *, struct vfsmount *); ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); -- cgit v1.2.3 From 7a480e250c7ca9187275d8574ae9e48a6b602cb9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:12 -0500 Subject: NFS: show retransmit settings when displaying mount options Sometimes it's important to know the exact RPC retransmit settings the kernel is using for an NFS mount point. Add this facility to the NFS client's show_options method. Test plan: Set various retransmit settings via the mount command, and check that the settings are reflected in /proc/mounts. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 3d3a305488cf..a522ab97358d 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -26,6 +26,8 @@ struct nfs_server { unsigned int acregmax; unsigned int acdirmin; unsigned int acdirmax; + unsigned long retrans_timeo; /* retransmit timeout */ + unsigned int retrans_count; /* number of retransmit tries */ unsigned int namelen; char * hostname; /* remote hostname */ struct nfs_fh fh; -- cgit v1.2.3 From d9ef5a8c26aab09762afce43df64736720b4860e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:13 -0500 Subject: NFS: introduce mechanism for tracking NFS client metrics Add a per-superblock performance counter facility to the NFS client. This facility mimics the counters available for block devices and for networking. Expose these new counters via the new /proc/self/mountstats interface. Thanks to Andrew Morton and Trond Myklebust for their review and comments. Test plan: fsx and iozone on UP and SMP systems, with and without pre-emption. Watch for memory overwrite bugs, and performance loss (significantly more CPU required per op). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a522ab97358d..d65e69a06b72 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -4,6 +4,8 @@ #include #include +struct nfs_iostats; + /* * NFS client parameters stored in the superblock. */ @@ -12,6 +14,7 @@ struct nfs_server { struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ + struct nfs_iostats * io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; int flags; /* various flags */ unsigned int caps; /* server capabilities */ -- cgit v1.2.3 From 67ec9f46b889bfb1ab0a4e307d53929d5f0692bf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:15 -0500 Subject: NFS: report how long an NFS file system has been mounted Add a field in nfs_server to record a timestamp when a mount succeeds. Report the number of seconds the file system has been mounted via nfs_show_stats(). Test plan: Mount an NFS file system, watch the mountstats reports and compare with clock time. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d65e69a06b72..65dec21af774 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -35,6 +35,7 @@ struct nfs_server { char * hostname; /* remote hostname */ struct nfs_fh fh; struct sockaddr_in addr; + unsigned long mount_time; /* when this fs was mounted */ #ifdef CONFIG_NFS_V4 /* Our own IP address, as a null-terminated string. * This is used to generate the clientid, and the callback address. -- cgit v1.2.3 From e19b63dafdf7d615b0d36b90990a07e7792b9d3a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:15 -0500 Subject: SUNRPC: track length of RPC wait queues RPC wait queue length will eventually be exported to userland via the RPC iostats interface. Test plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index a390c9b8a01e..6c23f73a799a 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -203,6 +203,7 @@ struct rpc_wait_queue { unsigned char priority; /* current priority */ unsigned char count; /* # task groups remaining serviced so far */ unsigned char nr; /* # tasks remaining for cookie */ + unsigned short qlen; /* total # tasks waiting in queue */ #ifdef RPC_DEBUG const char * name; #endif -- cgit v1.2.3 From 262ca07de4d7f1bff20361c1353bb14b3607afb2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:16 -0500 Subject: SUNRPC: add a handful of per-xprt counters Monitor generic transport events. Add a transport switch callout to format transport counters for export to user-land. Test plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 6ef99b14ff09..7eebbab7160b 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -114,6 +114,7 @@ struct rpc_xprt_ops { void (*release_request)(struct rpc_task *task); void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); + void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); }; struct rpc_xprt { @@ -187,6 +188,18 @@ struct rpc_xprt { struct list_head recv; + struct { + unsigned long bind_count, /* total number of binds */ + connect_count, /* total number of connects */ + connect_start, /* connect start timestamp */ + connect_time, /* jiffies waiting for connect */ + sends, /* how many complete requests */ + recvs, /* how many complete requests */ + bad_xids; /* lookup_rqst didn't find XID */ + + unsigned long long req_u, /* average requests on the wire */ + bklog_u; /* backlog queue utilization */ + } stat; void (*old_data_ready)(struct sock *, int); void (*old_state_change)(struct sock *); -- cgit v1.2.3 From ef759a2e54ed434b2f72b52a14edecd6d4eadf74 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:17 -0500 Subject: SUNRPC: introduce per-task RPC iostats Account for various things that occur while an RPC task is executed. Separate timers for RPC round trip and RPC execution time show how long RPC requests wait in queue before being sent. Eventually these will be accumulated at xprt_release time in one place where they can be viewed from userland. Test plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 6c23f73a799a..45a64ae963ee 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -86,6 +86,12 @@ struct rpc_task { struct work_struct tk_work; /* Async task work queue */ struct rpc_wait tk_wait; /* RPC wait */ } u; + + unsigned short tk_timeouts; /* maj timeouts */ + size_t tk_bytes_sent; /* total bytes sent */ + unsigned long tk_start; /* RPC task init timestamp */ + long tk_rtt; /* round-trip time (jiffies) */ + #ifdef RPC_DEBUG unsigned short tk_pid; /* debugging aid */ #endif -- cgit v1.2.3 From 11c556b3d8d481829ab5f9933a25d29b00913b5a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:22 -0500 Subject: SUNRPC: provide a mechanism for collecting stats in the RPC client Add a simple mechanism for collecting stats in the RPC client. Stats are tabulated during xprt_release. Note that per_cpu shenanigans are not required here because the RPC client already serializes on the transport write lock. Test plan: Compile kernel with CONFIG_NFS enabled. Basic performance regression testing with high-speed networking and high performance server. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 3 +- include/linux/sunrpc/metrics.h | 77 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 include/linux/sunrpc/metrics.h (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index f147e6b84332..0f3662002ffc 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -45,7 +45,8 @@ struct rpc_clnt { char * cl_server; /* server machine name */ char * cl_protname; /* protocol name */ struct rpc_auth * cl_auth; /* authenticator */ - struct rpc_stat * cl_stats; /* statistics */ + struct rpc_stat * cl_stats; /* per-program statistics */ + struct rpc_iostats * cl_metrics; /* per-client statistics */ unsigned int cl_softrtry : 1,/* soft timeouts */ cl_intr : 1,/* interruptible */ diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h new file mode 100644 index 000000000000..8f96e9dc369a --- /dev/null +++ b/include/linux/sunrpc/metrics.h @@ -0,0 +1,77 @@ +/* + * linux/include/linux/sunrpc/metrics.h + * + * Declarations for RPC client per-operation metrics + * + * Copyright (C) 2005 Chuck Lever + * + * RPC client per-operation statistics provide latency and retry + * information about each type of RPC procedure in a given RPC program. + * These statistics are not for detailed problem diagnosis, but simply + * to indicate whether the problem is local or remote. + * + * These counters are not meant to be human-readable, but are meant to be + * integrated into system monitoring tools such as "sar" and "iostat". As + * such, the counters are sampled by the tools over time, and are never + * zeroed after a file system is mounted. Moving averages can be computed + * by the tools by taking the difference between two instantaneous samples + * and dividing that by the time between the samples. + * + * The counters are maintained in a single array per RPC client, indexed + * by procedure number. There is no need to maintain separate counter + * arrays per-CPU because these counters are always modified behind locks. + */ + +#ifndef _LINUX_SUNRPC_METRICS_H +#define _LINUX_SUNRPC_METRICS_H + +#include + +#define RPC_IOSTATS_VERS "1.0" + +struct rpc_iostats { + /* + * These counters give an idea about how many request + * transmissions are required, on average, to complete that + * particular procedure. Some procedures may require more + * than one transmission because the server is unresponsive, + * the client is retransmitting too aggressively, or the + * requests are large and the network is congested. + */ + unsigned long om_ops, /* count of operations */ + om_ntrans, /* count of RPC transmissions */ + om_timeouts; /* count of major timeouts */ + + /* + * These count how many bytes are sent and received for a + * given RPC procedure type. This indicates how much load a + * particular procedure is putting on the network. These + * counts include the RPC and ULP headers, and the request + * payload. + */ + unsigned long long om_bytes_sent, /* count of bytes out */ + om_bytes_recv; /* count of bytes in */ + + /* + * The length of time an RPC request waits in queue before + * transmission, the network + server latency of the request, + * and the total time the request spent from init to release + * are measured. + */ + unsigned long long om_queue, /* jiffies queued for xmit */ + om_rtt, /* jiffies for RPC RTT */ + om_execute; /* jiffies for RPC execution */ +} ____cacheline_aligned; + +struct rpc_task; +struct rpc_clnt; + +/* + * EXPORTed functions for managing rpc_iostats structures + */ +struct rpc_iostats * rpc_alloc_iostats(struct rpc_clnt *); +void rpc_count_iostats(struct rpc_task *); +void rpc_print_iostats(struct seq_file *, struct rpc_clnt *); +void rpc_free_iostats(struct rpc_iostats *); + +#endif /* _LINUX_SUNRPC_METRICS_H */ -- cgit v1.2.3 From cc0175c1dc1de8f6af0eb0631dcc5b999a6fcc42 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:22 -0500 Subject: SUNRPC: display human-readable procedure name in rpc_iostats output Add fields to the rpc_procinfo struct that allow the display of a human-readable name for each procedure in the rpc_iostats output. Also fix it so that the NFSv4 stats are broken up correctly by sub-procedure number. NFSv4 uses only two real RPC procedures: NULL, and COMPOUND. Test plan: Mount with NFSv2, NFSv3, and NFSv4, and do "cat /proc/self/mountstats". Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 0f3662002ffc..3bec751ee249 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -101,6 +101,8 @@ struct rpc_procinfo { unsigned int p_bufsiz; /* req. buffer size */ unsigned int p_count; /* call count */ unsigned int p_timer; /* Which RTT timer to use */ + u32 p_statidx; /* Which procedure to account */ + char * p_name; /* name of procedure */ }; #define RPC_CONGESTED(clnt) (RPCXPRT_CONGESTED((clnt)->cl_xprt)) -- cgit v1.2.3 From dead28da8e3fb32601d38fb32b7021122e0a3d21 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:23 -0500 Subject: SUNRPC: eliminate rpc_call() Clean-up: replace rpc_call() helper with direct call to rpc_call_sync. This makes NFSv2 and NFSv3 synchronous calls more computationally efficient, and reduces stack consumption in functions that used to invoke rpc_call more than once. Test plan: Compile kernel with CONFIG_NFS enabled. Connectathon on NFS version 2, version 3, and version 4 mount points. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 14 -------------- include/linux/sunrpc/sched.h | 1 - 2 files changed, 15 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 3bec751ee249..e37c06128e51 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -140,20 +140,6 @@ size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); int rpc_ping(struct rpc_clnt *clnt, int flags); -static __inline__ -int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) -{ - struct rpc_message msg = { - .rpc_proc = &clnt->cl_procinfo[proc], - .rpc_argp = argp, - .rpc_resp = resp, - .rpc_cred = NULL - }; - return rpc_call_sync(clnt, &msg, flags); -} - -extern void rpciod_wake_up(void); - /* * Helper function for NFSroot support */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 45a64ae963ee..82a91bb22362 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -276,7 +276,6 @@ void * rpc_malloc(struct rpc_task *, size_t); void rpc_free(struct rpc_task *); int rpciod_up(void); void rpciod_down(void); -void rpciod_wake_up(void); int __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *)); #ifdef RPC_DEBUG void rpc_show_tasks(void); -- cgit v1.2.3 From 2e0af86f618c697b44e2d67dff151256c58201c4 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 20 Mar 2006 13:44:26 -0500 Subject: locks: remove unused posix_block_lock posix_lock_file() is used to add a blocked lock to Lockd's block, so posix_block_lock() is no longer needed. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index be21e860a9f2..b01482c721ae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -757,7 +757,6 @@ extern void locks_remove_flock(struct file *); extern struct file_lock *posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); -extern void posix_block_lock(struct file_lock *, struct file_lock *); extern int posix_unblock_lock(struct file *, struct file_lock *); extern int posix_locks_deadlock(struct file_lock *, struct file_lock *); extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); -- cgit v1.2.3 From 8dc7c3115b611c00006eac3ee5b108296432aab7 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 20 Mar 2006 13:44:26 -0500 Subject: locks,lockd: fix race in nlmsvc_testlock posix_test_lock() returns a pointer to a struct file_lock which is unprotected and can be removed while in use by the caller. Move the conflicting lock from the return to a parameter, and copy the conflicting lock. In most cases the caller ends up putting the copy of the conflicting lock on the stack. On i386, sizeof(struct file_lock) appears to be about 100 bytes. We're assuming that's reasonable. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index b01482c721ae..8ef4dd788a83 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -754,7 +754,7 @@ extern void locks_init_lock(struct file_lock *); extern void locks_copy_lock(struct file_lock *, struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_flock(struct file *); -extern struct file_lock *posix_test_lock(struct file *, struct file_lock *); +extern int posix_test_lock(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file *, struct file_lock *); -- cgit v1.2.3 From 7117bf3dfb10b534a017260d9fc643bc1d0afd2a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Mar 2006 13:44:26 -0500 Subject: lockd: Remove FL_LOCKD flag Currently lockd identifies its own locks using the FL_LOCKD flag. This doesn't scale well to multiple lock managers--if we did this in nfsv4 too, for example, we'd be left with only one free flag bit. Instead, we just check whether the file manager ops (fl_lmops) set on this lock are our own. The only use for this is in nlm_traverse_locks, which uses it to find locks that need cleaning up when freeing a host or a file. In the long run it might be nice to do reference counting instead of traversing all the locks like this.... Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8ef4dd788a83..d2cffee8fc11 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -667,7 +667,6 @@ extern spinlock_t files_lock; #define FL_POSIX 1 #define FL_FLOCK 2 #define FL_ACCESS 8 /* not trying to lock, just looking */ -#define FL_LOCKD 16 /* lock held by rpc.lockd */ #define FL_LEASE 32 /* lease held on this file */ #define FL_SLEEP 128 /* A blocking lock */ -- cgit v1.2.3 From 788e7a89a03e364855583c0ab4649b94925efbb9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:27 -0500 Subject: NFS: Cleanup of NFS write code in preparation for asynchronous o_direct This patch inverts the callback hierarchy for NFS write calls. Instead of having the NFSv2/v3/v4-specific code set up the RPC callback ops, we allow the original caller to do so. This allows for more flexibility w.r.t. how to set up and tear down the nfs_write_data structure while still allowing the NFSv3/v4 code to perform error handling. The greater flexibility is needed by the asynchronous O_DIRECT code, which wants to be able to hold on to the original nfs_write_data structures after the WRITE RPC call has completed in order to be able to replay them if the COMMIT call determines that the server has rebooted. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 7 ------- include/linux/nfs_xdr.h | 3 ++- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b71da4d4b137..782e59765696 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -407,13 +407,6 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); -extern void nfs_writeback_done(struct rpc_task *task, void *data); -extern void nfs_writedata_release(void *data); - -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -extern void nfs_commit_done(struct rpc_task *, void *data); -extern void nfs_commit_release(void *data); -#endif /* * Try to write back everything synchronously (but check the diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6d6f69ec5675..277750cc70c0 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -714,7 +714,6 @@ struct nfs_write_data { #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ #endif - void (*complete) (struct nfs_write_data *, int); struct page *page_array[NFS_PAGEVEC_SIZE + 1]; }; @@ -770,7 +769,9 @@ struct nfs_rpc_ops { u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); void (*read_setup) (struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, int how); + int (*write_done) (struct rpc_task *, struct nfs_write_data *); void (*commit_setup) (struct nfs_write_data *, int how); + int (*commit_done) (struct rpc_task *, struct nfs_write_data *); int (*file_open) (struct inode *, struct file *); int (*file_release) (struct inode *, struct file *); int (*lock)(struct file *, int, struct file_lock *); -- cgit v1.2.3 From ec06c096edec0755534c7126f4caded69de131c2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:27 -0500 Subject: NFS: Cleanup of NFS read code Same callback hierarchy inversion as for the NFS write calls. This patch is not strictly speaking needed by the O_DIRECT code, but avoids confusing differences between the asynchronous read and write code. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 4 ++-- include/linux/nfs_xdr.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 782e59765696..f55827be4f8e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -492,8 +492,8 @@ static inline void nfs_writedata_free(struct nfs_write_data *p) extern int nfs_readpage(struct file *, struct page *); extern int nfs_readpages(struct file *, struct address_space *, struct list_head *, unsigned); -extern void nfs_readpage_result(struct rpc_task *, void *); -extern void nfs_readdata_release(void *data); +extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); +extern void nfs_readdata_release(void *data); /* diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 277750cc70c0..7fafc4c546b7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -695,7 +695,6 @@ struct nfs_read_data { #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ #endif - void (*complete) (struct nfs_read_data *, int); struct page *page_array[NFS_PAGEVEC_SIZE + 1]; }; @@ -768,6 +767,7 @@ struct nfs_rpc_ops { struct nfs_pathconf *); u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); void (*read_setup) (struct nfs_read_data *); + int (*read_done) (struct rpc_task *, struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, int how); int (*write_done) (struct rpc_task *, struct nfs_write_data *); void (*commit_setup) (struct nfs_write_data *, int how); -- cgit v1.2.3 From 462d5b3296b56289efec426499a83faad4c08d9e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:32 -0500 Subject: NFS: make direct write path generate write requests concurrently Duplicate infrastructure from direct read path that will allow write path to generate multiple write requests concurrently. This will enable us to add support for aio in this path. Temporarily we will lose the ability to do UNSTABLE writes followed by a COMMIT in the direct write path. However, all applications I am aware of that use NFS O_DIRECT currently write in relatively small chunks, so this should not be inconvenient in any way. Test plan: Millions of fsx-odirect ops. OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f55827be4f8e..6c130a6b0f4d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -407,6 +407,8 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); +extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); +extern void nfs_writedata_release(void *); /* * Try to write back everything synchronously (but check the -- cgit v1.2.3 From e17b1fc4b35399935f00a635206e183d9292fe4f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:35 -0500 Subject: NFS: Make nfs_commit_alloc() extern We need to use nfs_commit_alloc() in fs/nfs/direct.c. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6c130a6b0f4d..423f202b881c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -410,6 +410,11 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); extern void nfs_writedata_release(void *); +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount); +void nfs_commit_free(struct nfs_write_data *p); +#endif + /* * Try to write back everything synchronously (but check the * return value!) -- cgit v1.2.3 From fad61490419b3e494f300e9b2579810ef3bcda31 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:36 -0500 Subject: nfs: Use UNSTABLE + COMMIT for NFS O_DIRECT writes Currently NFS O_DIRECT writes use FILE_SYNC so that a COMMIT is not necessary. This simplifies the internal logic, but this could be a difficult workload for some servers. Instead, let's send UNSTABLE writes, and after they all complete, send a COMMIT for the dirty range. After the COMMIT returns successfully, then do the wake_up or fire off aio_complete(). Test plan: Async direct I/O tests against Solaris (or any server that requires committed unstable writes). Reboot server during test. Based on an earlier patch by Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 423f202b881c..9f84c8a5ea43 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -422,6 +422,7 @@ void nfs_commit_free(struct nfs_write_data *p); extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); +extern void nfs_commit_release(void *wdata); #else static inline int nfs_commit_inode(struct inode *inode, int how) -- cgit v1.2.3 From 3feb2d49394b7874348a6e43c076b780c1d222c5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:37 -0500 Subject: NFS: Uninline nfs_writedata_(alloc|free) and nfs_readdata_(alloc|free) Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 67 +++----------------------------------------------- 1 file changed, 4 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 9f84c8a5ea43..55de0770df4a 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -462,37 +462,8 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page) /* * Allocate and free nfs_write_data structures */ -extern mempool_t *nfs_wdata_mempool; - -static inline struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) -{ - struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); - - if (p) { - memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&p->pages); - if (pagecount < NFS_PAGEVEC_SIZE) - p->pagevec = &p->page_array[0]; - else { - size_t size = ++pagecount * sizeof(struct page *); - p->pagevec = kmalloc(size, GFP_NOFS); - if (p->pagevec) { - memset(p->pagevec, 0, size); - } else { - mempool_free(p, nfs_wdata_mempool); - p = NULL; - } - } - } - return p; -} - -static inline void nfs_writedata_free(struct nfs_write_data *p) -{ - if (p && (p->pagevec != &p->page_array[0])) - kfree(p->pagevec); - mempool_free(p, nfs_wdata_mempool); -} +extern struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount); +extern void nfs_writedata_free(struct nfs_write_data *p); /* * linux/fs/nfs/read.c @@ -503,41 +474,11 @@ extern int nfs_readpages(struct file *, struct address_space *, extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); extern void nfs_readdata_release(void *data); - /* * Allocate and free nfs_read_data structures */ -extern mempool_t *nfs_rdata_mempool; - -static inline struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) -{ - struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); - - if (p) { - memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&p->pages); - if (pagecount < NFS_PAGEVEC_SIZE) - p->pagevec = &p->page_array[0]; - else { - size_t size = ++pagecount * sizeof(struct page *); - p->pagevec = kmalloc(size, GFP_NOFS); - if (p->pagevec) { - memset(p->pagevec, 0, size); - } else { - mempool_free(p, nfs_rdata_mempool); - p = NULL; - } - } - } - return p; -} - -static inline void nfs_readdata_free(struct nfs_read_data *p) -{ - if (p && (p->pagevec != &p->page_array[0])) - kfree(p->pagevec); - mempool_free(p, nfs_rdata_mempool); -} +extern struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount); +extern void nfs_readdata_free(struct nfs_read_data *p); /* * linux/fs/nfs3proc.c -- cgit v1.2.3 From 6849c0cab69f5d1a0fc7b05fa5bfb3dec53f86df Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:39 -0500 Subject: lockd: Add refcounting to struct nlm_block Otherwise, the block may disappear from underneath us when in nlmsvc_retry_blocked. Signed-off-by: Trond Myklebust --- include/linux/lockd/lockd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index ef21ed296039..08ab9773f762 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -110,6 +111,7 @@ struct nlm_file { */ #define NLM_NEVER (~(unsigned long) 0) struct nlm_block { + struct kref b_count; /* Reference count */ struct nlm_block * b_next; /* linked list (all blocks) */ struct nlm_block * b_fnext; /* linked list (per file) */ struct nlm_rqst b_call; /* RPC args & callback info */ @@ -119,7 +121,6 @@ struct nlm_block { unsigned int b_id; /* block id */ unsigned char b_queued; /* re-queued */ unsigned char b_granted; /* VFS granted lock */ - unsigned char b_incall; /* doing callback */ unsigned char b_done; /* callback complete */ struct nlm_file * b_file; /* file in question */ }; -- cgit v1.2.3 From 5e1abf8cb713a0b94f5a400c7b9b797990cd9dec Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:39 -0500 Subject: lockd: Clean up of the server-side GRANTED code Signed-off-by: Trond Myklebust --- include/linux/lockd/lockd.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 08ab9773f762..860a93f6ce6d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -153,8 +153,6 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout); u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *); void nlmclnt_recovery(struct nlm_host *, u32); int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); -int nlmclnt_setgrantargs(struct nlm_rqst *, struct nlm_lock *); -void nlmclnt_freegrantargs(struct nlm_rqst *); /* * Host cache -- cgit v1.2.3 From 26bcbf965f857c710adafd16cf424f043006b5dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Mar 2006 13:44:40 -0500 Subject: lockd: stop abusing file_lock_list Currently lockd directly access the file_lock_list from fs/locks.c. It does so to mark locks granted or reclaimable. This is very suboptimal, because a) lockd needs to poke into locks.c internals, and b) it needs to iterate over all locks in the system for marking locks granted or reclaimable. This patch adds lists for granted and reclaimable locks to the nlm_host structure instead, and adds locks to those. nlmclnt_lock: now adds the lock to h_granted instead of setting the NFS_LCK_GRANTED, still O(1) nlmclnt_mark_reclaim: goes away completely, replaced by a list_splice_init. Complexity reduced from O(locks in the system) to O(1) reclaimer: iterates over h_reclaim now, complexity reduced from O(locks in the system) to O(locks per nlm_host) Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- include/linux/fs.h | 2 -- include/linux/lockd/lockd.h | 2 ++ include/linux/nfs_fs_i.h | 8 +------- 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d2cffee8fc11..5dc0fa288a4c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -730,8 +730,6 @@ struct file_lock { #define OFFT_OFFSET_MAX INT_LIMIT(off_t) #endif -extern struct list_head file_lock_list; - #include extern int fcntl_getlk(struct file *, struct flock __user *); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 860a93f6ce6d..b0f63b6ab0d4 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -59,6 +59,8 @@ struct nlm_host { unsigned long h_expires; /* eligible for GC */ struct list_head h_lockowners; /* Lockowners for the client */ spinlock_t h_lock; + struct list_head h_granted; /* Locks in GRANTED state */ + struct list_head h_reclaim; /* Locks in RECLAIM state */ }; /* diff --git a/include/linux/nfs_fs_i.h b/include/linux/nfs_fs_i.h index e2c18dabff86..861730275ba0 100644 --- a/include/linux/nfs_fs_i.h +++ b/include/linux/nfs_fs_i.h @@ -12,8 +12,8 @@ struct nlm_lockowner; */ struct nfs_lock_info { u32 state; - u32 flags; struct nlm_lockowner *owner; + struct list_head list; }; struct nfs4_lock_state; @@ -21,10 +21,4 @@ struct nfs4_lock_info { struct nfs4_lock_state *owner; }; -/* - * Lock flag values - */ -#define NFS_LCK_GRANTED 0x0001 /* lock has been granted */ -#define NFS_LCK_RECLAIM 0x0002 /* lock marked for reclaiming */ - #endif -- cgit v1.2.3 From 3a649b884637c4fdff50a6beebc3dc0e6082e048 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:44 -0500 Subject: NLM: Simplify client locks Signed-off-by: Trond Myklebust --- include/linux/lockd/lockd.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index b0f63b6ab0d4..cb9933d04091 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -86,7 +86,6 @@ struct nlm_rqst { struct nlm_host * a_host; /* host handle */ struct nlm_args a_args; /* arguments */ struct nlm_res a_res; /* result */ - struct nlm_wait * a_block; unsigned int a_retries; /* Retry count */ char a_owner[NLMCLNT_OHSIZE]; }; @@ -149,9 +148,9 @@ extern unsigned long nlmsvc_timeout; * Lockd client functions */ struct nlm_rqst * nlmclnt_alloc_call(void); -int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl); -void nlmclnt_finish_block(struct nlm_rqst *req); -long nlmclnt_block(struct nlm_rqst *req, long timeout); +struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl); +void nlmclnt_finish_block(struct nlm_wait *block); +int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout); u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *); void nlmclnt_recovery(struct nlm_host *, u32); int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); -- cgit v1.2.3 From 92737230dd3f1478033819d4bc20339f8da852da Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:45 -0500 Subject: NLM: Add nlmclnt_release_call Add a helper function to simplify the freeing of NLM client requests. Signed-off-by: Trond Myklebust --- include/linux/lockd/lockd.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index cb9933d04091..e7ba8110d579 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -86,8 +86,9 @@ struct nlm_rqst { struct nlm_host * a_host; /* host handle */ struct nlm_args a_args; /* arguments */ struct nlm_res a_res; /* result */ + struct nlm_block * a_block; unsigned int a_retries; /* Retry count */ - char a_owner[NLMCLNT_OHSIZE]; + u8 a_owner[NLMCLNT_OHSIZE]; }; /* @@ -115,7 +116,7 @@ struct nlm_block { struct kref b_count; /* Reference count */ struct nlm_block * b_next; /* linked list (all blocks) */ struct nlm_block * b_fnext; /* linked list (per file) */ - struct nlm_rqst b_call; /* RPC args & callback info */ + struct nlm_rqst * b_call; /* RPC args & callback info */ struct svc_serv * b_daemon; /* NLM service */ struct nlm_host * b_host; /* host handle for RPC clnt */ unsigned long b_when; /* next re-xmit */ @@ -147,7 +148,9 @@ extern unsigned long nlmsvc_timeout; /* * Lockd client functions */ -struct nlm_rqst * nlmclnt_alloc_call(void); +struct nlm_rqst * nlm_alloc_call(struct nlm_host *host); +void nlm_release_call(struct nlm_rqst *); +int nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *); struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl); void nlmclnt_finish_block(struct nlm_wait *block); int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout); @@ -172,7 +175,6 @@ extern struct nlm_host *nlm_find_client(void); /* * Server-side lock handling */ -int nlmsvc_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *); u32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, struct nlm_lock *, int, struct nlm_cookie *); u32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *); -- cgit v1.2.3 From d47166244860eb5dfdb12ee4703968beef8a0db2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:45 -0500 Subject: lockd: Add helper for *_RES callbacks Signed-off-by: Trond Myklebust --- include/linux/lockd/lockd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index e7ba8110d579..a04137d0c5de 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -151,6 +151,7 @@ extern unsigned long nlmsvc_timeout; struct nlm_rqst * nlm_alloc_call(struct nlm_host *host); void nlm_release_call(struct nlm_rqst *); int nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *); +int nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *); struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl); void nlmclnt_finish_block(struct nlm_wait *block); int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout); -- cgit v1.2.3 From 5428154827c2bf7cfdc9dab60db1e0eaa57c027a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:49 -0500 Subject: SUNRPC: Fix a 'Busy inodes' error in rpc_pipefs Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/rpc_pipe_fs.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index e37c06128e51..8fe9f35eba31 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -60,6 +60,7 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; char cl_pathname[30];/* Path in rpc_pipe_fs */ + struct vfsmount * cl_vfsmnt; struct dentry * cl_dentry; /* inode */ struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 63929349571f..2c2189cb30aa 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -45,6 +45,8 @@ extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); extern int rpc_rmdir(char *); extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags); extern int rpc_unlink(char *); +extern struct vfsmount *rpc_get_mount(void); +extern void rpc_put_mount(void); #endif #endif -- cgit v1.2.3 From c42de9dd67250fe984e0e31c9b542d721af6454b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:51 -0500 Subject: NFS: Fix a race in nfs_sync_inode() Kudos to Neil Brown for spotting the problem: "in nfs_sync_inode, there is effectively the sequence: nfs_wait_on_requests nfs_flush_inode nfs_commit_inode This seems a bit racy to me as if the only requests are on the ->commit list, and nfs_commit_inode is called separately after nfs_wait_on_requests completes, and before nfs_commit_inode start (say: by nfs_write_inode) then none of these function will return >0, yet there will be some pending request that aren't waited for." The solution is to search for requests to wait upon, search for dirty requests, and search for uncommitted requests while holding the nfsi->req_lock The patch also cleans up nfs_sync_inode(), getting rid of the redundant FLUSH_WAIT flag. It turns out that we were always setting it. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 55de0770df4a..cbebd7d1b9e8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -56,9 +56,7 @@ * When flushing a cluster of dirty pages, there can be different * strategies: */ -#define FLUSH_AGING 0 /* only flush old buffers */ #define FLUSH_SYNC 1 /* file being synced, or contention */ -#define FLUSH_WAIT 2 /* wait for completion */ #define FLUSH_STABLE 4 /* commit to stable storage */ #define FLUSH_LOWPRI 8 /* low priority background flush */ #define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ @@ -419,7 +417,7 @@ void nfs_commit_free(struct nfs_write_data *p); * Try to write back everything synchronously (but check the * return value!) */ -extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int); +extern int nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); extern void nfs_commit_release(void *wdata); @@ -440,7 +438,7 @@ nfs_have_writebacks(struct inode *inode) static inline int nfs_wb_all(struct inode *inode) { - int error = nfs_sync_inode(inode, 0, 0, FLUSH_WAIT); + int error = nfs_sync_inode_wait(inode, 0, 0, 0); return (error < 0) ? error : 0; } @@ -449,8 +447,8 @@ nfs_wb_all(struct inode *inode) */ static inline int nfs_wb_page_priority(struct inode *inode, struct page* page, int how) { - int error = nfs_sync_inode(inode, page->index, 1, - how | FLUSH_WAIT | FLUSH_STABLE); + int error = nfs_sync_inode_wait(inode, page->index, 1, + how | FLUSH_STABLE); return (error < 0) ? error : 0; } -- cgit v1.2.3 From eaa82edf20d738a7ae31f4b0a5f72f64c14a58df Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Mar 2006 23:24:04 -0500 Subject: SUNRPC,RPCSEC_GSS: fix krb5 sequence numbers. Use a spinlock to ensure unique sequence numbers when creating krb5 gss tokens. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_krb5.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 2c3601d31045..1279280d7196 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -53,6 +53,8 @@ struct krb5_ctx { struct xdr_netobj mech_used; }; +extern spinlock_t krb5_seq_lock; + #define KG_TOK_MIC_MSG 0x0101 #define KG_TOK_WRAP_MSG 0x0201 -- cgit v1.2.3 From f3ee439f43381e45b191cf721b4a51d41f33301f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Mar 2006 23:24:13 -0500 Subject: LOCKD: nlmsvc_traverse_blocks return is unused Note that we never return non-zero. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/lockd/lockd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index a04137d0c5de..995f89dc8c04 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -183,7 +183,7 @@ u32 nlmsvc_testlock(struct nlm_file *, struct nlm_lock *, struct nlm_lock *); u32 nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *); unsigned long nlmsvc_retry_blocked(void); -int nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, +void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, int action); void nlmsvc_grant_reply(struct svc_rqst *, struct nlm_cookie *, u32); -- cgit v1.2.3 From 5f12191bc000ea31970339a5f54c11087506711c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Mar 2006 23:24:25 -0500 Subject: LOCKD: Make nlmsvc_traverse_shares return void The nlmsvc_traverse_shares return value is always zero, hence useless. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/lockd/share.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lockd/share.h b/include/linux/lockd/share.h index 5d8aa325f140..c75a424ebe4c 100644 --- a/include/linux/lockd/share.h +++ b/include/linux/lockd/share.h @@ -25,6 +25,6 @@ u32 nlmsvc_share_file(struct nlm_host *, struct nlm_file *, struct nlm_args *); u32 nlmsvc_unshare_file(struct nlm_host *, struct nlm_file *, struct nlm_args *); -int nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, int); +void nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, int); #endif /* LINUX_LOCKD_SHARE_H */ -- cgit v1.2.3