summaryrefslogtreecommitdiff
path: root/fs/orangefs
AgeCommit message (Collapse)Author
2018-06-05vfs: change inode times to use struct timespec64Deepa Dinamani
struct timespec is not y2038 safe. Transition vfs to use y2038 safe struct timespec64 instead. The change was made with the help of the following cocinelle script. This catches about 80% of the changes. All the header file and logic changes are included in the first 5 rules. The rest are trivial substitutions. I avoid changing any of the function signatures or any other filesystem specific data structures to keep the patch simple for review. The script can be a little shorter by combining different cases. But, this version was sufficient for my usecase. virtual patch @ depends on patch @ identifier now; @@ - struct timespec + struct timespec64 current_time ( ... ) { - struct timespec now = current_kernel_time(); + struct timespec64 now = current_kernel_time64(); ... - return timespec_trunc( + return timespec64_trunc( ... ); } @ depends on patch @ identifier xtime; @@ struct \( iattr \| inode \| kstat \) { ... - struct timespec xtime; + struct timespec64 xtime; ... } @ depends on patch @ identifier t; @@ struct inode_operations { ... int (*update_time) (..., - struct timespec t, + struct timespec64 t, ...); ... } @ depends on patch @ identifier t; identifier fn_update_time =~ "update_time$"; @@ fn_update_time (..., - struct timespec *t, + struct timespec64 *t, ...) { ... } @ depends on patch @ identifier t; @@ lease_get_mtime( ... , - struct timespec *t + struct timespec64 *t ) { ... } @te depends on patch forall@ identifier ts; local idexpression struct inode *inode_node; identifier i_xtime =~ "^i_[acm]time$"; identifier ia_xtime =~ "^ia_[acm]time$"; identifier fn_update_time =~ "update_time$"; identifier fn; expression e, E3; local idexpression struct inode *node1; local idexpression struct inode *node2; local idexpression struct iattr *attr1; local idexpression struct iattr *attr2; local idexpression struct iattr attr; identifier i_xtime1 =~ "^i_[acm]time$"; identifier i_xtime2 =~ "^i_[acm]time$"; identifier ia_xtime1 =~ "^ia_[acm]time$"; identifier ia_xtime2 =~ "^ia_[acm]time$"; @@ ( ( - struct timespec ts; + struct timespec64 ts; | - struct timespec ts = current_time(inode_node); + struct timespec64 ts = current_time(inode_node); ) <+... when != ts ( - timespec_equal(&inode_node->i_xtime, &ts) + timespec64_equal(&inode_node->i_xtime, &ts) | - timespec_equal(&ts, &inode_node->i_xtime) + timespec64_equal(&ts, &inode_node->i_xtime) | - timespec_compare(&inode_node->i_xtime, &ts) + timespec64_compare(&inode_node->i_xtime, &ts) | - timespec_compare(&ts, &inode_node->i_xtime) + timespec64_compare(&ts, &inode_node->i_xtime) | ts = current_time(e) | fn_update_time(..., &ts,...) | inode_node->i_xtime = ts | node1->i_xtime = ts | ts = inode_node->i_xtime | <+... attr1->ia_xtime ...+> = ts | ts = attr1->ia_xtime | ts.tv_sec | ts.tv_nsec | btrfs_set_stack_timespec_sec(..., ts.tv_sec) | btrfs_set_stack_timespec_nsec(..., ts.tv_nsec) | - ts = timespec64_to_timespec( + ts = ... -) | - ts = ktime_to_timespec( + ts = ktime_to_timespec64( ...) | - ts = E3 + ts = timespec_to_timespec64(E3) | - ktime_get_real_ts(&ts) + ktime_get_real_ts64(&ts) | fn(..., - ts + timespec64_to_timespec(ts) ,...) ) ...+> ( <... when != ts - return ts; + return timespec64_to_timespec(ts); ...> ) | - timespec_equal(&node1->i_xtime1, &node2->i_xtime2) + timespec64_equal(&node1->i_xtime2, &node2->i_xtime2) | - timespec_equal(&node1->i_xtime1, &attr2->ia_xtime2) + timespec64_equal(&node1->i_xtime2, &attr2->ia_xtime2) | - timespec_compare(&node1->i_xtime1, &node2->i_xtime2) + timespec64_compare(&node1->i_xtime1, &node2->i_xtime2) | node1->i_xtime1 = - timespec_trunc(attr1->ia_xtime1, + timespec64_trunc(attr1->ia_xtime1, ...) | - attr1->ia_xtime1 = timespec_trunc(attr2->ia_xtime2, + attr1->ia_xtime1 = timespec64_trunc(attr2->ia_xtime2, ...) | - ktime_get_real_ts(&attr1->ia_xtime1) + ktime_get_real_ts64(&attr1->ia_xtime1) | - ktime_get_real_ts(&attr.ia_xtime1) + ktime_get_real_ts64(&attr.ia_xtime1) ) @ depends on patch @ struct inode *node; struct iattr *attr; identifier fn; identifier i_xtime =~ "^i_[acm]time$"; identifier ia_xtime =~ "^ia_[acm]time$"; expression e; @@ ( - fn(node->i_xtime); + fn(timespec64_to_timespec(node->i_xtime)); | fn(..., - node->i_xtime); + timespec64_to_timespec(node->i_xtime)); | - e = fn(attr->ia_xtime); + e = fn(timespec64_to_timespec(attr->ia_xtime)); ) @ depends on patch forall @ struct inode *node; struct iattr *attr; identifier i_xtime =~ "^i_[acm]time$"; identifier ia_xtime =~ "^ia_[acm]time$"; identifier fn; @@ { + struct timespec ts; <+... ( + ts = timespec64_to_timespec(node->i_xtime); fn (..., - &node->i_xtime, + &ts, ...); | + ts = timespec64_to_timespec(attr->ia_xtime); fn (..., - &attr->ia_xtime, + &ts, ...); ) ...+> } @ depends on patch forall @ struct inode *node; struct iattr *attr; struct kstat *stat; identifier ia_xtime =~ "^ia_[acm]time$"; identifier i_xtime =~ "^i_[acm]time$"; identifier xtime =~ "^[acm]time$"; identifier fn, ret; @@ { + struct timespec ts; <+... ( + ts = timespec64_to_timespec(node->i_xtime); ret = fn (..., - &node->i_xtime, + &ts, ...); | + ts = timespec64_to_timespec(node->i_xtime); ret = fn (..., - &node->i_xtime); + &ts); | + ts = timespec64_to_timespec(attr->ia_xtime); ret = fn (..., - &attr->ia_xtime, + &ts, ...); | + ts = timespec64_to_timespec(attr->ia_xtime); ret = fn (..., - &attr->ia_xtime); + &ts); | + ts = timespec64_to_timespec(stat->xtime); ret = fn (..., - &stat->xtime); + &ts); ) ...+> } @ depends on patch @ struct inode *node; struct inode *node2; identifier i_xtime1 =~ "^i_[acm]time$"; identifier i_xtime2 =~ "^i_[acm]time$"; identifier i_xtime3 =~ "^i_[acm]time$"; struct iattr *attrp; struct iattr *attrp2; struct iattr attr ; identifier ia_xtime1 =~ "^ia_[acm]time$"; identifier ia_xtime2 =~ "^ia_[acm]time$"; struct kstat *stat; struct kstat stat1; struct timespec64 ts; identifier xtime =~ "^[acmb]time$"; expression e; @@ ( ( node->i_xtime2 \| attrp->ia_xtime2 \| attr.ia_xtime2 \) = node->i_xtime1 ; | node->i_xtime2 = \( node2->i_xtime1 \| timespec64_trunc(...) \); | node->i_xtime2 = node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \); | node->i_xtime1 = node->i_xtime3 = \(ts \| current_time(...) \); | stat->xtime = node2->i_xtime1; | stat1.xtime = node2->i_xtime1; | ( node->i_xtime2 \| attrp->ia_xtime2 \) = attrp->ia_xtime1 ; | ( attrp->ia_xtime1 \| attr.ia_xtime1 \) = attrp2->ia_xtime2; | - e = node->i_xtime1; + e = timespec64_to_timespec( node->i_xtime1 ); | - e = attrp->ia_xtime1; + e = timespec64_to_timespec( attrp->ia_xtime1 ); | node->i_xtime1 = current_time(...); | node->i_xtime2 = node->i_xtime1 = node->i_xtime3 = - e; + timespec_to_timespec64(e); | node->i_xtime1 = node->i_xtime3 = - e; + timespec_to_timespec64(e); | - node->i_xtime1 = e; + node->i_xtime1 = timespec_to_timespec64(e); ) Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com> Cc: <anton@tuxera.com> Cc: <balbi@kernel.org> Cc: <bfields@fieldses.org> Cc: <darrick.wong@oracle.com> Cc: <dhowells@redhat.com> Cc: <dsterba@suse.com> Cc: <dwmw2@infradead.org> Cc: <hch@lst.de> Cc: <hirofumi@mail.parknet.co.jp> Cc: <hubcap@omnibond.com> Cc: <jack@suse.com> Cc: <jaegeuk@kernel.org> Cc: <jaharkes@cs.cmu.edu> Cc: <jslaby@suse.com> Cc: <keescook@chromium.org> Cc: <mark@fasheh.com> Cc: <miklos@szeredi.hu> Cc: <nico@linaro.org> Cc: <reiserfs-devel@vger.kernel.org> Cc: <richard@nod.at> Cc: <sage@redhat.com> Cc: <sfrench@samba.org> Cc: <swhiteho@redhat.com> Cc: <tj@kernel.org> Cc: <trond.myklebust@primarydata.com> Cc: <tytso@mit.edu> Cc: <viro@zeniv.linux.org.uk>
2018-06-01orangefs: use sparse annotations for holding locks across function calls.Mike Marshall
Sparse complained and Al Viro knew what to do... Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-06-01orangefs: make debug_help_fops staticMike Marshall
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-06-01orangefs: remove unused function orangefs_get_bufmap_initMike Marshall
get_bufmap_init is used in the out-of-tree module, but was left in the upstream version as an oversight. Tip-of-the-hat to sparse and Al Viro. Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-06-01orangefs: specify user pointers when using dev_map_desc and bufmapMike Marshall
Sparse lead me to the dev_map_desc one and Al Viro lead me to the bufmap one. Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-06-01orangefs: formatting cleanupsMike Marshall
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-06-01orangefs: set i_size on new symlinkMartin Brandenburg
As long as a symlink inode remains in-core, the destination (and therefore size) will not be re-fetched from the server, as it cannot change. The original implementation of the attribute cache assumed that setting the expiry time in the past was sufficient to cause a re-fetch of all attributes on the next getattr. That does not work in this case. The bug manifested itself as follows. When the command sequence touch foo; ln -s foo bar; ls -l bar is run, the output was lrwxrwxrwx. 1 fedora fedora 4906 Apr 24 19:10 bar -> foo However, after a re-mount, ls -l bar produces lrwxrwxrwx. 1 fedora fedora 3 Apr 24 19:10 bar -> foo After this commit, even before a re-mount, the output is lrwxrwxrwx. 1 fedora fedora 3 Apr 24 19:10 bar -> foo Reported-by: Becky Ligon <ligon@clemson.edu> Signed-off-by: Martin Brandenburg <martin@omnibond.com> Fixes: 71680c18c8f2 ("orangefs: Cache getattr results.") Cc: stable@vger.kernel.org Cc: hubcap@omnibond.com Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-06-01orangefs: report attributes_mask and attributes for statxMartin Brandenburg
OrangeFS formerly failed to set attributes_mask with the result that software could not see immutable and append flags present in the filesystem. Reported-by: Becky Ligon <ligon@clemson.edu> Signed-off-by: Martin Brandenburg <martin@omnibond.com> Fixes: 68a24a6cc4a6 ("orangefs: implement statx") Cc: stable@vger.kernel.org Cc: hubcap@omnibond.com Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-06-01orangefs: make struct orangefs_file_vm_ops staticColin Ian King
The struct orangefs_file_vm_ops is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: fs/orangefs/file.c:547:35: warning: symbol 'orangefs_file_vm_ops' was not declared. Should it be static? Signed-off-by: Colin Ian King <colin.king@canonical.com> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-06-01orangefs: revamp block sizesMartin Brandenburg
Now the superblock block size is PAGE_SIZE. The inode block size is PAGE_SIZE for directories and symlinks, but is the server-reported block size for regular files. The block size in the OrangeFS private inode is now deleted. Stat now reports PAGE_SIZE for directories and symlinks and the server-reported block size for regular files. The user-space visible change is that the block size for directores and symlinks and the superblock is now PAGE_SIZE rather than the size of the client-core shared memory buffers, which was typically four megabytes. Reported-by: Becky Ligon <ligon@clemson.edu> Signed-off-by: Martin Brandenburg <martin@omnibond.com> Cc: hubcap@omnibond.com Cc: walt@omnibond.com Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-05-22orangefs_lookup: simplifyAl Viro
d_splice_alias() can handle NULL and ERR_PTR() for inode just fine... Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-05-11do d_instantiate/unlock_new_inode combinations safelyAl Viro
For anything NFS-exported we do _not_ want to unlock new inode before it has grown an alias; original set of fixes got the ordering right, but missed the nasty complication in case of lockdep being enabled - unlock_new_inode() does lockdep_annotate_inode_mutex_key(inode) which can only be done before anyone gets a chance to touch ->i_mutex. Unfortunately, flipping the order and doing unlock_new_inode() before d_instantiate() opens a window when mkdir can race with open-by-fhandle on a guessed fhandle, leading to multiple aliases for a directory inode and all the breakage that follows from that. Correct solution: a new primitive (d_instantiate_new()) combining these two in the right order - lockdep annotate, then d_instantiate(), then the rest of unlock_new_inode(). All combinations of d_instantiate() with unlock_new_inode() should be converted to that. Cc: stable@kernel.org # 2.6.29 and later Tested-by: Mike Marshall <hubcap@omnibond.com> Reviewed-by: Andreas Dilger <adilger@dilger.ca> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-04-15orangefs_kill_sb(): deal with allocation failuresAl Viro
orangefs_fill_sb() might've failed to allocate ORANGEFS_SB(s); don't oops in that case. Cc: stable@kernel.org Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-04-09Merge tag 'for-linus-4.17-ofs' of ↵Linus Torvalds
git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux Pull orangefs updates from Mike Marshall: "Fixes and cleanups: - Documentation cleanups - removal of unused code - make some structs static - implement Orangefs vm_operations fault callout - eliminate two single-use functions and put their cleaned up code in line. - replace a vmalloc/memset instance with vzalloc - fix a race condition bug in wait code" * tag 'for-linus-4.17-ofs' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux: Orangefs: documentation updates orangefs: document package install and xfstests procedure orangefs: remove unused code orangefs: make several *_operations structs static orangefs: implement vm_ops->fault orangefs: open code short single-use functions orangefs: replace vmalloc and memset with vzalloc orangefs: bug fix for a race condition when getting a slot
2018-04-06Merge branch 'work.misc' of ↵Linus Torvalds
git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs Pull misc vfs updates from Al Viro: "Assorted stuff, including Christoph's I_DIRTY patches" * 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: fs: move I_DIRTY_INODE to fs.h ubifs: fix bogus __mark_inode_dirty(I_DIRTY_SYNC | I_DIRTY_DATASYNC) call ntfs: fix bogus __mark_inode_dirty(I_DIRTY_SYNC | I_DIRTY_DATASYNC) call gfs2: fix bogus __mark_inode_dirty(I_DIRTY_SYNC | I_DIRTY_DATASYNC) calls fs: fold open_check_o_direct into do_dentry_open vfs: Replace stray non-ASCII homoglyph characters with their ASCII equivalents vfs: make sure struct filename->iname is word-aligned get rid of pointless includes of fs_struct.h [poll] annotate SAA6588_CMD_POLL users
2018-04-03orangefs: remove unused codeMartin Brandenburg
Signed-off-by: Martin Brandenburg <martin@omnibond.com> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-04-03orangefs: make several *_operations structs staticMartin Brandenburg
Signed-off-by: Martin Brandenburg <martin@omnibond.com> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-04-03orangefs: implement vm_ops->faultMartin Brandenburg
Must retrieve size before running filemap_fault so the kernel has an up-to-date size. This should have been caught by xfstests generic/246, but it was masked by orangefs_new_inode, which set i_size to PAGE_SIZE. When nothing caused a getattr prior to a pagefault, i_size was still PAGE_SIZE. Since xfstests only read 10 bytes, it did not catch this bug. When orangefs_new_inode was modified to perform a getattr instead, i_size was set to zero, as it was a newly created file. Then orangefs_file_write_iter did NOT set i_size. Instead it invalidated the attribute cache, which should have caused the next caller to retrieve i_size. But the fault handler did not know it was supposed to retrieve i_size. So during xfstests, i_size was still zero, and filemap_fault returned VM_FAULT_SIGBUS. Fixes xfstests generic/452. Signed-off-by: Martin Brandenburg <martin@omnibond.com> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-04-02orangefs: open code short single-use functionsMartin Brandenburg
Signed-off-by: Martin Brandenburg <martin@omnibond.com> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-04-02orangefs: replace vmalloc and memset with vzallocColin Ian King
Use vzalloc instead of the vmalloc, memset combo Signed-off-by: Colin Ian King <colin.king@canonical.com> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-04-02orangefs: bug fix for a race condition when getting a slotDavid Reynolds
When a slot becomes free, call wake_up_locked regardless of the number of slots available. Without this patch, wake_up_locked is only called when going from no free slots to one. This means that there is a chance a waiting task will not be woken up. In many cases, the system will bounce between 0 and 1 free slots, and the waiting tasks will be woken up. But if there is still a waiting task and another slot becomes available before the number of free slots reaches zero, that waiting task may never be woken up since the number of free slots may never reach zero again. The bug behavior is easy to reproduce with the following script, where /mnt/orangefs is an OrangeFS file system. for i in {1..100}; do for j in {1..20}; do dd if=/dev/zero of=/mnt/orangefs/tmp$j bs=32768 count=32 & done wait done Signed-off-by: David Reynolds <david@omnibond.com> Reviewed-by: Martin Brandenburg <martin@omnibond.com> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-03-27treewide: Fix typos in printkMasanari Iida
This patch fixes spelling typos found in printk. Signed-off-by: Masanari Iida <standby24x7@gmail.com> Acked-by: Randy Dunlap <rdunlap@infradead.org> Signed-off-by: Jiri Kosina <jkosina@suse.cz>
2018-02-22get rid of pointless includes of fs_struct.hAl Viro
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-02-11vfs: do bulk POLL* -> EPOLL* replacementLinus Torvalds
This is the mindless scripted replacement of kernel use of POLL* variables as described by Al, done by this script: for V in IN OUT PRI ERR RDNORM RDBAND WRNORM WRBAND HUP RDHUP NVAL MSG; do L=`git grep -l -w POLL$V | grep -v '^t' | grep -v /um/ | grep -v '^sa' | grep -v '/poll.h$'|grep -v '^D'` for f in $L; do sed -i "-es/^\([^\"]*\)\(\<POLL$V\>\)/\\1E\\2/" $f; done done with de-mangling cleanups yet to come. NOTE! On almost all architectures, the EPOLL* constants have the same values as the POLL* constants do. But they keyword here is "almost". For various bad reasons they aren't the same, and epoll() doesn't actually work quite correctly in some cases due to this on Sparc et al. The next patch from Al will sort out the final differences, and we should be all done. Scripted-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-02-08Merge tag 'for-linus-4.16' of ↵Linus Torvalds
git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux Pull orangefs updates from Mike Marshall: "Mostly cleanups, but three bug fixes: - don't pass garbage return codes back up the call chain (Mike Marshall) - fix stale inode test (Martin Brandenburg) - fix off-by-one errors (Xiongfeng Wang) Also add Martin as a reviewer in the Maintainers file" * tag 'for-linus-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux: orangefs: reverse sense of is-inode-stale test in d_revalidate orangefs: simplify orangefs_inode_is_stale Orangefs: don't propogate whacky error codes orangefs: use correct string length orangefs: make orangefs_make_bad_inode static orangefs: remove ORANGEFS_KERNEL_DEBUG orangefs: remove gossip_ldebug and gossip_lerr orangefs: make orangefs_client_debug_init static MAINTAINERS: update orangefs list and add myself as reviewer
2018-02-06orangefs: reverse sense of is-inode-stale test in d_revalidateMartin Brandenburg
If a dentry is deleted, then a dentry is recreated with the same handle but a different type (i.e. it was a file and now it's a symlink), then its a different inode. The check was backwards, so d_revalidate would not have noticed. Due to the design of the OrangeFS server, this is rather unlikely. It's also possible for the dentry to be deleted and recreated with the same type. This would be undetectable. It's a bit of a ship of Theseus. Signed-off-by: Martin Brandenburg <martin@omnibond.com> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-02-06orangefs: simplify orangefs_inode_is_staleMartin Brandenburg
Check whether this is a new inode at location of call. Raises the question of what to do with an unknown inode type. Old code would've marked the inode bad and returned ESTALE. Signed-off-by: Martin Brandenburg <martin@omnibond.com> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-02-06Orangefs: don't propogate whacky error codesMike Marshall
When we get an error return code from userspace (the client-core) we check to make sure it is a valid code. This patch maps the whacky return code to -EINVAL instead of propagating garbage back up the call chain potentially resulting in a hard-to-find train-wreck. The client-core doesn't have any business returning whacky return codes, but if it does, we don't want the kernel to crash as a result. Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-02-06orangefs: use correct string lengthXiongfeng Wang
gcc-8 reports fs/orangefs/dcache.c: In function 'orangefs_d_revalidate': ./include/linux/string.h:245:9: warning: '__builtin_strncpy' specified bound 256 equals destination size [-Wstringop-truncation] fs/orangefs/namei.c: In function 'orangefs_rename': ./include/linux/string.h:245:9: warning: '__builtin_strncpy' specified bound 256 equals destination size [-Wstringop-truncation] fs/orangefs/super.c: In function 'orangefs_mount': ./include/linux/string.h:245:9: warning: '__builtin_strncpy' specified bound 256 equals destination size [-Wstringop-truncation] We need one less byte or call strlcpy() to make it a nul-terminated string. Signed-off-by: Xiongfeng Wang <xiongfeng.wang@linaro.org> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-02-06orangefs: make orangefs_make_bad_inode staticMartin Brandenburg
Signed-off-by: Martin Brandenburg <martin@omnibond.com> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-02-06orangefs: remove ORANGEFS_KERNEL_DEBUGMartin Brandenburg
It wasn't possible to enable it, and it would've had very little effect. Signed-off-by: Martin Brandenburg <martin@omnibond.com> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-02-06orangefs: remove gossip_ldebug and gossip_lerrMartin Brandenburg
gossip_ldebug is unused. gossip_lerr is used in two places. The messages are unique so line numbers are unnecessary. Also remove support for compiling gossip messages out. It wasn't possible to enable it anyway. Signed-off-by: Martin Brandenburg <martin@omnibond.com> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-02-06orangefs: make orangefs_client_debug_init staticMartin Brandenburg
Signed-off-by: Martin Brandenburg <martin@omnibond.com> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2018-02-03Merge tag 'usercopy-v4.16-rc1' of ↵Linus Torvalds
git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux Pull hardened usercopy whitelisting from Kees Cook: "Currently, hardened usercopy performs dynamic bounds checking on slab cache objects. This is good, but still leaves a lot of kernel memory available to be copied to/from userspace in the face of bugs. To further restrict what memory is available for copying, this creates a way to whitelist specific areas of a given slab cache object for copying to/from userspace, allowing much finer granularity of access control. Slab caches that are never exposed to userspace can declare no whitelist for their objects, thereby keeping them unavailable to userspace via dynamic copy operations. (Note, an implicit form of whitelisting is the use of constant sizes in usercopy operations and get_user()/put_user(); these bypass all hardened usercopy checks since these sizes cannot change at runtime.) This new check is WARN-by-default, so any mistakes can be found over the next several releases without breaking anyone's system. The series has roughly the following sections: - remove %p and improve reporting with offset - prepare infrastructure and whitelist kmalloc - update VFS subsystem with whitelists - update SCSI subsystem with whitelists - update network subsystem with whitelists - update process memory with whitelists - update per-architecture thread_struct with whitelists - update KVM with whitelists and fix ioctl bug - mark all other allocations as not whitelisted - update lkdtm for more sensible test overage" * tag 'usercopy-v4.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux: (38 commits) lkdtm: Update usercopy tests for whitelisting usercopy: Restrict non-usercopy caches to size 0 kvm: x86: fix KVM_XEN_HVM_CONFIG ioctl kvm: whitelist struct kvm_vcpu_arch arm: Implement thread_struct whitelist for hardened usercopy arm64: Implement thread_struct whitelist for hardened usercopy x86: Implement thread_struct whitelist for hardened usercopy fork: Provide usercopy whitelisting for task_struct fork: Define usercopy region in thread_stack slab caches fork: Define usercopy region in mm_struct slab caches net: Restrict unwhitelisted proto caches to size 0 sctp: Copy struct sctp_sock.autoclose to userspace using put_user() sctp: Define usercopy region in SCTP proto slab cache caif: Define usercopy region in caif proto slab cache ip: Define usercopy region in IP proto slab cache net: Define usercopy region in struct proto slab cache scsi: Define usercopy region in scsi_sense_cache slab cache cifs: Define usercopy region in cifs_request slab cache vxfs: Define usercopy region in vxfs_inode slab cache ufs: Define usercopy region in ufs_inode_cache slab cache ...
2018-01-30Merge branch 'misc.poll' of ↵Linus Torvalds
git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs Pull poll annotations from Al Viro: "This introduces a __bitwise type for POLL### bitmap, and propagates the annotations through the tree. Most of that stuff is as simple as 'make ->poll() instances return __poll_t and do the same to local variables used to hold the future return value'. Some of the obvious brainos found in process are fixed (e.g. POLLIN misspelled as POLL_IN). At that point the amount of sparse warnings is low and most of them are for genuine bugs - e.g. ->poll() instance deciding to return -EINVAL instead of a bitmap. I hadn't touched those in this series - it's large enough as it is. Another problem it has caught was eventpoll() ABI mess; select.c and eventpoll.c assumed that corresponding POLL### and EPOLL### were equal. That's true for some, but not all of them - EPOLL### are arch-independent, but POLL### are not. The last commit in this series separates userland POLL### values from the (now arch-independent) kernel-side ones, converting between them in the few places where they are copied to/from userland. AFAICS, this is the least disruptive fix preserving poll(2) ABI and making epoll() work on all architectures. As it is, it's simply broken on sparc - try to give it EPOLLWRNORM and it will trigger only on what would've triggered EPOLLWRBAND on other architectures. EPOLLWRBAND and EPOLLRDHUP, OTOH, are never triggered at all on sparc. With this patch they should work consistently on all architectures" * 'misc.poll' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (37 commits) make kernel-side POLL... arch-independent eventpoll: no need to mask the result of epi_item_poll() again eventpoll: constify struct epoll_event pointers debugging printk in sg_poll() uses %x to print POLL... bitmap annotate poll(2) guts 9p: untangle ->poll() mess ->si_band gets POLL... bitmap stored into a user-visible long field ring_buffer_poll_wait() return value used as return value of ->poll() the rest of drivers/*: annotate ->poll() instances media: annotate ->poll() instances fs: annotate ->poll() instances ipc, kernel, mm: annotate ->poll() instances net: annotate ->poll() instances apparmor: annotate ->poll() instances tomoyo: annotate ->poll() instances sound: annotate ->poll() instances acpi: annotate ->poll() instances crypto: annotate ->poll() instances block: annotate ->poll() instances x86: annotate ->poll() instances ...
2018-01-25orangefs: fix deadlock; do not write i_size in read_iterMartin Brandenburg
After do_readv_writev, the inode cache is invalidated anyway, so i_size will never be read. It will be fetched from the server which will also know about updates from other machines. Fixes deadlock on 32-bit SMP. See https://marc.info/?l=linux-fsdevel&m=151268557427760&w=2 Signed-off-by: Martin Brandenburg <martin@omnibond.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Mike Marshall <hubcap@omnibond.com> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-01-22orangefs: initialize op on loop restart in orangefs_devreq_readMartin Brandenburg
In orangefs_devreq_read, there is a loop which picks an op off the list of pending ops. If the loop fails to find an op, there is nothing to read, and it returns EAGAIN. If the op has been given up on, the loop is restarted via a goto. The bug is that the variable which the found op is written to is not reinitialized, so if there are no more eligible ops on the list, the code runs again on the already handled op. This is triggered by interrupting a process while the op is being copied to the client-core. It's a fairly small window, but it's there. Signed-off-by: Martin Brandenburg <martin@omnibond.com> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-01-22orangefs: use list_for_each_entry_safe in purge_waiting_opsMartin Brandenburg
set_op_state_purged can delete the op. Signed-off-by: Martin Brandenburg <martin@omnibond.com> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-01-15orangefs: Define usercopy region in orangefs_inode_cache slab cacheDavid Windsor
orangefs symlink pathnames, stored in struct orangefs_inode_s.link_target and therefore contained in the orangefs_inode_cache, need to be copied to/from userspace. cache object allocation: fs/orangefs/super.c: orangefs_alloc_inode(...): ... orangefs_inode = kmem_cache_alloc(orangefs_inode_cache, ...); ... return &orangefs_inode->vfs_inode; fs/orangefs/orangefs-utils.c: exofs_symlink(...): ... inode->i_link = orangefs_inode->link_target; example usage trace: readlink_copy+0x43/0x70 vfs_readlink+0x62/0x110 SyS_readlinkat+0x100/0x130 fs/namei.c: readlink_copy(..., link): ... copy_to_user(..., link, len); (inlined in vfs_readlink) generic_readlink(dentry, ...): struct inode *inode = d_inode(dentry); const char *link = inode->i_link; ... readlink_copy(..., link); In support of usercopy hardening, this patch defines a region in the orangefs_inode_cache slab cache in which userspace copy operations are allowed. This region is known as the slab cache's usercopy region. Slab caches can now check that each dynamically sized copy operation involving cache-managed memory falls entirely within the slab's usercopy region. This patch is modified from Brad Spengler/PaX Team's PAX_USERCOPY whitelisting code in the last public patch of grsecurity/PaX based on my understanding of the code. Changes or omissions from the original code are mine and don't reflect the original grsecurity/PaX code. Signed-off-by: David Windsor <dave@nullcore.net> [kees: adjust commit log, provide usage trace] Cc: Mike Marshall <hubcap@omnibond.com> Signed-off-by: Kees Cook <keescook@chromium.org>
2017-11-27fs: annotate ->poll() instancesAl Viro
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2017-11-27orangefs: fix a braino in ->poll()Al Viro
It's POLLIN, not POLL_IN... Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2017-11-27Rename superblock flags (MS_xyz -> SB_xyz)Linus Torvalds
This is a pure automated search-and-replace of the internal kernel superblock flags. The s_flags are now called SB_*, with the names and the values for the moment mirroring the MS_* flags that they're equivalent to. Note how the MS_xyz flags are the ones passed to the mount system call, while the SB_xyz flags are what we then use in sb->s_flags. The script to do this was: # places to look in; re security/*: it generally should *not* be # touched (that stuff parses mount(2) arguments directly), but # there are two places where we really deal with superblock flags. FILES="drivers/mtd drivers/staging/lustre fs ipc mm \ include/linux/fs.h include/uapi/linux/bfs_fs.h \ security/apparmor/apparmorfs.c security/apparmor/include/lib.h" # the list of MS_... constants SYMS="RDONLY NOSUID NODEV NOEXEC SYNCHRONOUS REMOUNT MANDLOCK \ DIRSYNC NOATIME NODIRATIME BIND MOVE REC VERBOSE SILENT \ POSIXACL UNBINDABLE PRIVATE SLAVE SHARED RELATIME KERNMOUNT \ I_VERSION STRICTATIME LAZYTIME SUBMOUNT NOREMOTELOCK NOSEC BORN \ ACTIVE NOUSER" SED_PROG= for i in $SYMS; do SED_PROG="$SED_PROG -e s/MS_$i/SB_$i/g"; done # we want files that contain at least one of MS_..., # with fs/namespace.c and fs/pnode.c excluded. L=$(for i in $SYMS; do git grep -w -l MS_$i $FILES; done| sort|uniq|grep -v '^fs/namespace.c'|grep -v '^fs/pnode.c') for f in $L; do sed -i $f $SED_PROG; done Requested-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-11-21Merge tag 'for-linus-4.15-ofs1' of ↵Linus Torvalds
git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux Pull orangefs updates from Mike Marshall: "Fix: - stop setting atime on inode dirty (Martin Brandenburg) Cleanups: - remove initialization of i_version (Jeff Layton) - use ARRAY_SIZE (Jérémy Lefaure) - call op_release sooner when creating inodes (Mike MarshallMartin Brandenburg)" * tag 'for-linus-4.15-ofs1' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux: orangefs: call op_release sooner when creating inodes orangefs: stop setting atime on inode dirty orangefs: use ARRAY_SIZE orangefs: remove initialization of i_version
2017-11-17Merge branch 'work.iov_iter' of ↵Linus Torvalds
git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs Pull iov_iter updates from Al Viro: - bio_{map,copy}_user_iov() series; those are cleanups - fixes from the same pile went into mainline (and stable) in late September. - fs/iomap.c iov_iter-related fixes - new primitive - iov_iter_for_each_range(), which applies a function to kernel-mapped segments of an iov_iter. Usable for kvec and bvec ones, the latter does kmap()/kunmap() around the callback. _Not_ usable for iovec- or pipe-backed iov_iter; the latter is not hard to fix if the need ever appears, the former is by design. Another related primitive will have to wait for the next cycle - it passes page + offset + size instead of pointer + size, and that one will be usable for everything _except_ kvec. Unfortunately, that one didn't get exposure in -next yet, so... - a bit more lustre iov_iter work, including a use case for iov_iter_for_each_range() (checksum calculation) - vhost/scsi leak fix in failure exit - misc cleanups and detritectomy... * 'work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (21 commits) iomap_dio_actor(): fix iov_iter bugs switch ksocknal_lib_recv_...() to use of iov_iter_for_each_range() lustre: switch struct ksock_conn to iov_iter vhost/scsi: switch to iov_iter_get_pages() fix a page leak in vhost_scsi_iov_to_sgl() error recovery new primitive: iov_iter_for_each_range() lnet_return_rx_credits_locked: don't abuse list_entry xen: don't open-code iov_iter_kvec() orangefs: remove detritus from struct orangefs_kiocb_s kill iov_shorten() bio_alloc_map_data(): do bmd->iter setup right there bio_copy_user_iov(): saner bio size calculation bio_map_user_iov(): get rid of copying iov_iter bio_copy_from_iter(): get rid of copying iov_iter move more stuff down into bio_copy_user_iov() blk_rq_map_user_iov(): move iov_iter_advance() down bio_map_user_iov(): get rid of the iov_for_each() bio_map_user_iov(): move alignment check into the main loop don't rely upon subsequent bio_add_pc_page() calls failing ... and with iov_iter_get_pages_alloc() it becomes even simpler ...
2017-11-13orangefs: call op_release sooner when creating inodesMartin Brandenburg
Prevents holding an unnecessary op while the kernel processes another op and yields the CPU. Signed-off-by: Martin Brandenburg <martin@omnibond.com> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2017-11-13orangefs: stop setting atime on inode dirtyMartin Brandenburg
The previous code path was to mark the inode dirty, let orangefs_inode_dirty set a flag in our private inode, then later during inode release call orangefs_flush_inode which notices the flag and writes the atime out. The code path worked almost identically for mtime, ctime, and mode except that those flags are set explicitly and not as side effects of dirty. Now orangefs_flush_inode is removed. Marking an inode dirty does not imply an atime update. Any place where flags were set before is now an explicit call to orangefs_inode_setattr. Since OrangeFS does not utilize inode writeback, the attribute change should be written out immediately. Fixes generic/120. In namei.c, there are several places where the directory mtime and ctime are set, but only the mtime is sent to the server. These don't seem right, but I've left them as is for now. Signed-off-by: Martin Brandenburg <martin@omnibond.com> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2017-11-13orangefs: use ARRAY_SIZEJérémy Lefaure
Using the ARRAY_SIZE macro improves the readability of the code. Found with Coccinelle with the following semantic patch: @r depends on (org || report)@ type T; T[] E; position p; @@ ( (sizeof(E)@p /sizeof(*E)) | (sizeof(E)@p /sizeof(E[...])) | (sizeof(E)@p /sizeof(T)) ) Signed-off-by: Jérémy Lefaure <jeremy.lefaure@lse.epita.fr> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2017-11-13orangefs: remove initialization of i_versionJeff Layton
...as it's completely unused. Signed-off-by: Jeff Layton <jlayton@redhat.com> Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2017-11-02License cleanup: add SPDX GPL-2.0 license identifier to files with no licenseGreg Kroah-Hartman
Many source files in the tree are missing licensing information, which makes it harder for compliance tools to determine the correct license. By default all files without license information are under the default license of the kernel, which is GPL version 2. Update the files which contain no license information with the 'GPL-2.0' SPDX license identifier. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This patch is based on work done by Thomas Gleixner and Kate Stewart and Philippe Ombredanne. How this work was done: Patches were generated and checked against linux-4.14-rc6 for a subset of the use cases: - file had no licensing information it it. - file was a */uapi/* one with no licensing information in it, - file was a */uapi/* one with existing licensing information, Further patches will be generated in subsequent months to fix up cases where non-standard license headers were used, and references to license had to be inferred by heuristics based on keywords. The analysis to determine which SPDX License Identifier to be applied to a file was done in a spreadsheet of side by side results from of the output of two independent scanners (ScanCode & Windriver) producing SPDX tag:value files created by Philippe Ombredanne. Philippe prepared the base worksheet, and did an initial spot review of a few 1000 files. The 4.13 kernel was the starting point of the analysis with 60,537 files assessed. Kate Stewart did a file by file comparison of the scanner results in the spreadsheet to determine which SPDX license identifier(s) to be applied to the file. She confirmed any determination that was not immediately clear with lawyers working with the Linux Foundation. Criteria used to select files for SPDX license identifier tagging was: - Files considered eligible had to be source code files. - Make and config files were included as candidates if they contained >5 lines of source - File already had some variant of a license header in it (even if <5 lines). All documentation files were explicitly excluded. The following heuristics were used to determine which SPDX license identifiers to apply. - when both scanners couldn't find any license traces, file was considered to have no license information in it, and the top level COPYING file license applied. For non */uapi/* files that summary was: SPDX license identifier # files ---------------------------------------------------|------- GPL-2.0 11139 and resulted in the first patch in this series. If that file was a */uapi/* path one, it was "GPL-2.0 WITH Linux-syscall-note" otherwise it was "GPL-2.0". Results of that was: SPDX license identifier # files ---------------------------------------------------|------- GPL-2.0 WITH Linux-syscall-note 930 and resulted in the second patch in this series. - if a file had some form of licensing information in it, and was one of the */uapi/* ones, it was denoted with the Linux-syscall-note if any GPL family license was found in the file or had no licensing in it (per prior point). Results summary: SPDX license identifier # files ---------------------------------------------------|------ GPL-2.0 WITH Linux-syscall-note 270 GPL-2.0+ WITH Linux-syscall-note 169 ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) 21 ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) 17 LGPL-2.1+ WITH Linux-syscall-note 15 GPL-1.0+ WITH Linux-syscall-note 14 ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) 5 LGPL-2.0+ WITH Linux-syscall-note 4 LGPL-2.1 WITH Linux-syscall-note 3 ((GPL-2.0 WITH Linux-syscall-note) OR MIT) 3 ((GPL-2.0 WITH Linux-syscall-note) AND MIT) 1 and that resulted in the third patch in this series. - when the two scanners agreed on the detected license(s), that became the concluded license(s). - when there was disagreement between the two scanners (one detected a license but the other didn't, or they both detected different licenses) a manual inspection of the file occurred. - In most cases a manual inspection of the information in the file resulted in a clear resolution of the license that should apply (and which scanner probably needed to revisit its heuristics). - When it was not immediately clear, the license identifier was confirmed with lawyers working with the Linux Foundation. - If there was any question as to the appropriate license identifier, the file was flagged for further research and to be revisited later in time. In total, over 70 hours of logged manual review was done on the spreadsheet to determine the SPDX license identifiers to apply to the source files by Kate, Philippe, Thomas and, in some cases, confirmation by lawyers working with the Linux Foundation. Kate also obtained a third independent scan of the 4.13 code base from FOSSology, and compared selected files where the other two scanners disagreed against that SPDX file, to see if there was new insights. The Windriver scanner is based on an older version of FOSSology in part, so they are related. Thomas did random spot checks in about 500 files from the spreadsheets for the uapi headers and agreed with SPDX license identifier in the files he inspected. For the non-uapi files Thomas did random spot checks in about 15000 files. In initial set of patches against 4.14-rc6, 3 files were found to have copy/paste license identifier errors, and have been fixed to reflect the correct identifier. Additionally Philippe spent 10 hours this week doing a detailed manual inspection and review of the 12,461 patched files from the initial patch version early this week with: - a full scancode scan run, collecting the matched texts, detected license ids and scores - reviewing anything where there was a license detected (about 500+ files) to ensure that the applied SPDX license was correct - reviewing anything where there was no detection but the patch license was not GPL-2.0 WITH Linux-syscall-note to ensure that the applied SPDX license was correct This produced a worksheet with 20 files needing minor correction. This worksheet was then exported into 3 different .csv files for the different types of files to be modified. These .csv files were then reviewed by Greg. Thomas wrote a script to parse the csv files and add the proper SPDX tag to the file, in the format that the file expected. This script was further refined by Greg based on the output to detect more types of files automatically and to distinguish between header and source .c files (which need different comment types.) Finally Greg ran the script using the .csv files to generate the patches. Reviewed-by: Kate Stewart <kstewart@linuxfoundation.org> Reviewed-by: Philippe Ombredanne <pombredanne@nexb.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-10-11orangefs: remove detritus from struct orangefs_kiocb_sAl Viro
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>