From a07b20004793d8926f78d63eb5980559f7813404 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 5 Nov 2018 17:40:30 +0000 Subject: vfs: syscall: Add open_tree(2) to reference or clone a mount open_tree(dfd, pathname, flags) Returns an O_PATH-opened file descriptor or an error. dfd and pathname specify the location to open, in usual fashion (see e.g. fstatat(2)). flags should be an OR of some of the following: * AT_EMPTY_PATH, AT_NO_AUTOMOUNT, AT_SYMLINK_NOFOLLOW - same meanings as usual * OPEN_TREE_CLOEXEC - make the resulting descriptor close-on-exec * OPEN_TREE_CLONE or OPEN_TREE_CLONE | AT_RECURSIVE - instead of opening the location in question, create a detached mount tree matching the subtree rooted at location specified by dfd/pathname. With AT_RECURSIVE the entire subtree is cloned, without it - only the part within the mount containing the location in question. In other words, the same as mount --rbind or mount --bind would've taken. The detached tree will be dissolved on the final close of obtained file. Creation of such detached trees requires the same capabilities as doing mount --bind. 
Signed-off-by: Al Viro Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- arch/x86/entry/syscalls/syscall_32.tbl | 3 ++- arch/x86/entry/syscalls/syscall_64.tbl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 1f9607ed087c..ae2294d07ecb 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -398,7 +398,8 @@ 384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl 385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents 386 i386 rseq sys_rseq __ia32_sys_rseq -# don't use numbers 387 through 392, add new calls at the end +387 i386 open_tree sys_open_tree __ia32_sys_open_tree +# don't use numbers 388 through 392, add new calls at the end 393 i386 semget sys_semget __ia32_sys_semget 394 i386 semctl sys_semctl __ia32_compat_sys_semctl 395 i386 shmget sys_shmget __ia32_sys_shmget diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 92ee0b4378d4..a6e06c35b5b1 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -343,6 +343,7 @@ 332 common statx __x64_sys_statx 333 common io_pgetevents __x64_sys_io_pgetevents 334 common rseq __x64_sys_rseq +335 common open_tree __x64_sys_open_tree # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal __x64_sys_pidfd_send_signal -- cgit v1.2.3 From 2db154b3ea8e14b04fee23e3fdfd5e9d17fbc6ae Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 5 Nov 2018 17:40:30 +0000 Subject: vfs: syscall: Add move_mount(2) to move mounts around Add a move_mount() system call that will move a mount from one place to another and, in the next commit, allow to attach an unattached mount tree. 
The new system call looks like the following: int move_mount(int from_dfd, const char *from_path, int to_dfd, const char *to_path, unsigned int flags); Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- arch/x86/entry/syscalls/syscall_32.tbl | 3 ++- arch/x86/entry/syscalls/syscall_64.tbl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ae2294d07ecb..0db9effb18d9 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -399,7 +399,8 @@ 385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents 386 i386 rseq sys_rseq __ia32_sys_rseq 387 i386 open_tree sys_open_tree __ia32_sys_open_tree -# don't use numbers 388 through 392, add new calls at the end +388 i386 move_mount sys_move_mount __ia32_sys_move_mount +# don't use numbers 389 through 392, add new calls at the end 393 i386 semget sys_semget __ia32_sys_semget 394 i386 semctl sys_semctl __ia32_compat_sys_semctl 395 i386 shmget sys_shmget __ia32_sys_shmget diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index a6e06c35b5b1..0440f0eefa02 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -344,6 +344,7 @@ 333 common io_pgetevents __x64_sys_io_pgetevents 334 common rseq __x64_sys_rseq 335 common open_tree __x64_sys_open_tree +336 common move_mount __x64_sys_move_mount # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal __x64_sys_pidfd_send_signal -- cgit v1.2.3 From 24dcb3d90a1f67fe08c68a004af37df059d74005 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:33:31 +0000 Subject: vfs: syscall: Add fsopen() to prepare for superblock creation Provide an fsopen() system call that starts the process of preparing to create a 
superblock that will then be mountable, using an fd as a context handle. fsopen() is given the name of the filesystem that will be used: int mfd = fsopen(const char *fsname, unsigned int flags); where flags can be 0 or FSOPEN_CLOEXEC. For example: sfd = fsopen("ext4", FSOPEN_CLOEXEC); fsconfig(sfd, FSCONFIG_SET_PATH, "source", "/dev/sda1", AT_FDCWD); fsconfig(sfd, FSCONFIG_SET_FLAG, "noatime", NULL, 0); fsconfig(sfd, FSCONFIG_SET_FLAG, "acl", NULL, 0); fsconfig(sfd, FSCONFIG_SET_FLAG, "user_xattr", NULL, 0); fsconfig(sfd, FSCONFIG_SET_STRING, "sb", "1", 0); fsconfig(sfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); fsinfo(sfd, NULL, ...); // query new superblock attributes mfd = fsmount(sfd, FSMOUNT_CLOEXEC, MS_RELATIME); move_mount(mfd, "", AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH); sfd = fsopen("afs", 0); fsconfig(sfd, FSCONFIG_SET_STRING, "source", "#grand.central.org:root.cell", 0); fsconfig(sfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); mfd = fsmount(sfd, 0, MS_NODEV); move_mount(mfd, "", AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH); If an error is reported at any step, an error message may be available to be read() back (ENODATA will be reported if there isn't an error available) in the form: "e <subsys>:<problem>" "e SELinux:Mount on mountpoint not permitted" Once fsmount() has been called, further fsconfig() calls will incur EBUSY, even if the fsmount() fails. read() is still possible to retrieve error information. The fsopen() syscall creates a mount context and hangs it off the fd that it returns. Netlink is not used because it is optional and would make the core VFS dependent on the networking layer and also potentially add network namespace issues. Note that, for the moment, the caller must have CAP_SYS_ADMIN to use fsopen(). 
Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- arch/x86/entry/syscalls/syscall_32.tbl | 3 ++- arch/x86/entry/syscalls/syscall_64.tbl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 0db9effb18d9..37fd1fc5396e 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -400,7 +400,8 @@ 386 i386 rseq sys_rseq __ia32_sys_rseq 387 i386 open_tree sys_open_tree __ia32_sys_open_tree 388 i386 move_mount sys_move_mount __ia32_sys_move_mount -# don't use numbers 389 through 392, add new calls at the end +389 i386 fsopen sys_fsopen __ia32_sys_fsopen +# don't use numbers 390 through 392, add new calls at the end 393 i386 semget sys_semget __ia32_sys_semget 394 i386 semctl sys_semctl __ia32_compat_sys_semctl 395 i386 shmget sys_shmget __ia32_sys_shmget diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 0440f0eefa02..511608a21611 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -345,6 +345,7 @@ 334 common rseq __x64_sys_rseq 335 common open_tree __x64_sys_open_tree 336 common move_mount __x64_sys_move_mount +337 common fsopen __x64_sys_fsopen # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal __x64_sys_pidfd_send_signal -- cgit v1.2.3 From ecdab150fddb42fe6a739335257949220033b782 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:36:09 +0000 Subject: vfs: syscall: Add fsconfig() for configuring and managing a context Add a syscall for configuring a filesystem creation context and triggering actions upon it, to be used in conjunction with fsopen, fspick and fsmount. 
long fsconfig(int fs_fd, unsigned int cmd, const char *key, const void *value, int aux); Where fs_fd indicates the context, cmd indicates the action to take, key indicates the parameter name for parameter-setting actions and, if needed, value points to a buffer containing the value and aux can give more information for the value. The following command IDs are proposed: (*) FSCONFIG_SET_FLAG: No value is specified. The parameter must be boolean in nature. The key may be prefixed with "no" to invert the setting. value must be NULL and aux must be 0. (*) FSCONFIG_SET_STRING: A string value is specified. The parameter can be expecting boolean, integer, string or take a path. A conversion to an appropriate type will be attempted (which may include looking up as a path). value points to a NUL-terminated string and aux must be 0. (*) FSCONFIG_SET_BINARY: A binary blob is specified. value points to the blob and aux indicates its size. The parameter must be expecting a blob. (*) FSCONFIG_SET_PATH: A non-empty path is specified. The parameter must be expecting a path object. value points to a NUL-terminated string that is the path and aux is a file descriptor at which to start a relative lookup or AT_FDCWD. (*) FSCONFIG_SET_PATH_EMPTY: As FSCONFIG_SET_PATH, but with AT_EMPTY_PATH implied. (*) FSCONFIG_SET_FD: An open file descriptor is specified. value must be NULL and aux indicates the file descriptor. (*) FSCONFIG_CMD_CREATE: Trigger superblock creation. (*) FSCONFIG_CMD_RECONFIGURE: Trigger superblock reconfiguration. For the "set" command IDs, the idea is that the file_system_type will point to a list of parameters and the types of value that those parameters expect to take. The core code can then do the parse and argument conversion and then give the LSM and FS a cooked option or array of options to use. Source specification is also done the same way, using special keys "source", "source1", "source2", etc. [!] 
Note that, for the moment, the key and value are just glued back together and handed to the filesystem. Every filesystem that uses options uses match_token() and co. to do this, and this will need to be changed - but not all at once. Example usage: fd = fsopen("ext4", FSOPEN_CLOEXEC); fsconfig(fd, fsconfig_set_path, "source", "/dev/sda1", AT_FDCWD); fsconfig(fd, fsconfig_set_path_empty, "journal_path", "", journal_fd); fsconfig(fd, fsconfig_set_fd, "journal_fd", NULL, journal_fd); fsconfig(fd, fsconfig_set_flag, "user_xattr", NULL, 0); fsconfig(fd, fsconfig_set_flag, "noacl", NULL, 0); fsconfig(fd, fsconfig_set_string, "sb", "1", 0); fsconfig(fd, fsconfig_set_string, "errors", "continue", 0); fsconfig(fd, fsconfig_set_string, "data", "journal", 0); fsconfig(fd, fsconfig_set_string, "context", "unconfined_u:...", 0); fsconfig(fd, fsconfig_cmd_create, NULL, NULL, 0); mfd = fsmount(fd, FSMOUNT_CLOEXEC, MS_NOEXEC); or: fd = fsopen("ext4", FSOPEN_CLOEXEC); fsconfig(fd, fsconfig_set_string, "source", "/dev/sda1", 0); fsconfig(fd, fsconfig_cmd_create, NULL, NULL, 0); mfd = fsmount(fd, FSMOUNT_CLOEXEC, MS_NOEXEC); or: fd = fsopen("afs", FSOPEN_CLOEXEC); fsconfig(fd, fsconfig_set_string, "source", "#grand.central.org:root.cell", 0); fsconfig(fd, fsconfig_cmd_create, NULL, NULL, 0); mfd = fsmount(fd, FSMOUNT_CLOEXEC, MS_NOEXEC); or: fd = fsopen("jffs2", FSOPEN_CLOEXEC); fsconfig(fd, fsconfig_set_string, "source", "mtd0", 0); fsconfig(fd, fsconfig_cmd_create, NULL, NULL, 0); mfd = fsmount(fd, FSMOUNT_CLOEXEC, MS_NOEXEC); Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- arch/x86/entry/syscalls/syscall_32.tbl | 3 ++- arch/x86/entry/syscalls/syscall_64.tbl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 37fd1fc5396e..786728143205 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ 
b/arch/x86/entry/syscalls/syscall_32.tbl @@ -401,7 +401,8 @@ 387 i386 open_tree sys_open_tree __ia32_sys_open_tree 388 i386 move_mount sys_move_mount __ia32_sys_move_mount 389 i386 fsopen sys_fsopen __ia32_sys_fsopen -# don't use numbers 390 through 392, add new calls at the end +390 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig +# don't use numbers 391 through 392, add new calls at the end 393 i386 semget sys_semget __ia32_sys_semget 394 i386 semctl sys_semctl __ia32_compat_sys_semctl 395 i386 shmget sys_shmget __ia32_sys_shmget diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 511608a21611..7039a809d37d 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -346,6 +346,7 @@ 335 common open_tree __x64_sys_open_tree 336 common move_mount __x64_sys_move_mount 337 common fsopen __x64_sys_fsopen +338 common fsconfig __x64_sys_fsconfig # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal __x64_sys_pidfd_send_signal -- cgit v1.2.3 From 93766fbd2696c2c4453dd8e1070977e9cd4e6b6d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:36:14 +0000 Subject: vfs: syscall: Add fsmount() to create a mount for a superblock Provide a system call by which a filesystem opened with fsopen() and configured by a series of fsconfig() calls can have a detached mount object created for it. This mount object can then be attached to the VFS mount hierarchy using move_mount() by passing the returned file descriptor as the from directory fd. The system call looks like: int mfd = fsmount(int fsfd, unsigned int flags, unsigned int attr_flags); where fsfd is the file descriptor returned by fsopen(). flags can be 0 or FSMOUNT_CLOEXEC. 
attr_flags is a bitwise-OR of the following flags: MOUNT_ATTR_RDONLY Mount read-only MOUNT_ATTR_NOSUID Ignore suid and sgid bits MOUNT_ATTR_NODEV Disallow access to device special files MOUNT_ATTR_NOEXEC Disallow program execution MOUNT_ATTR__ATIME Setting on how atime should be updated MOUNT_ATTR_RELATIME - Update atime relative to mtime/ctime MOUNT_ATTR_NOATIME - Do not update access times MOUNT_ATTR_STRICTATIME - Always perform atime updates MOUNT_ATTR_NODIRATIME Do not update directory access times In the event that fsmount() fails, it may be possible to get an error message by calling read() on fsfd. If no message is available, ENODATA will be reported. Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- arch/x86/entry/syscalls/syscall_32.tbl | 3 ++- arch/x86/entry/syscalls/syscall_64.tbl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 786728143205..5b5c9189c507 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -402,7 +402,8 @@ 388 i386 move_mount sys_move_mount __ia32_sys_move_mount 389 i386 fsopen sys_fsopen __ia32_sys_fsopen 390 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig -# don't use numbers 391 through 392, add new calls at the end +391 i386 fsmount sys_fsmount __ia32_sys_fsmount +# don't use number 392, add new calls at the end 393 i386 semget sys_semget __ia32_sys_semget 394 i386 semctl sys_semctl __ia32_compat_sys_semctl 395 i386 shmget sys_shmget __ia32_sys_shmget diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 7039a809d37d..984ad594bb2b 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -347,6 +347,7 @@ 336 common move_mount __x64_sys_move_mount 337 common fsopen __x64_sys_fsopen 338 common fsconfig __x64_sys_fsconfig +339 common 
fsmount __x64_sys_fsmount # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal __x64_sys_pidfd_send_signal -- cgit v1.2.3 From cf3cba4a429be43e5527a3f78859b1bfd9ebc5fb Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:36:23 +0000 Subject: vfs: syscall: Add fspick() to select a superblock for reconfiguration Provide an fspick() system call that can be used to pick an existing mountpoint into an fs_context which can thereafter be used to reconfigure a superblock (equivalent of the superblock side of -o remount). This looks like: int fd = fspick(AT_FDCWD, "/mnt", FSPICK_CLOEXEC | FSPICK_NO_AUTOMOUNT); fsconfig(fd, FSCONFIG_SET_FLAG, "intr", NULL, 0); fsconfig(fd, FSCONFIG_SET_FLAG, "noac", NULL, 0); fsconfig(fd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0); At the point of fspick being called, the file descriptor referring to the filesystem context is in exactly the same state as the one that was created by fsopen() after fsmount() has been successfully called. 
Signed-off-by: David Howells cc: linux-api@vger.kernel.org Signed-off-by: Al Viro --- arch/x86/entry/syscalls/syscall_32.tbl | 2 +- arch/x86/entry/syscalls/syscall_64.tbl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 5b5c9189c507..4cd5f982b1e5 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -403,7 +403,7 @@ 389 i386 fsopen sys_fsopen __ia32_sys_fsopen 390 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig 391 i386 fsmount sys_fsmount __ia32_sys_fsmount -# don't use number 392, add new calls at the end +392 i386 fspick sys_fspick __ia32_sys_fspick 393 i386 semget sys_semget __ia32_sys_semget 394 i386 semctl sys_semctl __ia32_compat_sys_semctl 395 i386 shmget sys_shmget __ia32_sys_shmget diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 984ad594bb2b..64ca0d06259a 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -348,6 +348,7 @@ 337 common fsopen __x64_sys_fsopen 338 common fsconfig __x64_sys_fsconfig 339 common fsmount __x64_sys_fsmount +340 common fspick __x64_sys_fspick # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal __x64_sys_pidfd_send_signal -- cgit v1.2.3