From a2b426267c56773201f968fdb5eda6ab9ae94e34 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 29 Apr 2017 14:12:15 -0500 Subject: userns,pidns: Verify the userns for new pid namespaces It is pointless and confusing to allow a pid namespace hierarchy and the user namespace hierarchy to get out of sync. The owner of a child pid namespace should be the owner of the parent pid namespace or a descendant of the owner of the parent pid namespace. Otherwise it is possible to construct scenarios where a process has a capability over a parent pid namespace but does not have the capability over a child pid namespace. Which confusingly makes permission checks non-transitive. It requires use of setns into a pid namespace (but not into a user namespace) to create such a scenario. Add the function in_userns to help in making this determination. v2: Optimized in_userns by using level as suggested by: Kirill Tkhai Ref: 49f4d8b93ccf ("pidns: Capture the user namespace and filter ns_last_pid") Signed-off-by: "Eric W. Biederman" --- include/linux/user_namespace.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 32354b4b4b2b..4005877bb8b6 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -112,8 +112,9 @@ extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); extern int proc_setgroups_show(struct seq_file *m, void *v); extern bool userns_may_setgroups(const struct user_namespace *ns); +extern bool in_userns(const struct user_namespace *ancestor, + const struct user_namespace *child); extern bool current_in_userns(const struct user_namespace *target_ns); - struct ns_common *ns_get_owner(struct ns_common *ns); #else @@ -144,6 +145,12 @@ static inline bool userns_may_setgroups(const struct user_namespace *ns) return true; } +static inline bool in_userns(const struct user_namespace *ancestor, + const struct user_namespace *child) +{ + return true; +} + static inline bool current_in_userns(const struct user_namespace *target_ns) { return true; -- cgit v1.2.3 From d08477aa975e97f1dc64c0ae59cebf98520456ce Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 29 Jun 2017 09:28:50 -0500 Subject: fcntl: Don't use ambiguous SIG_POLL si_codes We have a weird and problematic intersection of features that when they all come together result in ambiguous siginfo values, that we can not support properly. - Supporting fcntl(F_SETSIG,...) with arbitrary valid signals. - Using positive values for POLL_IN, POLL_OUT, POLL_MSG, ..., etc that imply they are signal specific si_codes and using the aforementioned arbitrary signal to deliver them. - Supporting injection of arbitrary siginfo values for debugging and checkpoint/restore. The result is that just looking at siginfo si_codes of 1 to 6 are ambigious. It could either be a signal specific si_code or it could be a generic si_code. For most of the kernel this is a non-issue but for sending signals with siginfo it is impossible to play back the kernel signals and get the same result. Strictly speaking when the si_code was changed from SI_SIGIO to POLL_IN and friends between 2.2 and 2.4 this functionality was not ambiguous, as only real time signals were supported. Before 2.4 was released the kernel began supporting siginfo with non realtime signals so they could give details of why the signal was sent. The result is that if F_SETSIG is set to one of the signals with signal specific si_codes then user space can not know why the signal was sent. I grepped through a bunch of userspace programs using debian code search to get a feel for how often people choose a signal that results in an ambiguous si_code. I only found one program doing so and it was using SIGCHLD to test the F_SETSIG functionality, and did not appear to be a real world usage. Therefore the ambiguity does not appears to be a real world problem in practice. Remove the ambiguity while introducing the smallest chance of breakage by changing the si_code to SI_SIGIO when signals with signal specific si_codes are targeted. Fixes: v2.3.40 -- Added support for queueing non-rt signals Fixes: v2.3.21 -- Changed the si_code from SI_SIGIO Signed-off-by: "Eric W. Biederman" --- include/linux/signal.h | 8 ++++++++ include/uapi/asm-generic/siginfo.h | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/signal.h b/include/linux/signal.h index e2678b5dbb21..c97cc20369c0 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -380,10 +380,18 @@ int unhandled_signal(struct task_struct *tsk, int sig); rt_sigmask(SIGCONT) | rt_sigmask(SIGCHLD) | \ rt_sigmask(SIGWINCH) | rt_sigmask(SIGURG) ) +#define SIG_SPECIFIC_SICODES_MASK (\ + rt_sigmask(SIGILL) | rt_sigmask(SIGFPE) | \ + rt_sigmask(SIGSEGV) | rt_sigmask(SIGBUS) | \ + rt_sigmask(SIGTRAP) | rt_sigmask(SIGCHLD) | \ + rt_sigmask(SIGPOLL) | rt_sigmask(SIGSYS) | \ + SIGEMT_MASK ) + #define sig_kernel_only(sig) siginmask(sig, SIG_KERNEL_ONLY_MASK) #define sig_kernel_coredump(sig) siginmask(sig, SIG_KERNEL_COREDUMP_MASK) #define sig_kernel_ignore(sig) siginmask(sig, SIG_KERNEL_IGNORE_MASK) #define sig_kernel_stop(sig) siginmask(sig, SIG_KERNEL_STOP_MASK) +#define sig_specific_sicodes(sig) siginmask(sig, SIG_SPECIFIC_SICODES_MASK) #define sig_fatal(t, signr) \ (!siginmask(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \ diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index 9c4eca6b374a..9e956ea94d57 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -184,7 +184,7 @@ typedef struct siginfo { #define SI_TIMER __SI_CODE(__SI_TIMER,-2) /* sent by timer expiration */ #define SI_MESGQ __SI_CODE(__SI_MESGQ,-3) /* sent by real time mesq state change */ #define SI_ASYNCIO -4 /* sent by AIO completion */ -#define SI_SIGIO -5 /* sent by queued SIGIO */ +#define SI_SIGIO __SI_CODE(__SI_POLL,-5) /* sent by queued SIGIO */ #define SI_TKILL -6 /* sent by tkill system call */ #define SI_DETHREAD -7 /* sent by execve() killing subsidiary threads */ @@ -259,7 +259,7 @@ typedef struct siginfo { #define NSIGCHLD 6 /* - * SIGPOLL si_codes + * SIGPOLL (or any other signal without signal specific si_codes) si_codes */ #define POLL_IN (__SI_POLL|1) /* data input available */ #define POLL_OUT (__SI_POLL|2) /* output buffers available */ -- cgit v1.2.3 From cc731525f26af85a1c3537da41e0abd1d35e0bdb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 16 Jul 2017 22:36:59 -0500 Subject: signal: Remove kernel interal si_code magic struct siginfo is a union and the kernel since 2.4 has been hiding a union tag in the high 16bits of si_code using the values: __SI_KILL __SI_TIMER __SI_POLL __SI_FAULT __SI_CHLD __SI_RT __SI_MESGQ __SI_SYS While this looks plausible on the surface, in practice this situation has not worked well. - Injected positive signals are not copied to user space properly unless they have these magic high bits set. - Injected positive signals are not reported properly by signalfd unless they have these magic high bits set. - These kernel internal values leaked to userspace via ptrace_peek_siginfo - It was possible to inject these kernel internal values and cause the the kernel to misbehave. - Kernel developers got confused and expected these kernel internal values in userspace in kernel self tests. - Kernel developers got confused and set si_code to __SI_FAULT which is SI_USER in userspace which causes userspace to think an ordinary user sent the signal and that it was not kernel generated. - The values make it impossible to reorganize the code to transform siginfo_copy_to_user into a plain copy_to_user. As si_code must be massaged before being passed to userspace. So remove these kernel internal si codes and make the kernel code simpler and more maintainable. To replace these kernel internal magic si_codes introduce the helper function siginfo_layout, that takes a signal number and an si_code and computes which union member of siginfo is being used. Have siginfo_layout return an enumeration so that gcc will have enough information to warn if a switch statement does not handle all of union members. A couple of architectures have a messed up ABI that defines signal specific duplications of SI_USER which causes more special cases in siginfo_layout than I would like. The good news is only problem architectures pay the cost. Update all of the code that used the previous magic __SI_ values to use the new SIL_ values and to call siginfo_layout to get those values. Escept where not all of the cases are handled remove the defaults in the switch statements so that if a new case is missed in the future the lack will show up at compile time. Modify the code that copies siginfo si_code to userspace to just copy the value and not cast si_code to a short first. The high bits are no longer used to hold a magic union member. Fixup the siginfo header files to stop including the __SI_ values in their constants and for the headers that were missing it to properly update the number of si_codes for each signal type. The fixes to copy_siginfo_from_user32 implementations has the interesting property that several of them perviously should never have worked as the __SI_ values they depended up where kernel internal. With that dependency gone those implementations should work much better. The idea of not passing the __SI_ values out to userspace and then not reinserting them has been tested with criu and criu worked without changes. Ref: 2.4.0-test1 Signed-off-by: "Eric W. Biederman" --- include/linux/signal.h | 14 +++++ include/uapi/asm-generic/siginfo.h | 115 +++++++++++++++---------------------- 2 files changed, 60 insertions(+), 69 deletions(-) (limited to 'include') diff --git a/include/linux/signal.h b/include/linux/signal.h index c97cc20369c0..38564e3e54c7 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -21,6 +21,20 @@ static inline void copy_siginfo(struct siginfo *to, struct siginfo *from) int copy_siginfo_to_user(struct siginfo __user *to, const struct siginfo *from); +enum siginfo_layout { + SIL_KILL, + SIL_TIMER, + SIL_POLL, + SIL_FAULT, + SIL_CHLD, + SIL_RT, +#ifdef __ARCH_SIGSYS + SIL_SYS, +#endif +}; + +enum siginfo_layout siginfo_layout(int sig, int si_code); + /* * Define some primitives to manipulate sigset_t. */ diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index 9e956ea94d57..e5aa6794cea4 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -151,29 +151,6 @@ typedef struct siginfo { #define si_arch _sifields._sigsys._arch #endif -#ifdef __KERNEL__ -#define __SI_MASK 0xffff0000u -#define __SI_KILL (0 << 16) -#define __SI_TIMER (1 << 16) -#define __SI_POLL (2 << 16) -#define __SI_FAULT (3 << 16) -#define __SI_CHLD (4 << 16) -#define __SI_RT (5 << 16) -#define __SI_MESGQ (6 << 16) -#define __SI_SYS (7 << 16) -#define __SI_CODE(T,N) ((T) | ((N) & 0xffff)) -#else /* __KERNEL__ */ -#define __SI_KILL 0 -#define __SI_TIMER 0 -#define __SI_POLL 0 -#define __SI_FAULT 0 -#define __SI_CHLD 0 -#define __SI_RT 0 -#define __SI_MESGQ 0 -#define __SI_SYS 0 -#define __SI_CODE(T,N) (N) -#endif /* __KERNEL__ */ - /* * si_code values * Digital reserves positive values for kernel-generated signals. @@ -181,10 +158,10 @@ typedef struct siginfo { #define SI_USER 0 /* sent by kill, sigsend, raise */ #define SI_KERNEL 0x80 /* sent by the kernel from somewhere */ #define SI_QUEUE -1 /* sent by sigqueue */ -#define SI_TIMER __SI_CODE(__SI_TIMER,-2) /* sent by timer expiration */ -#define SI_MESGQ __SI_CODE(__SI_MESGQ,-3) /* sent by real time mesq state change */ +#define SI_TIMER -2 /* sent by timer expiration */ +#define SI_MESGQ -3 /* sent by real time mesq state change */ #define SI_ASYNCIO -4 /* sent by AIO completion */ -#define SI_SIGIO __SI_CODE(__SI_POLL,-5) /* sent by queued SIGIO */ +#define SI_SIGIO -5 /* sent by queued SIGIO */ #define SI_TKILL -6 /* sent by tkill system call */ #define SI_DETHREAD -7 /* sent by execve() killing subsidiary threads */ @@ -194,86 +171,86 @@ typedef struct siginfo { /* * SIGILL si_codes */ -#define ILL_ILLOPC (__SI_FAULT|1) /* illegal opcode */ -#define ILL_ILLOPN (__SI_FAULT|2) /* illegal operand */ -#define ILL_ILLADR (__SI_FAULT|3) /* illegal addressing mode */ -#define ILL_ILLTRP (__SI_FAULT|4) /* illegal trap */ -#define ILL_PRVOPC (__SI_FAULT|5) /* privileged opcode */ -#define ILL_PRVREG (__SI_FAULT|6) /* privileged register */ -#define ILL_COPROC (__SI_FAULT|7) /* coprocessor error */ -#define ILL_BADSTK (__SI_FAULT|8) /* internal stack error */ +#define ILL_ILLOPC 1 /* illegal opcode */ +#define ILL_ILLOPN 2 /* illegal operand */ +#define ILL_ILLADR 3 /* illegal addressing mode */ +#define ILL_ILLTRP 4 /* illegal trap */ +#define ILL_PRVOPC 5 /* privileged opcode */ +#define ILL_PRVREG 6 /* privileged register */ +#define ILL_COPROC 7 /* coprocessor error */ +#define ILL_BADSTK 8 /* internal stack error */ #define NSIGILL 8 /* * SIGFPE si_codes */ -#define FPE_INTDIV (__SI_FAULT|1) /* integer divide by zero */ -#define FPE_INTOVF (__SI_FAULT|2) /* integer overflow */ -#define FPE_FLTDIV (__SI_FAULT|3) /* floating point divide by zero */ -#define FPE_FLTOVF (__SI_FAULT|4) /* floating point overflow */ -#define FPE_FLTUND (__SI_FAULT|5) /* floating point underflow */ -#define FPE_FLTRES (__SI_FAULT|6) /* floating point inexact result */ -#define FPE_FLTINV (__SI_FAULT|7) /* floating point invalid operation */ -#define FPE_FLTSUB (__SI_FAULT|8) /* subscript out of range */ +#define FPE_INTDIV 1 /* integer divide by zero */ +#define FPE_INTOVF 2 /* integer overflow */ +#define FPE_FLTDIV 3 /* floating point divide by zero */ +#define FPE_FLTOVF 4 /* floating point overflow */ +#define FPE_FLTUND 5 /* floating point underflow */ +#define FPE_FLTRES 6 /* floating point inexact result */ +#define FPE_FLTINV 7 /* floating point invalid operation */ +#define FPE_FLTSUB 8 /* subscript out of range */ #define NSIGFPE 8 /* * SIGSEGV si_codes */ -#define SEGV_MAPERR (__SI_FAULT|1) /* address not mapped to object */ -#define SEGV_ACCERR (__SI_FAULT|2) /* invalid permissions for mapped object */ -#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */ -#define SEGV_PKUERR (__SI_FAULT|4) /* failed protection key checks */ +#define SEGV_MAPERR 1 /* address not mapped to object */ +#define SEGV_ACCERR 2 /* invalid permissions for mapped object */ +#define SEGV_BNDERR 3 /* failed address bound checks */ +#define SEGV_PKUERR 4 /* failed protection key checks */ #define NSIGSEGV 4 /* * SIGBUS si_codes */ -#define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */ -#define BUS_ADRERR (__SI_FAULT|2) /* non-existent physical address */ -#define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */ +#define BUS_ADRALN 1 /* invalid address alignment */ +#define BUS_ADRERR 2 /* non-existent physical address */ +#define BUS_OBJERR 3 /* object specific hardware error */ /* hardware memory error consumed on a machine check: action required */ -#define BUS_MCEERR_AR (__SI_FAULT|4) +#define BUS_MCEERR_AR 4 /* hardware memory error detected in process but not consumed: action optional*/ -#define BUS_MCEERR_AO (__SI_FAULT|5) +#define BUS_MCEERR_AO 5 #define NSIGBUS 5 /* * SIGTRAP si_codes */ -#define TRAP_BRKPT (__SI_FAULT|1) /* process breakpoint */ -#define TRAP_TRACE (__SI_FAULT|2) /* process trace trap */ -#define TRAP_BRANCH (__SI_FAULT|3) /* process taken branch trap */ -#define TRAP_HWBKPT (__SI_FAULT|4) /* hardware breakpoint/watchpoint */ +#define TRAP_BRKPT 1 /* process breakpoint */ +#define TRAP_TRACE 2 /* process trace trap */ +#define TRAP_BRANCH 3 /* process taken branch trap */ +#define TRAP_HWBKPT 4 /* hardware breakpoint/watchpoint */ #define NSIGTRAP 4 /* * SIGCHLD si_codes */ -#define CLD_EXITED (__SI_CHLD|1) /* child has exited */ -#define CLD_KILLED (__SI_CHLD|2) /* child was killed */ -#define CLD_DUMPED (__SI_CHLD|3) /* child terminated abnormally */ -#define CLD_TRAPPED (__SI_CHLD|4) /* traced child has trapped */ -#define CLD_STOPPED (__SI_CHLD|5) /* child has stopped */ -#define CLD_CONTINUED (__SI_CHLD|6) /* stopped child has continued */ +#define CLD_EXITED 1 /* child has exited */ +#define CLD_KILLED 2 /* child was killed */ +#define CLD_DUMPED 3 /* child terminated abnormally */ +#define CLD_TRAPPED 4 /* traced child has trapped */ +#define CLD_STOPPED 5 /* child has stopped */ +#define CLD_CONTINUED 6 /* stopped child has continued */ #define NSIGCHLD 6 /* * SIGPOLL (or any other signal without signal specific si_codes) si_codes */ -#define POLL_IN (__SI_POLL|1) /* data input available */ -#define POLL_OUT (__SI_POLL|2) /* output buffers available */ -#define POLL_MSG (__SI_POLL|3) /* input message available */ -#define POLL_ERR (__SI_POLL|4) /* i/o error */ -#define POLL_PRI (__SI_POLL|5) /* high priority input available */ -#define POLL_HUP (__SI_POLL|6) /* device disconnected */ +#define POLL_IN 1 /* data input available */ +#define POLL_OUT 2 /* output buffers available */ +#define POLL_MSG 3 /* input message available */ +#define POLL_ERR 4 /* i/o error */ +#define POLL_PRI 5 /* high priority input available */ +#define POLL_HUP 6 /* device disconnected */ #define NSIGPOLL 6 /* * SIGSYS si_codes */ -#define SYS_SECCOMP (__SI_SYS|1) /* seccomp triggered */ -#define NSIGSYS 1 +#define SYS_SECCOMP 1 /* seccomp triggered */ +#define NSIGSYS 1 /* * sigevent definitions -- cgit v1.2.3 From 8db6c34f1dbc8e06aa016a9b829b06902c3e1340 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 8 May 2017 13:11:56 -0500 Subject: Introduce v3 namespaced file capabilities Root in a non-initial user ns cannot be trusted to write a traditional security.capability xattr. If it were allowed to do so, then any unprivileged user on the host could map his own uid to root in a private namespace, write the xattr, and execute the file with privilege on the host. However supporting file capabilities in a user namespace is very desirable. Not doing so means that any programs designed to run with limited privilege must continue to support other methods of gaining and dropping privilege. For instance a program installer must detect whether file capabilities can be assigned, and assign them if so but set setuid-root otherwise. The program in turn must know how to drop partial capabilities, and do so only if setuid-root. This patch introduces v3 of the security.capability xattr. It builds a vfs_ns_cap_data struct by appending a uid_t rootid to struct vfs_cap_data. This is the absolute uid_t (that is, the uid_t in user namespace which mounted the filesystem, usually init_user_ns) of the root id in whose namespaces the file capabilities may take effect. When a task asks to write a v2 security.capability xattr, if it is privileged with respect to the userns which mounted the filesystem, then nothing should change. Otherwise, the kernel will transparently rewrite the xattr as a v3 with the appropriate rootid. This is done during the execution of setxattr() to catch user-space-initiated capability writes. Subsequently, any task executing the file which has the noted kuid as its root uid, or which is in a descendent user_ns of such a user_ns, will run the file with capabilities. Similarly when asking to read file capabilities, a v3 capability will be presented as v2 if it applies to the caller's namespace. If a task writes a v3 security.capability, then it can provide a uid for the xattr so long as the uid is valid in its own user namespace, and it is privileged with CAP_SETFCAP over its namespace. The kernel will translate that rootid to an absolute uid, and write that to disk. After this, a task in the writer's namespace will not be able to use those capabilities (unless rootid was 0), but a task in a namespace where the given uid is root will. Only a single security.capability xattr may exist at a time for a given file. A task may overwrite an existing xattr so long as it is privileged over the inode. Note this is a departure from previous semantics, which required privilege to remove a security.capability xattr. This check can be re-added if deemed useful. This allows a simple setxattr to work, allows tar/untar to work, and allows us to tar in one namespace and untar in another while preserving the capability, without risking leaking privilege into a parent namespace. Example using tar: $ cp /bin/sleep sleepx $ mkdir b1 b2 $ lxc-usernsexec -m b:0:100000:1 -m b:1:$(id -u):1 -- chown 0:0 b1 $ lxc-usernsexec -m b:0:100001:1 -m b:1:$(id -u):1 -- chown 0:0 b2 $ lxc-usernsexec -m b:0:100000:1000 -- tar --xattrs-include=security.capability --xattrs -cf b1/sleepx.tar sleepx $ lxc-usernsexec -m b:0:100001:1000 -- tar --xattrs-include=security.capability --xattrs -C b2 -xf b1/sleepx.tar $ lxc-usernsexec -m b:0:100001:1000 -- getcap b2/sleepx b2/sleepx = cap_sys_admin+ep # /opt/ltp/testcases/bin/getv3xattr b2/sleepx v3 xattr, rootid is 100001 A patch to linux-test-project adding a new set of tests for this functionality is in the nsfscaps branch at github.com/hallyn/ltp Changelog: Nov 02 2016: fix invalid check at refuse_fcap_overwrite() Nov 07 2016: convert rootid from and to fs user_ns (From ebiederm: mar 28 2017) commoncap.c: fix typos - s/v4/v3 get_vfs_caps_from_disk: clarify the fs_ns root access check nsfscaps: change the code split for cap_inode_setxattr() Apr 09 2017: don't return v3 cap for caps owned by current root. return a v2 cap for a true v2 cap in non-init ns Apr 18 2017: . Change the flow of fscap writing to support s_user_ns writing. . Remove refuse_fcap_overwrite(). The value of the previous xattr doesn't matter. Apr 24 2017: . incorporate Eric's incremental diff . move cap_convert_nscap to setxattr and simplify its usage May 8, 2017: . fix leaking dentry refcount in cap_inode_getsecurity Signed-off-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- include/linux/capability.h | 2 ++ include/linux/security.h | 2 ++ include/uapi/linux/capability.h | 22 +++++++++++++++++++--- 3 files changed, 23 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/capability.h b/include/linux/capability.h index 6ffb67e10c06..b52e278e4744 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -248,4 +248,6 @@ extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); +extern int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size); + #endif /* !_LINUX_CAPABILITY_H */ diff --git a/include/linux/security.h b/include/linux/security.h index b6ea1dc9cc9d..6fff8c924718 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -91,6 +91,8 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name, extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); +extern int cap_inode_getsecurity(struct inode *inode, const char *name, + void **buffer, bool alloc); extern int cap_mmap_addr(unsigned long addr); extern int cap_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags); diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 6fe14d001f68..230e05d35191 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -60,9 +60,13 @@ typedef struct __user_cap_data_struct { #define VFS_CAP_U32_2 2 #define XATTR_CAPS_SZ_2 (sizeof(__le32)*(1 + 2*VFS_CAP_U32_2)) -#define XATTR_CAPS_SZ XATTR_CAPS_SZ_2 -#define VFS_CAP_U32 VFS_CAP_U32_2 -#define VFS_CAP_REVISION VFS_CAP_REVISION_2 +#define VFS_CAP_REVISION_3 0x03000000 +#define VFS_CAP_U32_3 2 +#define XATTR_CAPS_SZ_3 (sizeof(__le32)*(2 + 2*VFS_CAP_U32_3)) + +#define XATTR_CAPS_SZ XATTR_CAPS_SZ_3 +#define VFS_CAP_U32 VFS_CAP_U32_3 +#define VFS_CAP_REVISION VFS_CAP_REVISION_3 struct vfs_cap_data { __le32 magic_etc; /* Little endian */ @@ -72,6 +76,18 @@ struct vfs_cap_data { } data[VFS_CAP_U32]; }; +/* + * same as vfs_cap_data but with a rootid at the end + */ +struct vfs_ns_cap_data { + __le32 magic_etc; + struct { + __le32 permitted; /* Little endian */ + __le32 inheritable; /* Little endian */ + } data[VFS_CAP_U32]; + __le32 rootid; +}; + #ifndef __KERNEL__ /* -- cgit v1.2.3