summaryrefslogtreecommitdiff
path: root/ipc
diff options
context:
space:
mode:
Diffstat (limited to 'ipc')
-rw-r--r--ipc/compat.c174
-rw-r--r--ipc/mqueue.c15
-rw-r--r--ipc/msg.c123
-rw-r--r--ipc/msgutil.c112
-rw-r--r--ipc/namespace.c2
-rw-r--r--ipc/sem.c587
-rw-r--r--ipc/shm.c14
-rw-r--r--ipc/syscall.c6
-rw-r--r--ipc/util.c232
-rw-r--r--ipc/util.h19
10 files changed, 750 insertions, 534 deletions
diff --git a/ipc/compat.c b/ipc/compat.c
index 2547f29dcd1b..892f6585dd60 100644
--- a/ipc/compat.c
+++ b/ipc/compat.c
@@ -240,7 +240,7 @@ static inline int put_compat_semid_ds(struct semid64_ds *s,
static long do_compat_semctl(int first, int second, int third, u32 pad)
{
- union semun fourth;
+ unsigned long fourth;
int err, err2;
struct semid64_ds s64;
struct semid64_ds __user *up64;
@@ -249,9 +249,13 @@ static long do_compat_semctl(int first, int second, int third, u32 pad)
memset(&s64, 0, sizeof(s64));
if ((third & (~IPC_64)) == SETVAL)
- fourth.val = (int) pad;
+#ifdef __BIG_ENDIAN
+ fourth = (unsigned long)pad << 32;
+#else
+ fourth = pad;
+#endif
else
- fourth.__pad = compat_ptr(pad);
+ fourth = (unsigned long)compat_ptr(pad);
switch (third & (~IPC_64)) {
case IPC_INFO:
case IPC_RMID:
@@ -269,7 +273,7 @@ static long do_compat_semctl(int first, int second, int third, u32 pad)
case IPC_STAT:
case SEM_STAT:
up64 = compat_alloc_user_space(sizeof(s64));
- fourth.__pad = up64;
+ fourth = (unsigned long)up64;
err = sys_semctl(first, second, third, fourth);
if (err < 0)
break;
@@ -295,7 +299,7 @@ static long do_compat_semctl(int first, int second, int third, u32 pad)
if (err)
break;
- fourth.__pad = up64;
+ fourth = (unsigned long)up64;
err = sys_semctl(first, second, third, fourth);
break;
@@ -306,7 +310,7 @@ static long do_compat_semctl(int first, int second, int third, u32 pad)
return err;
}
-long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
+static long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
{
struct compat_msgbuf __user *msgp = dest;
size_t msgsz;
@@ -320,77 +324,117 @@ long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
return msgsz;
}
+#ifndef COMPAT_SHMLBA
+#define COMPAT_SHMLBA SHMLBA
+#endif
+
#ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC
-long compat_sys_semctl(int first, int second, int third, void __user *uptr)
+COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second,
+ u32, third, compat_uptr_t, ptr, u32, fifth)
{
+ int version;
u32 pad;
- if (!uptr)
- return -EINVAL;
- if (get_user(pad, (u32 __user *) uptr))
- return -EFAULT;
- return do_compat_semctl(first, second, third, pad);
-}
+ version = call >> 16; /* hack for backward compatibility */
+ call &= 0xffff;
+
+ switch (call) {
+ case SEMOP:
+ /* struct sembuf is the same on 32 and 64bit :)) */
+ return sys_semtimedop(first, compat_ptr(ptr), second, NULL);
+ case SEMTIMEDOP:
+ return compat_sys_semtimedop(first, compat_ptr(ptr), second,
+ compat_ptr(fifth));
+ case SEMGET:
+ return sys_semget(first, second, third);
+ case SEMCTL:
+ if (!ptr)
+ return -EINVAL;
+ if (get_user(pad, (u32 __user *) compat_ptr(ptr)))
+ return -EFAULT;
+ return do_compat_semctl(first, second, third, pad);
-long compat_sys_msgsnd(int first, int second, int third, void __user *uptr)
-{
- struct compat_msgbuf __user *up = uptr;
- long type;
+ case MSGSND: {
+ struct compat_msgbuf __user *up = compat_ptr(ptr);
+ compat_long_t type;
- if (first < 0)
- return -EINVAL;
- if (second < 0)
- return -EINVAL;
+ if (first < 0 || second < 0)
+ return -EINVAL;
- if (get_user(type, &up->mtype))
- return -EFAULT;
+ if (get_user(type, &up->mtype))
+ return -EFAULT;
- return do_msgsnd(first, type, up->mtext, second, third);
-}
+ return do_msgsnd(first, type, up->mtext, second, third);
+ }
+ case MSGRCV: {
+ void __user *uptr = compat_ptr(ptr);
-long compat_sys_msgrcv(int first, int second, int msgtyp, int third,
- int version, void __user *uptr)
-{
- if (first < 0)
- return -EINVAL;
- if (second < 0)
- return -EINVAL;
-
- if (!version) {
- struct compat_ipc_kludge ipck;
- if (!uptr)
+ if (first < 0 || second < 0)
return -EINVAL;
- if (copy_from_user (&ipck, uptr, sizeof(ipck)))
- return -EFAULT;
- uptr = compat_ptr(ipck.msgp);
- msgtyp = ipck.msgtyp;
+
+ if (!version) {
+ struct compat_ipc_kludge ipck;
+ if (!uptr)
+ return -EINVAL;
+ if (copy_from_user (&ipck, uptr, sizeof(ipck)))
+ return -EFAULT;
+ uptr = compat_ptr(ipck.msgp);
+ fifth = ipck.msgtyp;
+ }
+ return do_msgrcv(first, uptr, second, fifth, third,
+ compat_do_msg_fill);
}
- return do_msgrcv(first, uptr, second, msgtyp, third,
- compat_do_msg_fill);
+ case MSGGET:
+ return sys_msgget(first, second);
+ case MSGCTL:
+ return compat_sys_msgctl(first, second, compat_ptr(ptr));
+
+ case SHMAT: {
+ int err;
+ unsigned long raddr;
+
+ if (version == 1)
+ return -EINVAL;
+ err = do_shmat(first, compat_ptr(ptr), second, &raddr,
+ COMPAT_SHMLBA);
+ if (err < 0)
+ return err;
+ return put_user(raddr, (compat_ulong_t *)compat_ptr(third));
+ }
+ case SHMDT:
+ return sys_shmdt(compat_ptr(ptr));
+ case SHMGET:
+ return sys_shmget(first, (unsigned)second, third);
+ case SHMCTL:
+ return compat_sys_shmctl(first, second, compat_ptr(ptr));
+ }
+
+ return -ENOSYS;
}
-#else
-long compat_sys_semctl(int semid, int semnum, int cmd, int arg)
+#endif
+
+COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg)
{
return do_compat_semctl(semid, semnum, cmd, arg);
}
-long compat_sys_msgsnd(int msqid, struct compat_msgbuf __user *msgp,
- compat_ssize_t msgsz, int msgflg)
+COMPAT_SYSCALL_DEFINE4(msgsnd, int, msqid, compat_uptr_t, msgp,
+ compat_ssize_t, msgsz, int, msgflg)
{
+ struct compat_msgbuf __user *up = compat_ptr(msgp);
compat_long_t mtype;
- if (get_user(mtype, &msgp->mtype))
+ if (get_user(mtype, &up->mtype))
return -EFAULT;
- return do_msgsnd(msqid, mtype, msgp->mtext, (ssize_t)msgsz, msgflg);
+ return do_msgsnd(msqid, mtype, up->mtext, (ssize_t)msgsz, msgflg);
}
-long compat_sys_msgrcv(int msqid, struct compat_msgbuf __user *msgp,
- compat_ssize_t msgsz, long msgtyp, int msgflg)
+COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
+ compat_ssize_t, msgsz, long, msgtyp, int, msgflg)
{
- return do_msgrcv(msqid, msgp, (ssize_t)msgsz, msgtyp, msgflg,
- compat_do_msg_fill);
+ return do_msgrcv(msqid, compat_ptr(msgp), (ssize_t)msgsz, msgtyp,
+ msgflg, compat_do_msg_fill);
}
-#endif
static inline int get_compat_msqid64(struct msqid64_ds *m64,
struct compat_msqid64_ds __user *up64)
@@ -508,28 +552,7 @@ long compat_sys_msgctl(int first, int second, void __user *uptr)
return err;
}
-#ifndef COMPAT_SHMLBA
-#define COMPAT_SHMLBA SHMLBA
-#endif
-
-#ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC
-long compat_sys_shmat(int first, int second, compat_uptr_t third, int version,
- void __user *uptr)
-{
- int err;
- unsigned long raddr;
- compat_ulong_t __user *uaddr;
-
- if (version == 1)
- return -EINVAL;
- err = do_shmat(first, uptr, second, &raddr, COMPAT_SHMLBA);
- if (err < 0)
- return err;
- uaddr = compat_ptr(third);
- return put_user(raddr, uaddr);
-}
-#else
-long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg)
+COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg)
{
unsigned long ret;
long err;
@@ -540,7 +563,6 @@ long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg)
force_successful_syscall_return();
return (long)ret;
}
-#endif
static inline int get_compat_shmid64_ds(struct shmid64_ds *s64,
struct compat_shmid64_ds __user *up64)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index e5c4f609f22c..e4e47f647446 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -330,8 +330,16 @@ static struct dentry *mqueue_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
void *data)
{
- if (!(flags & MS_KERNMOUNT))
- data = current->nsproxy->ipc_ns;
+ if (!(flags & MS_KERNMOUNT)) {
+ struct ipc_namespace *ns = current->nsproxy->ipc_ns;
+ /* Don't allow mounting unless the caller has CAP_SYS_ADMIN
+ * over the ipc namespace.
+ */
+ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
+ data = ns;
+ }
return mount_ns(fs_type, flags, data, mqueue_fill_super);
}
@@ -840,7 +848,8 @@ out_putfd:
fd = error;
}
mutex_unlock(&root->d_inode->i_mutex);
- mnt_drop_write(mnt);
+ if (!ro)
+ mnt_drop_write(mnt);
out_putname:
putname(name);
return fd;
diff --git a/ipc/msg.c b/ipc/msg.c
index 31cd1bf6af27..d0c6d967b390 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -66,6 +66,7 @@ struct msg_sender {
#define SEARCH_EQUAL 2
#define SEARCH_NOTEQUAL 3
#define SEARCH_LESSEQUAL 4
+#define SEARCH_NUMBER 5
#define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS])
@@ -237,14 +238,9 @@ static inline void ss_del(struct msg_sender *mss)
static void ss_wakeup(struct list_head *h, int kill)
{
- struct list_head *tmp;
+ struct msg_sender *mss, *t;
- tmp = h->next;
- while (tmp != h) {
- struct msg_sender *mss;
-
- mss = list_entry(tmp, struct msg_sender, list);
- tmp = tmp->next;
+ list_for_each_entry_safe(mss, t, h, list) {
if (kill)
mss->list.next = NULL;
wake_up_process(mss->tsk);
@@ -253,14 +249,9 @@ static void ss_wakeup(struct list_head *h, int kill)
static void expunge_all(struct msg_queue *msq, int res)
{
- struct list_head *tmp;
-
- tmp = msq->q_receivers.next;
- while (tmp != &msq->q_receivers) {
- struct msg_receiver *msr;
+ struct msg_receiver *msr, *t;
- msr = list_entry(tmp, struct msg_receiver, r_list);
- tmp = tmp->next;
+ list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
msr->r_msg = NULL;
wake_up_process(msr->r_tsk);
smp_mb();
@@ -278,7 +269,7 @@ static void expunge_all(struct msg_queue *msq, int res)
*/
static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
- struct list_head *tmp;
+ struct msg_msg *msg, *t;
struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
expunge_all(msq, -EIDRM);
@@ -286,11 +277,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
msg_rmid(ns, msq);
msg_unlock(msq);
- tmp = msq->q_messages.next;
- while (tmp != &msq->q_messages) {
- struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);
-
- tmp = tmp->next;
+ list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
atomic_dec(&ns->msg_hdrs);
free_msg(msg);
}
@@ -583,6 +570,7 @@ static int testmsg(struct msg_msg *msg, long type, int mode)
switch(mode)
{
case SEARCH_ANY:
+ case SEARCH_NUMBER:
return 1;
case SEARCH_LESSEQUAL:
if (msg->m_type <=type)
@@ -602,14 +590,9 @@ static int testmsg(struct msg_msg *msg, long type, int mode)
static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
{
- struct list_head *tmp;
+ struct msg_receiver *msr, *t;
- tmp = msq->q_receivers.next;
- while (tmp != &msq->q_receivers) {
- struct msg_receiver *msr;
-
- msr = list_entry(tmp, struct msg_receiver, r_list);
- tmp = tmp->next;
+ list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
!security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
msr->r_msgtype, msr->r_mode)) {
@@ -685,7 +668,12 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
goto out_unlock_free;
}
ss_add(msq, &s);
- ipc_rcu_getref(msq);
+
+ if (!ipc_rcu_getref(msq)) {
+ err = -EIDRM;
+ goto out_unlock_free;
+ }
+
msg_unlock(msq);
schedule();
@@ -738,6 +726,8 @@ SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
static inline int convert_mode(long *msgtyp, int msgflg)
{
+ if (msgflg & MSG_COPY)
+ return SEARCH_NUMBER;
/*
* find message of correct type.
* msgtyp = 0 => get first.
@@ -774,14 +764,10 @@ static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
* This function creates new kernel message structure, large enough to store
* bufsz message bytes.
*/
-static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz,
- int msgflg, long *msgtyp,
- unsigned long *copy_number)
+static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
{
struct msg_msg *copy;
- *copy_number = *msgtyp;
- *msgtyp = 0;
/*
* Create dummy message to copy real message to.
*/
@@ -797,9 +783,7 @@ static inline void free_copy(struct msg_msg *copy)
free_msg(copy);
}
#else
-static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz,
- int msgflg, long *msgtyp,
- unsigned long *copy_number)
+static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
{
return ERR_PTR(-ENOSYS);
}
@@ -809,6 +793,30 @@ static inline void free_copy(struct msg_msg *copy)
}
#endif
+static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode)
+{
+ struct msg_msg *msg;
+ long count = 0;
+
+ list_for_each_entry(msg, &msq->q_messages, m_list) {
+ if (testmsg(msg, *msgtyp, mode) &&
+ !security_msg_queue_msgrcv(msq, msg, current,
+ *msgtyp, mode)) {
+ if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
+ *msgtyp = msg->m_type - 1;
+ } else if (mode == SEARCH_NUMBER) {
+ if (*msgtyp == count)
+ return msg;
+ } else
+ return msg;
+ count++;
+ }
+ }
+
+ return ERR_PTR(-EAGAIN);
+}
+
+
long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
int msgflg,
long (*msg_handler)(void __user *, struct msg_msg *, size_t))
@@ -818,15 +826,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
int mode;
struct ipc_namespace *ns;
struct msg_msg *copy = NULL;
- unsigned long copy_number = 0;
ns = current->nsproxy->ipc_ns;
if (msqid < 0 || (long) bufsz < 0)
return -EINVAL;
if (msgflg & MSG_COPY) {
- copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax),
- msgflg, &msgtyp, &copy_number);
+ copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
if (IS_ERR(copy))
return PTR_ERR(copy);
}
@@ -840,44 +846,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
for (;;) {
struct msg_receiver msr_d;
- struct list_head *tmp;
- long msg_counter = 0;
msg = ERR_PTR(-EACCES);
if (ipcperms(ns, &msq->q_perm, S_IRUGO))
goto out_unlock;
- msg = ERR_PTR(-EAGAIN);
- tmp = msq->q_messages.next;
- while (tmp != &msq->q_messages) {
- struct msg_msg *walk_msg;
-
- walk_msg = list_entry(tmp, struct msg_msg, m_list);
- if (testmsg(walk_msg, msgtyp, mode) &&
- !security_msg_queue_msgrcv(msq, walk_msg, current,
- msgtyp, mode)) {
-
- msg = walk_msg;
- if (mode == SEARCH_LESSEQUAL &&
- walk_msg->m_type != 1) {
- msgtyp = walk_msg->m_type - 1;
- } else if (msgflg & MSG_COPY) {
- if (copy_number == msg_counter) {
- /*
- * Found requested message.
- * Copy it.
- */
- msg = copy_msg(msg, copy);
- if (IS_ERR(msg))
- goto out_unlock;
- break;
- }
- } else
- break;
- msg_counter++;
- }
- tmp = tmp->next;
- }
+ msg = find_msg(msq, &msgtyp, mode);
+
if (!IS_ERR(msg)) {
/*
* Found a suitable message.
@@ -891,8 +866,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
* If we are copying, then do not unlink message and do
* not update queue parameters.
*/
- if (msgflg & MSG_COPY)
+ if (msgflg & MSG_COPY) {
+ msg = copy_msg(msg, copy);
goto out_unlock;
+ }
list_del(&msg->m_list);
msq->q_qnum--;
msq->q_rtime = get_seconds();
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 5df8e4bf1db0..491e71f2a1b8 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -16,8 +16,8 @@
#include <linux/msg.h>
#include <linux/ipc_namespace.h>
#include <linux/utsname.h>
-#include <linux/proc_fs.h>
-#include <asm/uaccess.h>
+#include <linux/proc_ns.h>
+#include <linux/uaccess.h>
#include "util.h"
@@ -37,59 +37,70 @@ struct ipc_namespace init_ipc_ns = {
atomic_t nr_ipc_ns = ATOMIC_INIT(1);
struct msg_msgseg {
- struct msg_msgseg* next;
+ struct msg_msgseg *next;
/* the next part of the message follows immediately */
};
-#define DATALEN_MSG (PAGE_SIZE-sizeof(struct msg_msg))
-#define DATALEN_SEG (PAGE_SIZE-sizeof(struct msg_msgseg))
+#define DATALEN_MSG (int)(PAGE_SIZE-sizeof(struct msg_msg))
+#define DATALEN_SEG (int)(PAGE_SIZE-sizeof(struct msg_msgseg))
-struct msg_msg *load_msg(const void __user *src, int len)
+
+static struct msg_msg *alloc_msg(int len)
{
struct msg_msg *msg;
struct msg_msgseg **pseg;
- int err;
int alen;
- alen = len;
- if (alen > DATALEN_MSG)
- alen = DATALEN_MSG;
-
+ alen = min(len, DATALEN_MSG);
msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
if (msg == NULL)
- return ERR_PTR(-ENOMEM);
+ return NULL;
msg->next = NULL;
msg->security = NULL;
- if (copy_from_user(msg + 1, src, alen)) {
- err = -EFAULT;
- goto out_err;
- }
-
len -= alen;
- src = ((char __user *)src) + alen;
pseg = &msg->next;
while (len > 0) {
struct msg_msgseg *seg;
- alen = len;
- if (alen > DATALEN_SEG)
- alen = DATALEN_SEG;
- seg = kmalloc(sizeof(*seg) + alen,
- GFP_KERNEL);
- if (seg == NULL) {
- err = -ENOMEM;
+ alen = min(len, DATALEN_SEG);
+ seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL);
+ if (seg == NULL)
goto out_err;
- }
*pseg = seg;
seg->next = NULL;
- if (copy_from_user(seg + 1, src, alen)) {
- err = -EFAULT;
- goto out_err;
- }
pseg = &seg->next;
len -= alen;
- src = ((char __user *)src) + alen;
+ }
+
+ return msg;
+
+out_err:
+ free_msg(msg);
+ return NULL;
+}
+
+struct msg_msg *load_msg(const void __user *src, int len)
+{
+ struct msg_msg *msg;
+ struct msg_msgseg *seg;
+ int err = -EFAULT;
+ int alen;
+
+ msg = alloc_msg(len);
+ if (msg == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ alen = min(len, DATALEN_MSG);
+ if (copy_from_user(msg + 1, src, alen))
+ goto out_err;
+
+ for (seg = msg->next; seg != NULL; seg = seg->next) {
+ len -= alen;
+ src = (char __user *)src + alen;
+ alen = min(len, DATALEN_SEG);
+ if (copy_from_user(seg + 1, src, alen))
+ goto out_err;
}
err = security_msg_msg_alloc(msg);
@@ -113,23 +124,16 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst)
if (src->m_ts > dst->m_ts)
return ERR_PTR(-EINVAL);
- alen = len;
- if (alen > DATALEN_MSG)
- alen = DATALEN_MSG;
-
+ alen = min(len, DATALEN_MSG);
memcpy(dst + 1, src + 1, alen);
- len -= alen;
- dst_pseg = dst->next;
- src_pseg = src->next;
- while (len > 0) {
- alen = len;
- if (alen > DATALEN_SEG)
- alen = DATALEN_SEG;
- memcpy(dst_pseg + 1, src_pseg + 1, alen);
- dst_pseg = dst_pseg->next;
+ for (dst_pseg = dst->next, src_pseg = src->next;
+ src_pseg != NULL;
+ dst_pseg = dst_pseg->next, src_pseg = src_pseg->next) {
+
len -= alen;
- src_pseg = src_pseg->next;
+ alen = min(len, DATALEN_SEG);
+ memcpy(dst_pseg + 1, src_pseg + 1, alen);
}
dst->m_type = src->m_type;
@@ -148,24 +152,16 @@ int store_msg(void __user *dest, struct msg_msg *msg, int len)
int alen;
struct msg_msgseg *seg;
- alen = len;
- if (alen > DATALEN_MSG)
- alen = DATALEN_MSG;
+ alen = min(len, DATALEN_MSG);
if (copy_to_user(dest, msg + 1, alen))
return -1;
- len -= alen;
- dest = ((char __user *)dest) + alen;
- seg = msg->next;
- while (len > 0) {
- alen = len;
- if (alen > DATALEN_SEG)
- alen = DATALEN_SEG;
+ for (seg = msg->next; seg != NULL; seg = seg->next) {
+ len -= alen;
+ dest = (char __user *)dest + alen;
+ alen = min(len, DATALEN_SEG);
if (copy_to_user(dest, seg + 1, alen))
return -1;
- len -= alen;
- dest = ((char __user *)dest) + alen;
- seg = seg->next;
}
return 0;
}
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 7c1fa451b0b0..7ee61bf44933 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -12,7 +12,7 @@
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/user_namespace.h>
-#include <linux/proc_fs.h>
+#include <linux/proc_ns.h>
#include "util.h"
diff --git a/ipc/sem.c b/ipc/sem.c
index 58d31f1c1eb5..70480a3aa698 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -94,12 +94,12 @@
struct sem {
int semval; /* current value */
int sempid; /* pid of last operation */
+ spinlock_t lock; /* spinlock for fine-grained semtimedop */
struct list_head sem_pending; /* pending single-sop operations */
};
/* One queue for each sleeping process in the system. */
struct sem_queue {
- struct list_head simple_list; /* queue of pending operations */
struct list_head list; /* queue of pending operations */
struct task_struct *sleeper; /* this process */
struct sem_undo *undo; /* undo structure */
@@ -138,7 +138,6 @@ struct sem_undo_list {
#define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS])
-#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm)
#define sem_checkid(sma, semid) ipc_checkid(&sma->sem_perm, semid)
static int newary(struct ipc_namespace *, struct ipc_params *);
@@ -191,47 +190,141 @@ void __init sem_init (void)
}
/*
+ * If the request contains only one semaphore operation, and there are
+ * no complex transactions pending, lock only the semaphore involved.
+ * Otherwise, lock the entire semaphore array, since we either have
+ * multiple semaphores in our own semops, or we need to look at
+ * semaphores from other pending complex operations.
+ *
+ * Carefully guard against sma->complex_count changing between zero
+ * and non-zero while we are spinning for the lock. The value of
+ * sma->complex_count cannot change while we are holding the lock,
+ * so sem_unlock should be fine.
+ *
+ * The global lock path checks that all the local locks have been released,
+ * checking each local lock once. This means that the local lock paths
+ * cannot start their critical sections while the global lock is held.
+ */
+static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
+ int nsops)
+{
+ int locknum;
+ again:
+ if (nsops == 1 && !sma->complex_count) {
+ struct sem *sem = sma->sem_base + sops->sem_num;
+
+ /* Lock just the semaphore we are interested in. */
+ spin_lock(&sem->lock);
+
+ /*
+ * If sma->complex_count was set while we were spinning,
+ * we may need to look at things we did not lock here.
+ */
+ if (unlikely(sma->complex_count)) {
+ spin_unlock(&sem->lock);
+ goto lock_array;
+ }
+
+ /*
+ * Another process is holding the global lock on the
+ * sem_array; we cannot enter our critical section,
+ * but have to wait for the global lock to be released.
+ */
+ if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
+ spin_unlock(&sem->lock);
+ spin_unlock_wait(&sma->sem_perm.lock);
+ goto again;
+ }
+
+ locknum = sops->sem_num;
+ } else {
+ int i;
+ /*
+ * Lock the semaphore array, and wait for all of the
+ * individual semaphore locks to go away. The code
+ * above ensures no new single-lock holders will enter
+ * their critical section while the array lock is held.
+ */
+ lock_array:
+ spin_lock(&sma->sem_perm.lock);
+ for (i = 0; i < sma->sem_nsems; i++) {
+ struct sem *sem = sma->sem_base + i;
+ spin_unlock_wait(&sem->lock);
+ }
+ locknum = -1;
+ }
+ return locknum;
+}
+
+static inline void sem_unlock(struct sem_array *sma, int locknum)
+{
+ if (locknum == -1) {
+ spin_unlock(&sma->sem_perm.lock);
+ } else {
+ struct sem *sem = sma->sem_base + locknum;
+ spin_unlock(&sem->lock);
+ }
+}
+
+/*
* sem_lock_(check_) routines are called in the paths where the rw_mutex
* is not held.
+ *
+ * The caller holds the RCU read lock.
*/
-static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id)
+static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns,
+ int id, struct sembuf *sops, int nsops, int *locknum)
{
- struct kern_ipc_perm *ipcp = ipc_lock(&sem_ids(ns), id);
+ struct kern_ipc_perm *ipcp;
+ struct sem_array *sma;
+ ipcp = ipc_obtain_object(&sem_ids(ns), id);
if (IS_ERR(ipcp))
- return (struct sem_array *)ipcp;
+ return ERR_CAST(ipcp);
- return container_of(ipcp, struct sem_array, sem_perm);
+ sma = container_of(ipcp, struct sem_array, sem_perm);
+ *locknum = sem_lock(sma, sops, nsops);
+
+ /* ipc_rmid() may have already freed the ID while sem_lock
+ * was spinning: verify that the structure is still valid
+ */
+ if (!ipcp->deleted)
+ return container_of(ipcp, struct sem_array, sem_perm);
+
+ sem_unlock(sma, *locknum);
+ return ERR_PTR(-EINVAL);
}
-static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns,
- int id)
+static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
{
- struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id);
+ struct kern_ipc_perm *ipcp = ipc_obtain_object(&sem_ids(ns), id);
if (IS_ERR(ipcp))
- return (struct sem_array *)ipcp;
+ return ERR_CAST(ipcp);
return container_of(ipcp, struct sem_array, sem_perm);
}
-static inline void sem_lock_and_putref(struct sem_array *sma)
+static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
+ int id)
{
- ipc_lock_by_ptr(&sma->sem_perm);
- ipc_rcu_putref(sma);
+ struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);
+
+ if (IS_ERR(ipcp))
+ return ERR_CAST(ipcp);
+
+ return container_of(ipcp, struct sem_array, sem_perm);
}
-static inline void sem_getref_and_unlock(struct sem_array *sma)
+static inline void sem_lock_and_putref(struct sem_array *sma)
{
- ipc_rcu_getref(sma);
- ipc_unlock(&(sma)->sem_perm);
+ sem_lock(sma, NULL, -1);
+ ipc_rcu_putref(sma);
}
static inline void sem_putref(struct sem_array *sma)
{
- ipc_lock_by_ptr(&sma->sem_perm);
ipc_rcu_putref(sma);
- ipc_unlock(&(sma)->sem_perm);
}
static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -324,15 +417,18 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
sma->sem_base = (struct sem *) &sma[1];
- for (i = 0; i < nsems; i++)
+ for (i = 0; i < nsems; i++) {
INIT_LIST_HEAD(&sma->sem_base[i].sem_pending);
+ spin_lock_init(&sma->sem_base[i].lock);
+ }
sma->complex_count = 0;
INIT_LIST_HEAD(&sma->sem_pending);
INIT_LIST_HEAD(&sma->list_id);
sma->sem_nsems = nsems;
sma->sem_ctime = get_seconds();
- sem_unlock(sma);
+ sem_unlock(sma, -1);
+ rcu_read_unlock();
return sma->sem_perm.id;
}
@@ -471,7 +567,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt,
q->status = IN_WAKEUP;
q->pid = error;
- list_add_tail(&q->simple_list, pt);
+ list_add_tail(&q->list, pt);
}
/**
@@ -489,7 +585,7 @@ static void wake_up_sem_queue_do(struct list_head *pt)
int did_something;
did_something = !list_empty(pt);
- list_for_each_entry_safe(q, t, pt, simple_list) {
+ list_for_each_entry_safe(q, t, pt, list) {
wake_up_process(q->sleeper);
/* q can disappear immediately after writing q->status. */
smp_wmb();
@@ -502,9 +598,7 @@ static void wake_up_sem_queue_do(struct list_head *pt)
static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
{
list_del(&q->list);
- if (q->nsops == 1)
- list_del(&q->simple_list);
- else
+ if (q->nsops > 1)
sma->complex_count--;
}
@@ -557,9 +651,9 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q)
}
/*
* semval is 0. Check if there are wait-for-zero semops.
- * They must be the first entries in the per-semaphore simple queue
+ * They must be the first entries in the per-semaphore queue
*/
- h = list_first_entry(&curr->sem_pending, struct sem_queue, simple_list);
+ h = list_first_entry(&curr->sem_pending, struct sem_queue, list);
BUG_ON(h->nsops != 1);
BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num);
@@ -579,8 +673,9 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q)
* @pt: list head for the tasks that must be woken up.
*
* update_queue must be called after a semaphore in a semaphore array
- * was modified. If multiple semaphore were modified, then @semnum
- * must be set to -1.
+ * was modified. If multiple semaphores were modified, update_queue must
+ * be called with semnum = -1, as well as with the number of each modified
+ * semaphore.
* The tasks that must be woken up are added to @pt. The return code
* is stored in q->pid.
* The function return 1 if at least one semop was completed successfully.
@@ -590,30 +685,19 @@ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
struct sem_queue *q;
struct list_head *walk;
struct list_head *pending_list;
- int offset;
int semop_completed = 0;
- /* if there are complex operations around, then knowing the semaphore
- * that was modified doesn't help us. Assume that multiple semaphores
- * were modified.
- */
- if (sma->complex_count)
- semnum = -1;
-
- if (semnum == -1) {
+ if (semnum == -1)
pending_list = &sma->sem_pending;
- offset = offsetof(struct sem_queue, list);
- } else {
+ else
pending_list = &sma->sem_base[semnum].sem_pending;
- offset = offsetof(struct sem_queue, simple_list);
- }
again:
walk = pending_list->next;
while (walk != pending_list) {
int error, restart;
- q = (struct sem_queue *)((char *)walk - offset);
+ q = container_of(walk, struct sem_queue, list);
walk = walk->next;
/* If we are scanning the single sop, per-semaphore list of
@@ -668,19 +752,45 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
int otime, struct list_head *pt)
{
int i;
+ int progress;
- if (sma->complex_count || sops == NULL) {
- if (update_queue(sma, -1, pt))
+ progress = 1;
+retry_global:
+ if (sma->complex_count) {
+ if (update_queue(sma, -1, pt)) {
+ progress = 1;
otime = 1;
+ sops = NULL;
+ }
+ }
+ if (!progress)
goto done;
+
+ if (!sops) {
+ /* No semops; something special is going on. */
+ for (i = 0; i < sma->sem_nsems; i++) {
+ if (update_queue(sma, i, pt)) {
+ otime = 1;
+ progress = 1;
+ }
+ }
+ goto done_checkretry;
}
+ /* Check the semaphores that were modified. */
for (i = 0; i < nsops; i++) {
if (sops[i].sem_op > 0 ||
(sops[i].sem_op < 0 &&
sma->sem_base[sops[i].sem_num].semval == 0))
- if (update_queue(sma, sops[i].sem_num, pt))
+ if (update_queue(sma, sops[i].sem_num, pt)) {
otime = 1;
+ progress = 1;
+ }
+ }
+done_checkretry:
+ if (progress) {
+ progress = 0;
+ goto retry_global;
}
done:
if (otime)
@@ -703,6 +813,13 @@ static int count_semncnt (struct sem_array * sma, ushort semnum)
struct sem_queue * q;
semncnt = 0;
+ list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) {
+ struct sembuf * sops = q->sops;
+ BUG_ON(sops->sem_num != semnum);
+ if ((sops->sem_op < 0) && !(sops->sem_flg & IPC_NOWAIT))
+ semncnt++;
+ }
+
list_for_each_entry(q, &sma->sem_pending, list) {
struct sembuf * sops = q->sops;
int nsops = q->nsops;
@@ -722,6 +839,13 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
struct sem_queue * q;
semzcnt = 0;
+ list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) {
+ struct sembuf * sops = q->sops;
+ BUG_ON(sops->sem_num != semnum);
+ if ((sops->sem_op == 0) && !(sops->sem_flg & IPC_NOWAIT))
+ semzcnt++;
+ }
+
list_for_each_entry(q, &sma->sem_pending, list) {
struct sembuf * sops = q->sops;
int nsops = q->nsops;
@@ -745,6 +869,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
struct sem_queue *q, *tq;
struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
struct list_head tasks;
+ int i;
/* Free the existing undo structures for this semaphore set. */
assert_spin_locked(&sma->sem_perm.lock);
@@ -763,10 +888,18 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
unlink_queue(sma, q);
wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
}
+ for (i = 0; i < sma->sem_nsems; i++) {
+ struct sem *sem = sma->sem_base + i;
+ list_for_each_entry_safe(q, tq, &sem->sem_pending, list) {
+ unlink_queue(sma, q);
+ wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
+ }
+ }
/* Remove the semaphore set from the IDR */
sem_rmid(ns, sma);
- sem_unlock(sma);
+ sem_unlock(sma, -1);
+ rcu_read_unlock();
wake_up_sem_queue_do(&tasks);
ns->used_sems -= sma->sem_nsems;
@@ -799,7 +932,7 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in,
}
static int semctl_nolock(struct ipc_namespace *ns, int semid,
- int cmd, int version, union semun arg)
+ int cmd, int version, void __user *p)
{
int err;
struct sem_array *sma;
@@ -834,7 +967,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
}
max_id = ipc_get_maxid(&sem_ids(ns));
up_read(&sem_ids(ns).rw_mutex);
- if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo)))
+ if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
return -EFAULT;
return (max_id < 0) ? 0: max_id;
}
@@ -842,18 +975,24 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
case SEM_STAT:
{
struct semid64_ds tbuf;
- int id;
+ int id = 0;
+
+ memset(&tbuf, 0, sizeof(tbuf));
+ rcu_read_lock();
if (cmd == SEM_STAT) {
- sma = sem_lock(ns, semid);
- if (IS_ERR(sma))
- return PTR_ERR(sma);
+ sma = sem_obtain_object(ns, semid);
+ if (IS_ERR(sma)) {
+ err = PTR_ERR(sma);
+ goto out_unlock;
+ }
id = sma->sem_perm.id;
} else {
- sma = sem_lock_check(ns, semid);
- if (IS_ERR(sma))
- return PTR_ERR(sma);
- id = 0;
+ sma = sem_obtain_object_check(ns, semid);
+ if (IS_ERR(sma)) {
+ err = PTR_ERR(sma);
+ goto out_unlock;
+ }
}
err = -EACCES;
@@ -864,14 +1003,12 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
if (err)
goto out_unlock;
- memset(&tbuf, 0, sizeof(tbuf));
-
kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
tbuf.sem_otime = sma->sem_otime;
tbuf.sem_ctime = sma->sem_ctime;
tbuf.sem_nsems = sma->sem_nsems;
- sem_unlock(sma);
- if (copy_semid_to_user (arg.buf, &tbuf, version))
+ rcu_read_unlock();
+ if (copy_semid_to_user(p, &tbuf, version))
return -EFAULT;
return id;
}
@@ -879,64 +1016,140 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
return -EINVAL;
}
out_unlock:
- sem_unlock(sma);
+ rcu_read_unlock();
return err;
}
-static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
- int cmd, int version, union semun arg)
+static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
+ unsigned long arg)
{
+ struct sem_undo *un;
struct sem_array *sma;
struct sem* curr;
int err;
+ struct list_head tasks;
+ int val;
+#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
+ /* big-endian 64bit */
+ val = arg >> 32;
+#else
+ /* 32bit or little-endian 64bit */
+ val = arg;
+#endif
+
+ if (val > SEMVMX || val < 0)
+ return -ERANGE;
+
+ INIT_LIST_HEAD(&tasks);
+
+ rcu_read_lock();
+ sma = sem_obtain_object_check(ns, semid);
+ if (IS_ERR(sma)) {
+ rcu_read_unlock();
+ return PTR_ERR(sma);
+ }
+
+ if (semnum < 0 || semnum >= sma->sem_nsems) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+
+
+ if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
+ rcu_read_unlock();
+ return -EACCES;
+ }
+
+ err = security_sem_semctl(sma, SETVAL);
+ if (err) {
+ rcu_read_unlock();
+ return -EACCES;
+ }
+
+ sem_lock(sma, NULL, -1);
+
+ curr = &sma->sem_base[semnum];
+
+ assert_spin_locked(&sma->sem_perm.lock);
+ list_for_each_entry(un, &sma->list_id, list_id)
+ un->semadj[semnum] = 0;
+
+ curr->semval = val;
+ curr->sempid = task_tgid_vnr(current);
+ sma->sem_ctime = get_seconds();
+ /* maybe some queued-up processes were waiting for this */
+ do_smart_update(sma, NULL, 0, 0, &tasks);
+ sem_unlock(sma, -1);
+ rcu_read_unlock();
+ wake_up_sem_queue_do(&tasks);
+ return 0;
+}
+
+static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
+ int cmd, void __user *p)
+{
+ struct sem_array *sma;
+ struct sem* curr;
+ int err, nsems;
ushort fast_sem_io[SEMMSL_FAST];
ushort* sem_io = fast_sem_io;
- int nsems;
struct list_head tasks;
- sma = sem_lock_check(ns, semid);
- if (IS_ERR(sma))
+ INIT_LIST_HEAD(&tasks);
+
+ rcu_read_lock();
+ sma = sem_obtain_object_check(ns, semid);
+ if (IS_ERR(sma)) {
+ rcu_read_unlock();
return PTR_ERR(sma);
+ }
- INIT_LIST_HEAD(&tasks);
nsems = sma->sem_nsems;
err = -EACCES;
- if (ipcperms(ns, &sma->sem_perm,
- (cmd == SETVAL || cmd == SETALL) ? S_IWUGO : S_IRUGO))
- goto out_unlock;
+ if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO))
+ goto out_rcu_wakeup;
err = security_sem_semctl(sma, cmd);
if (err)
- goto out_unlock;
+ goto out_rcu_wakeup;
err = -EACCES;
switch (cmd) {
case GETALL:
{
- ushort __user *array = arg.array;
+ ushort __user *array = p;
int i;
+ sem_lock(sma, NULL, -1);
if(nsems > SEMMSL_FAST) {
- sem_getref_and_unlock(sma);
-
+ if (!ipc_rcu_getref(sma)) {
+ sem_unlock(sma, -1);
+ rcu_read_unlock();
+ err = -EIDRM;
+ goto out_free;
+ }
+ sem_unlock(sma, -1);
+ rcu_read_unlock();
sem_io = ipc_alloc(sizeof(ushort)*nsems);
if(sem_io == NULL) {
sem_putref(sma);
return -ENOMEM;
}
+ rcu_read_lock();
sem_lock_and_putref(sma);
if (sma->sem_perm.deleted) {
- sem_unlock(sma);
+ sem_unlock(sma, -1);
+ rcu_read_unlock();
err = -EIDRM;
goto out_free;
}
}
-
for (i = 0; i < sma->sem_nsems; i++)
sem_io[i] = sma->sem_base[i].semval;
- sem_unlock(sma);
+ sem_unlock(sma, -1);
+ rcu_read_unlock();
err = 0;
if(copy_to_user(array, sem_io, nsems*sizeof(ushort)))
err = -EFAULT;
@@ -947,7 +1160,11 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
int i;
struct sem_undo *un;
- sem_getref_and_unlock(sma);
+ if (!ipc_rcu_getref(sma)) {
+ rcu_read_unlock();
+ return -EIDRM;
+ }
+ rcu_read_unlock();
if(nsems > SEMMSL_FAST) {
sem_io = ipc_alloc(sizeof(ushort)*nsems);
@@ -957,7 +1174,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
}
}
- if (copy_from_user (sem_io, arg.array, nsems*sizeof(ushort))) {
+ if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
sem_putref(sma);
err = -EFAULT;
goto out_free;
@@ -970,9 +1187,11 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
goto out_free;
}
}
+ rcu_read_lock();
sem_lock_and_putref(sma);
if (sma->sem_perm.deleted) {
- sem_unlock(sma);
+ sem_unlock(sma, -1);
+ rcu_read_unlock();
err = -EIDRM;
goto out_free;
}
@@ -991,12 +1210,13 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
err = 0;
goto out_unlock;
}
- /* GETVAL, GETPID, GETNCTN, GETZCNT, SETVAL: fall-through */
+ /* GETVAL, GETPID, GETNCNT, GETZCNT: fall-through */
}
err = -EINVAL;
- if(semnum < 0 || semnum >= nsems)
- goto out_unlock;
+ if (semnum < 0 || semnum >= nsems)
+ goto out_rcu_wakeup;
+ sem_lock(sma, NULL, -1);
curr = &sma->sem_base[semnum];
switch (cmd) {
@@ -1012,32 +1232,13 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
case GETZCNT:
err = count_semzcnt(sma,semnum);
goto out_unlock;
- case SETVAL:
- {
- int val = arg.val;
- struct sem_undo *un;
-
- err = -ERANGE;
- if (val > SEMVMX || val < 0)
- goto out_unlock;
-
- assert_spin_locked(&sma->sem_perm.lock);
- list_for_each_entry(un, &sma->list_id, list_id)
- un->semadj[semnum] = 0;
-
- curr->semval = val;
- curr->sempid = task_tgid_vnr(current);
- sma->sem_ctime = get_seconds();
- /* maybe some queued-up processes were waiting for this */
- do_smart_update(sma, NULL, 0, 0, &tasks);
- err = 0;
- goto out_unlock;
- }
}
+
out_unlock:
- sem_unlock(sma);
+ sem_unlock(sma, -1);
+out_rcu_wakeup:
+ rcu_read_unlock();
wake_up_sem_queue_do(&tasks);
-
out_free:
if(sem_io != fast_sem_io)
ipc_free(sem_io, sizeof(ushort)*nsems);
@@ -1076,7 +1277,7 @@ copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
* NOTE: no locks must be held, the rw_mutex is taken inside this function.
*/
static int semctl_down(struct ipc_namespace *ns, int semid,
- int cmd, int version, union semun arg)
+ int cmd, int version, void __user *p)
{
struct sem_array *sma;
int err;
@@ -1084,47 +1285,54 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
struct kern_ipc_perm *ipcp;
if(cmd == IPC_SET) {
- if (copy_semid_from_user(&semid64, arg.buf, version))
+ if (copy_semid_from_user(&semid64, p, version))
return -EFAULT;
}
- ipcp = ipcctl_pre_down(ns, &sem_ids(ns), semid, cmd,
- &semid64.sem_perm, 0);
+ ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
+ &semid64.sem_perm, 0);
if (IS_ERR(ipcp))
return PTR_ERR(ipcp);
sma = container_of(ipcp, struct sem_array, sem_perm);
err = security_sem_semctl(sma, cmd);
- if (err)
- goto out_unlock;
+ if (err) {
+ rcu_read_unlock();
+ goto out_up;
+ }
switch(cmd){
case IPC_RMID:
+ sem_lock(sma, NULL, -1);
freeary(ns, ipcp);
goto out_up;
case IPC_SET:
+ sem_lock(sma, NULL, -1);
err = ipc_update_perm(&semid64.sem_perm, ipcp);
if (err)
goto out_unlock;
sma->sem_ctime = get_seconds();
break;
default:
+ rcu_read_unlock();
err = -EINVAL;
+ goto out_up;
}
out_unlock:
- sem_unlock(sma);
+ sem_unlock(sma, -1);
+ rcu_read_unlock();
out_up:
up_write(&sem_ids(ns).rw_mutex);
return err;
}
-SYSCALL_DEFINE(semctl)(int semid, int semnum, int cmd, union semun arg)
+SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
{
- int err = -EINVAL;
int version;
struct ipc_namespace *ns;
+ void __user *p = (void __user *)arg;
if (semid < 0)
return -EINVAL;
@@ -1137,32 +1345,23 @@ SYSCALL_DEFINE(semctl)(int semid, int semnum, int cmd, union semun arg)
case SEM_INFO:
case IPC_STAT:
case SEM_STAT:
- err = semctl_nolock(ns, semid, cmd, version, arg);
- return err;
+ return semctl_nolock(ns, semid, cmd, version, p);
case GETALL:
case GETVAL:
case GETPID:
case GETNCNT:
case GETZCNT:
- case SETVAL:
case SETALL:
- err = semctl_main(ns,semid,semnum,cmd,version,arg);
- return err;
+ return semctl_main(ns, semid, semnum, cmd, p);
+ case SETVAL:
+ return semctl_setval(ns, semid, semnum, arg);
case IPC_RMID:
case IPC_SET:
- err = semctl_down(ns, semid, cmd, version, arg);
- return err;
+ return semctl_down(ns, semid, cmd, version, p);
default:
return -EINVAL;
}
}
-#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
-asmlinkage long SyS_semctl(int semid, int semnum, int cmd, union semun arg)
-{
- return SYSC_semctl((int) semid, (int) semnum, (int) cmd, arg);
-}
-SYSCALL_ALIAS(sys_semctl, SyS_semctl);
-#endif
/* If the task doesn't already have a undo_list, then allocate one
* here. We guarantee there is only one thread using this undo list,
@@ -1235,8 +1434,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
struct sem_array *sma;
struct sem_undo_list *ulp;
struct sem_undo *un, *new;
- int nsems;
- int error;
+ int nsems, error;
error = get_undo_list(&ulp);
if (error)
@@ -1248,16 +1446,22 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
spin_unlock(&ulp->lock);
if (likely(un!=NULL))
goto out;
- rcu_read_unlock();
/* no undo structure around - allocate one. */
/* step 1: figure out the size of the semaphore array */
- sma = sem_lock_check(ns, semid);
- if (IS_ERR(sma))
+ sma = sem_obtain_object_check(ns, semid);
+ if (IS_ERR(sma)) {
+ rcu_read_unlock();
return ERR_CAST(sma);
+ }
nsems = sma->sem_nsems;
- sem_getref_and_unlock(sma);
+ if (!ipc_rcu_getref(sma)) {
+ rcu_read_unlock();
+ un = ERR_PTR(-EIDRM);
+ goto out;
+ }
+ rcu_read_unlock();
/* step 2: allocate new undo structure */
new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
@@ -1267,9 +1471,11 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
}
/* step 3: Acquire the lock on semaphore array */
+ rcu_read_lock();
sem_lock_and_putref(sma);
if (sma->sem_perm.deleted) {
- sem_unlock(sma);
+ sem_unlock(sma, -1);
+ rcu_read_unlock();
kfree(new);
un = ERR_PTR(-EIDRM);
goto out;
@@ -1296,8 +1502,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
success:
spin_unlock(&ulp->lock);
- rcu_read_lock();
- sem_unlock(sma);
+ sem_unlock(sma, -1);
out:
return un;
}
@@ -1337,7 +1542,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
struct sembuf fast_sops[SEMOPM_FAST];
struct sembuf* sops = fast_sops, *sop;
struct sem_undo *un;
- int undos = 0, alter = 0, max;
+ int undos = 0, alter = 0, max, locknum;
struct sem_queue queue;
unsigned long jiffies_left = 0;
struct ipc_namespace *ns;
@@ -1381,60 +1586,49 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
alter = 1;
}
+ INIT_LIST_HEAD(&tasks);
+
if (undos) {
+ /* On success, find_alloc_undo takes the rcu_read_lock */
un = find_alloc_undo(ns, semid);
if (IS_ERR(un)) {
error = PTR_ERR(un);
goto out_free;
}
- } else
+ } else {
un = NULL;
+ rcu_read_lock();
+ }
- INIT_LIST_HEAD(&tasks);
-
- sma = sem_lock_check(ns, semid);
+ sma = sem_obtain_object_check(ns, semid);
if (IS_ERR(sma)) {
- if (un)
- rcu_read_unlock();
+ rcu_read_unlock();
error = PTR_ERR(sma);
goto out_free;
}
- /*
- * semid identifiers are not unique - find_alloc_undo may have
- * allocated an undo structure, it was invalidated by an RMID
- * and now a new array with received the same id. Check and fail.
- * This case can be detected checking un->semid. The existence of
- * "un" itself is guaranteed by rcu.
- */
- error = -EIDRM;
- if (un) {
- if (un->semid == -1) {
- rcu_read_unlock();
- goto out_unlock_free;
- } else {
- /*
- * rcu lock can be released, "un" cannot disappear:
- * - sem_lock is acquired, thus IPC_RMID is
- * impossible.
- * - exit_sem is impossible, it always operates on
- * current (or a dead task).
- */
-
- rcu_read_unlock();
- }
- }
-
error = -EFBIG;
if (max >= sma->sem_nsems)
- goto out_unlock_free;
+ goto out_rcu_wakeup;
error = -EACCES;
if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
- goto out_unlock_free;
+ goto out_rcu_wakeup;
error = security_sem_semop(sma, sops, nsops, alter);
if (error)
+ goto out_rcu_wakeup;
+
+ /*
+ * semid identifiers are not unique - find_alloc_undo may have
+ * allocated an undo structure, it was invalidated by an RMID
+ * and now a new array has received the same id. Check and fail.
+ * This case can be detected checking un->semid. The existence of
+ * "un" itself is guaranteed by rcu.
+ */
+ error = -EIDRM;
+ locknum = sem_lock(sma, sops, nsops);
+ if (un && un->semid == -1)
goto out_unlock_free;
error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
@@ -1454,21 +1648,20 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
queue.undo = un;
queue.pid = task_tgid_vnr(current);
queue.alter = alter;
- if (alter)
- list_add_tail(&queue.list, &sma->sem_pending);
- else
- list_add(&queue.list, &sma->sem_pending);
if (nsops == 1) {
struct sem *curr;
curr = &sma->sem_base[sops->sem_num];
if (alter)
- list_add_tail(&queue.simple_list, &curr->sem_pending);
+ list_add_tail(&queue.list, &curr->sem_pending);
else
- list_add(&queue.simple_list, &curr->sem_pending);
+ list_add(&queue.list, &curr->sem_pending);
} else {
- INIT_LIST_HEAD(&queue.simple_list);
+ if (alter)
+ list_add_tail(&queue.list, &sma->sem_pending);
+ else
+ list_add(&queue.list, &sma->sem_pending);
sma->complex_count++;
}
@@ -1477,7 +1670,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
sleep_again:
current->state = TASK_INTERRUPTIBLE;
- sem_unlock(sma);
+ sem_unlock(sma, locknum);
+ rcu_read_unlock();
if (timeout)
jiffies_left = schedule_timeout(jiffies_left);
@@ -1499,7 +1693,8 @@ sleep_again:
goto out_free;
}
- sma = sem_lock(ns, semid);
+ rcu_read_lock();
+ sma = sem_obtain_lock(ns, semid, sops, nsops, &locknum);
/*
* Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing.
@@ -1510,6 +1705,7 @@ sleep_again:
* Array removed? If yes, leave without sem_unlock().
*/
if (IS_ERR(sma)) {
+ rcu_read_unlock();
goto out_free;
}
@@ -1538,8 +1734,9 @@ sleep_again:
unlink_queue(sma, &queue);
out_unlock_free:
- sem_unlock(sma);
-
+ sem_unlock(sma, locknum);
+out_rcu_wakeup:
+ rcu_read_unlock();
wake_up_sem_queue_do(&tasks);
out_free:
if(sops != fast_sops)
@@ -1602,8 +1799,7 @@ void exit_sem(struct task_struct *tsk)
struct sem_array *sma;
struct sem_undo *un;
struct list_head tasks;
- int semid;
- int i;
+ int semid, i;
rcu_read_lock();
un = list_entry_rcu(ulp->list_proc.next,
@@ -1612,23 +1808,27 @@ void exit_sem(struct task_struct *tsk)
semid = -1;
else
semid = un->semid;
- rcu_read_unlock();
- if (semid == -1)
+ if (semid == -1) {
+ rcu_read_unlock();
break;
+ }
- sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid);
-
+ sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, un->semid);
/* exit_sem raced with IPC_RMID, nothing to do */
- if (IS_ERR(sma))
+ if (IS_ERR(sma)) {
+ rcu_read_unlock();
continue;
+ }
+ sem_lock(sma, NULL, -1);
un = __lookup_undo(ulp, semid);
if (un == NULL) {
/* exit_sem raced with IPC_RMID+semget() that created
* exactly the same semid. Nothing to do.
*/
- sem_unlock(sma);
+ sem_unlock(sma, -1);
+ rcu_read_unlock();
continue;
}
@@ -1668,7 +1868,8 @@ void exit_sem(struct task_struct *tsk)
/* maybe some queued-up processes were waiting for this */
INIT_LIST_HEAD(&tasks);
do_smart_update(sma, NULL, 0, 1, &tasks);
- sem_unlock(sma);
+ sem_unlock(sma, -1);
+ rcu_read_unlock();
wake_up_sem_queue_do(&tasks);
kfree_rcu(un, rcu);
diff --git a/ipc/shm.c b/ipc/shm.c
index cb858df061d3..7e199fa1960f 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -462,7 +462,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
size_t size = params->u.size;
int error;
struct shmid_kernel *shp;
- int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
+ size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
struct file * file;
char name[13];
int id;
@@ -491,10 +491,20 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
sprintf (name, "SYSV%08x", key);
if (shmflg & SHM_HUGETLB) {
+ struct hstate *hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT)
+ & SHM_HUGE_MASK);
+ size_t hugesize;
+
+ if (!hs) {
+ error = -EINVAL;
+ goto no_file;
+ }
+ hugesize = ALIGN(size, huge_page_size(hs));
+
/* hugetlb_file_setup applies strict accounting */
if (shmflg & SHM_NORESERVE)
acctflag = VM_NORESERVE;
- file = hugetlb_file_setup(name, 0, size, acctflag,
+ file = hugetlb_file_setup(name, hugesize, acctflag,
&shp->mlock_user, HUGETLB_SHMFS_INODE,
(shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
} else {
diff --git a/ipc/syscall.c b/ipc/syscall.c
index 0d1e32ce048e..52429489cde0 100644
--- a/ipc/syscall.c
+++ b/ipc/syscall.c
@@ -33,12 +33,12 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second,
case SEMGET:
return sys_semget(first, second, third);
case SEMCTL: {
- union semun fourth;
+ unsigned long arg;
if (!ptr)
return -EINVAL;
- if (get_user(fourth.__pad, (void __user * __user *) ptr))
+ if (get_user(arg, (unsigned long __user *) ptr))
return -EFAULT;
- return sys_semctl(first, second, third, fourth);
+ return sys_semctl(first, second, third, arg);
}
case MSGSND:
diff --git a/ipc/util.c b/ipc/util.c
index 464a8abd779f..809ec5ec8122 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -23,6 +23,7 @@
#include <linux/msg.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
+#include <linux/notifier.h>
#include <linux/capability.h>
#include <linux/highuid.h>
#include <linux/security.h>
@@ -47,19 +48,16 @@ struct ipc_proc_iface {
int (*show)(struct seq_file *, void *);
};
-#ifdef CONFIG_MEMORY_HOTPLUG
-
static void ipc_memory_notifier(struct work_struct *work)
{
ipcns_notify(IPCNS_MEMCHANGED);
}
-static DECLARE_WORK(ipc_memory_wq, ipc_memory_notifier);
-
-
static int ipc_memory_callback(struct notifier_block *self,
unsigned long action, void *arg)
{
+ static DECLARE_WORK(ipc_memory_wq, ipc_memory_notifier);
+
switch (action) {
case MEM_ONLINE: /* memory successfully brought online */
case MEM_OFFLINE: /* or offline: it's time to recompute msgmni */
@@ -85,7 +83,10 @@ static int ipc_memory_callback(struct notifier_block *self,
return NOTIFY_OK;
}
-#endif /* CONFIG_MEMORY_HOTPLUG */
+static struct notifier_block ipc_memory_nb = {
+ .notifier_call = ipc_memory_callback,
+ .priority = IPC_CALLBACK_PRI,
+};
/**
* ipc_init - initialise IPC subsystem
@@ -102,7 +103,7 @@ static int __init ipc_init(void)
sem_init();
msg_init();
shm_init();
- hotplug_memory_notifier(ipc_memory_callback, IPC_CALLBACK_PRI);
+ register_hotmemory_notifier(&ipc_memory_nb);
register_ipcns_notifier(&init_ipc_ns);
return 0;
}
@@ -438,9 +439,9 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
* NULL is returned if the allocation fails
*/
-void* ipc_alloc(int size)
+void *ipc_alloc(int size)
{
- void* out;
+ void *out;
if(size > PAGE_SIZE)
out = vmalloc(size);
else
@@ -465,126 +466,57 @@ void ipc_free(void* ptr, int size)
kfree(ptr);
}
-/*
- * rcu allocations:
- * There are three headers that are prepended to the actual allocation:
- * - during use: ipc_rcu_hdr.
- * - during the rcu grace period: ipc_rcu_grace.
- * - [only if vmalloc]: ipc_rcu_sched.
- * Their lifetime doesn't overlap, thus the headers share the same memory.
- * Unlike a normal union, they are right-aligned, thus some container_of
- * forward/backward casting is necessary:
- */
-struct ipc_rcu_hdr
-{
- int refcount;
- int is_vmalloc;
- void *data[0];
-};
-
-
-struct ipc_rcu_grace
-{
+struct ipc_rcu {
struct rcu_head rcu;
+ atomic_t refcount;
/* "void *" makes sure alignment of following data is sane. */
void *data[0];
};
-struct ipc_rcu_sched
-{
- struct work_struct work;
- /* "void *" makes sure alignment of following data is sane. */
- void *data[0];
-};
-
-#define HDRLEN_KMALLOC (sizeof(struct ipc_rcu_grace) > sizeof(struct ipc_rcu_hdr) ? \
- sizeof(struct ipc_rcu_grace) : sizeof(struct ipc_rcu_hdr))
-#define HDRLEN_VMALLOC (sizeof(struct ipc_rcu_sched) > HDRLEN_KMALLOC ? \
- sizeof(struct ipc_rcu_sched) : HDRLEN_KMALLOC)
-
-static inline int rcu_use_vmalloc(int size)
-{
- /* Too big for a single page? */
- if (HDRLEN_KMALLOC + size > PAGE_SIZE)
- return 1;
- return 0;
-}
-
/**
* ipc_rcu_alloc - allocate ipc and rcu space
* @size: size desired
*
* Allocate memory for the rcu header structure + the object.
- * Returns the pointer to the object.
- * NULL is returned if the allocation fails.
+ * Returns the pointer to the object or NULL upon failure.
*/
-
-void* ipc_rcu_alloc(int size)
+void *ipc_rcu_alloc(int size)
{
- void* out;
- /*
- * We prepend the allocation with the rcu struct, and
- * workqueue if necessary (for vmalloc).
+ /*
+ * We prepend the allocation with the rcu struct
*/
- if (rcu_use_vmalloc(size)) {
- out = vmalloc(HDRLEN_VMALLOC + size);
- if (out) {
- out += HDRLEN_VMALLOC;
- container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
- container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
- }
- } else {
- out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
- if (out) {
- out += HDRLEN_KMALLOC;
- container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
- container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
- }
- }
-
- return out;
-}
-
-void ipc_rcu_getref(void *ptr)
-{
- container_of(ptr, struct ipc_rcu_hdr, data)->refcount++;
+ struct ipc_rcu *out = ipc_alloc(sizeof(struct ipc_rcu) + size);
+ if (unlikely(!out))
+ return NULL;
+ atomic_set(&out->refcount, 1);
+ return out->data;
}
-static void ipc_do_vfree(struct work_struct *work)
+int ipc_rcu_getref(void *ptr)
{
- vfree(container_of(work, struct ipc_rcu_sched, work));
+ return atomic_inc_not_zero(&container_of(ptr, struct ipc_rcu, data)->refcount);
}
/**
* ipc_schedule_free - free ipc + rcu space
* @head: RCU callback structure for queued work
- *
- * Since RCU callback function is called in bh,
- * we need to defer the vfree to schedule_work().
*/
static void ipc_schedule_free(struct rcu_head *head)
{
- struct ipc_rcu_grace *grace;
- struct ipc_rcu_sched *sched;
-
- grace = container_of(head, struct ipc_rcu_grace, rcu);
- sched = container_of(&(grace->data[0]), struct ipc_rcu_sched,
- data[0]);
-
- INIT_WORK(&sched->work, ipc_do_vfree);
- schedule_work(&sched->work);
+ vfree(container_of(head, struct ipc_rcu, rcu));
}
void ipc_rcu_putref(void *ptr)
{
- if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0)
+ struct ipc_rcu *p = container_of(ptr, struct ipc_rcu, data);
+
+ if (!atomic_dec_and_test(&p->refcount))
return;
- if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) {
- call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu,
- ipc_schedule_free);
+ if (is_vmalloc_addr(ptr)) {
+ call_rcu(&p->rcu, ipc_schedule_free);
} else {
- kfree_rcu(container_of(ptr, struct ipc_rcu_grace, data), rcu);
+ kfree_rcu(p, rcu);
}
}
@@ -668,38 +600,81 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
}
/**
+ * ipc_obtain_object
+ * @ids: ipc identifier set
+ * @id: ipc id to look for
+ *
+ * Look for an id in the ipc ids idr and return associated ipc object.
+ *
+ * Call inside the RCU critical section.
+ * The ipc object is *not* locked on exit.
+ */
+struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id)
+{
+ struct kern_ipc_perm *out;
+ int lid = ipcid_to_idx(id);
+
+ out = idr_find(&ids->ipcs_idr, lid);
+ if (!out)
+ return ERR_PTR(-EINVAL);
+
+ return out;
+}
+
+/**
* ipc_lock - Lock an ipc structure without rw_mutex held
* @ids: IPC identifier set
* @id: ipc id to look for
*
* Look for an id in the ipc ids idr and lock the associated ipc object.
*
- * The ipc object is locked on exit.
+ * The ipc object is locked on successful exit.
*/
-
struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
{
struct kern_ipc_perm *out;
- int lid = ipcid_to_idx(id);
rcu_read_lock();
- out = idr_find(&ids->ipcs_idr, lid);
- if (out == NULL) {
- rcu_read_unlock();
- return ERR_PTR(-EINVAL);
- }
+ out = ipc_obtain_object(ids, id);
+ if (IS_ERR(out))
+ goto err1;
spin_lock(&out->lock);
-
+
/* ipc_rmid() may have already freed the ID while ipc_lock
* was spinning: here verify that the structure is still valid
*/
- if (out->deleted) {
- spin_unlock(&out->lock);
- rcu_read_unlock();
- return ERR_PTR(-EINVAL);
- }
+ if (!out->deleted)
+ return out;
+ spin_unlock(&out->lock);
+ out = ERR_PTR(-EINVAL);
+err1:
+ rcu_read_unlock();
+ return out;
+}
+
+/**
+ * ipc_obtain_object_check
+ * @ids: ipc identifier set
+ * @id: ipc id to look for
+ *
+ * Similar to ipc_obtain_object() but also checks
+ * the ipc object sequence number.
+ *
+ * Call inside the RCU critical section.
+ * The ipc object is *not* locked on exit.
+ */
+struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id)
+{
+ struct kern_ipc_perm *out = ipc_obtain_object(ids, id);
+
+ if (IS_ERR(out))
+ goto out;
+
+ if (ipc_checkid(out, id))
+ return ERR_PTR(-EIDRM);
+out:
return out;
}
@@ -780,11 +755,28 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
struct ipc64_perm *perm, int extra_perm)
{
struct kern_ipc_perm *ipcp;
+
+ ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm, extra_perm);
+ if (IS_ERR(ipcp))
+ goto out;
+
+ spin_lock(&ipcp->lock);
+out:
+ return ipcp;
+}
+
+struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
+ struct ipc_ids *ids, int id, int cmd,
+ struct ipc64_perm *perm, int extra_perm)
+{
kuid_t euid;
- int err;
+ int err = -EPERM;
+ struct kern_ipc_perm *ipcp;
down_write(&ids->rw_mutex);
- ipcp = ipc_lock_check(ids, id);
+ rcu_read_lock();
+
+ ipcp = ipc_obtain_object_check(ids, id);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
goto out_up;
@@ -793,17 +785,21 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
audit_ipc_obj(ipcp);
if (cmd == IPC_SET)
audit_ipc_set_perm(extra_perm, perm->uid,
- perm->gid, perm->mode);
+ perm->gid, perm->mode);
euid = current_euid();
if (uid_eq(euid, ipcp->cuid) || uid_eq(euid, ipcp->uid) ||
ns_capable(ns->user_ns, CAP_SYS_ADMIN))
return ipcp;
- err = -EPERM;
- ipc_unlock(ipcp);
out_up:
+ /*
+ * Unsuccessful lookup, unlock and return
+ * the corresponding error.
+ */
+ rcu_read_unlock();
up_write(&ids->rw_mutex);
+
return ERR_PTR(err);
}
@@ -964,7 +960,7 @@ static int sysvipc_proc_open(struct inode *inode, struct file *file)
seq = file->private_data;
seq->private = iter;
- iter->iface = PDE(inode)->data;
+ iter->iface = PDE_DATA(inode);
iter->ns = get_ipc_ns(current->nsproxy->ipc_ns);
out:
return ret;
diff --git a/ipc/util.h b/ipc/util.h
index eeb79a1fbd83..2b0bdd5d92ce 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -119,14 +119,18 @@ void ipc_free(void* ptr, int size);
* to 0 schedules the rcu destruction. Caller must guarantee locking.
*/
void* ipc_rcu_alloc(int size);
-void ipc_rcu_getref(void *ptr);
+int ipc_rcu_getref(void *ptr);
void ipc_rcu_putref(void *ptr);
struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
+struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id);
void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out);
+struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
+ struct ipc_ids *ids, int id, int cmd,
+ struct ipc64_perm *perm, int extra_perm);
struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
struct ipc_ids *ids, int id, int cmd,
struct ipc64_perm *perm, int extra_perm);
@@ -150,14 +154,9 @@ static inline int ipc_buildid(int id, int seq)
return SEQ_MULTIPLIER * seq + id;
}
-/*
- * Must be called with ipcp locked
- */
static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid)
{
- if (uid / SEQ_MULTIPLIER != ipcp->seq)
- return 1;
- return 0;
+ return uid / SEQ_MULTIPLIER != ipcp->seq;
}
static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
@@ -172,7 +171,13 @@ static inline void ipc_unlock(struct kern_ipc_perm *perm)
rcu_read_unlock();
}
+static inline void ipc_lock_object(struct kern_ipc_perm *perm)
+{
+ spin_lock(&perm->lock);
+}
+
struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id);
+struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id);
int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
struct ipc_ops *ops, struct ipc_params *params);
void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,