summaryrefslogtreecommitdiff
path: root/net/unix
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2011-09-19 05:52:27 +0000
committerDavid S. Miller <davem@davemloft.net>2011-09-28 13:29:50 -0400
commit16e5726269611b71c930054ffe9b858c1cea88eb (patch)
tree50d25ec97d639b418964bad2f45774d657468c74 /net/unix
parenta9e9fd7182332d0cf5f3e601df3e71dd431b70d7 (diff)
af_unix: dont send SCM_CREDENTIALS by default
Since commit 7361c36c5224 (af_unix: Allow credentials to work across user and pid namespaces) af_unix performance dropped a lot. This is because we now take a reference on pid and cred in each write(), and release them in read(), usually done from another process, eventually from another cpu. This triggers false sharing. # Events: 154K cycles # # Overhead Command Shared Object Symbol # ........ ....... .................. ......................... # 10.40% hackbench [kernel.kallsyms] [k] put_pid 8.60% hackbench [kernel.kallsyms] [k] unix_stream_recvmsg 7.87% hackbench [kernel.kallsyms] [k] unix_stream_sendmsg 6.11% hackbench [kernel.kallsyms] [k] do_raw_spin_lock 4.95% hackbench [kernel.kallsyms] [k] unix_scm_to_skb 4.87% hackbench [kernel.kallsyms] [k] pid_nr_ns 4.34% hackbench [kernel.kallsyms] [k] cred_to_ucred 2.39% hackbench [kernel.kallsyms] [k] unix_destruct_scm 2.24% hackbench [kernel.kallsyms] [k] sub_preempt_count 1.75% hackbench [kernel.kallsyms] [k] fget_light 1.51% hackbench [kernel.kallsyms] [k] __mutex_lock_interruptible_slowpath 1.42% hackbench [kernel.kallsyms] [k] sock_alloc_send_pskb This patch includes SCM_CREDENTIALS information in a af_unix message/skb only if requested by the sender, [man 7 unix for details how to include ancillary data using sendmsg() system call] Note: This might break buggy applications that expected SCM_CREDENTIAL from an unaware write() system call, and receiver not using SO_PASSCRED socket option. If SOCK_PASSCRED is set on source or destination socket, we still include credentials for mere write() syscalls. Performance boost in hackbench : more than 50% gain on a 16 thread machine (2 quad-core cpus, 2 threads per core) hackbench 20 thread 2000 4.228 sec instead of 9.102 sec Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Acked-by: Tim Chen <tim.c.chen@linux.intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/unix')
-rw-r--r--net/unix/af_unix.c24
1 files changed, 23 insertions, 1 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ec68e1c05b85..466fbcc5cf77 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1381,8 +1381,10 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
int err = 0;
+
UNIXCB(skb).pid = get_pid(scm->pid);
- UNIXCB(skb).cred = get_cred(scm->cred);
+ if (scm->cred)
+ UNIXCB(skb).cred = get_cred(scm->cred);
UNIXCB(skb).fp = NULL;
if (scm->fp && send_fds)
err = unix_attach_fds(scm, skb);
@@ -1392,6 +1394,24 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
}
/*
+ * Some apps rely on write() giving SCM_CREDENTIALS
+ * We include credentials if source or destination socket
+ * asserted SOCK_PASSCRED.
+ */
+static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
+ const struct sock *other)
+{
+ if (UNIXCB(skb).cred)
+ return;
+ if (test_bit(SOCK_PASSCRED, &sock->flags) ||
+ !other->sk_socket ||
+ test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
+ UNIXCB(skb).pid = get_pid(task_tgid(current));
+ UNIXCB(skb).cred = get_current_cred();
+ }
+}
+
+/*
* Send AF_UNIX data.
*/
@@ -1538,6 +1558,7 @@ restart:
if (sock_flag(other, SOCK_RCVTSTAMP))
__net_timestamp(skb);
+ maybe_add_creds(skb, sock, other);
skb_queue_tail(&other->sk_receive_queue, skb);
if (max_level > unix_sk(other)->recursion_level)
unix_sk(other)->recursion_level = max_level;
@@ -1652,6 +1673,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
(other->sk_shutdown & RCV_SHUTDOWN))
goto pipe_err_free;
+ maybe_add_creds(skb, sock, other);
skb_queue_tail(&other->sk_receive_queue, skb);
if (max_level > unix_sk(other)->recursion_level)
unix_sk(other)->recursion_level = max_level;