diff options
author | David S. Miller <davem@davemloft.net> | 2015-06-15 19:49:22 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-06-15 19:49:22 -0700 |
commit | 24029a3603cfa633e8bc2b3fb3e48e76c497831d (patch) | |
tree | 19ed0663ecfc90d53e8a449f28b25206e0c9a5f6 | |
parent | 916035ddd3f453a9152db8755dc8a1f53505444c (diff) | |
parent | 35ac838a9b96470f999db04320f53a2033642bfb (diff) |
Merge branch 'sock_diag_destruction_events'
Craig Gallek says:
====================
Socket destruction events via netlink sock_diag
This series extends the netlink sock_diag interface to broadcast
socket information as they are being destroyed. The current
interface is poll based and can not be used to retreive information
about sockets that are destroyed between poll intervals.
Only inet sockets are broadcast in this implementation, but other
families could easily be added as needed in the future.
If this patch set is accepted, a follow-up patch to the ss utility
in the iproute2 suite will also be submitted.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/inet_diag.h | 1 | ||||
-rw-r--r-- | include/linux/sock_diag.h | 42 | ||||
-rw-r--r-- | include/net/sock.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/inet_diag.h | 3 | ||||
-rw-r--r-- | include/uapi/linux/sock_diag.h | 10 | ||||
-rw-r--r-- | net/core/sock.c | 11 | ||||
-rw-r--r-- | net/core/sock_diag.c | 85 | ||||
-rw-r--r-- | net/dccp/diag.c | 1 | ||||
-rw-r--r-- | net/ipv4/inet_diag.c | 50 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_diag.c | 6 | ||||
-rw-r--r-- | net/ipv4/udp_diag.c | 2 |
12 files changed, 209 insertions, 7 deletions
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index ac48b10c9395..0e707f0c1a3e 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -24,6 +24,7 @@ struct inet_diag_handler { struct inet_diag_msg *r, void *info); __u16 idiag_type; + __u16 idiag_info_size; }; struct inet_connection_sock; diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 083ac388098e..fddebc617469 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -1,7 +1,10 @@ #ifndef __SOCK_DIAG_H__ #define __SOCK_DIAG_H__ +#include <linux/netlink.h> #include <linux/user_namespace.h> +#include <net/net_namespace.h> +#include <net/sock.h> #include <uapi/linux/sock_diag.h> struct sk_buff; @@ -11,6 +14,7 @@ struct sock; struct sock_diag_handler { __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); + int (*get_info)(struct sk_buff *skb, struct sock *sk); }; int sock_diag_register(const struct sock_diag_handler *h); @@ -26,4 +30,42 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, struct sk_buff *skb, int attrtype); +static inline +enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk) +{ + switch (sk->sk_family) { + case AF_INET: + switch (sk->sk_protocol) { + case IPPROTO_TCP: + return SKNLGRP_INET_TCP_DESTROY; + case IPPROTO_UDP: + return SKNLGRP_INET_UDP_DESTROY; + default: + return SKNLGRP_NONE; + } + case AF_INET6: + switch (sk->sk_protocol) { + case IPPROTO_TCP: + return SKNLGRP_INET6_TCP_DESTROY; + case IPPROTO_UDP: + return SKNLGRP_INET6_UDP_DESTROY; + default: + return SKNLGRP_NONE; + } + default: + return SKNLGRP_NONE; + } +} + +static inline +bool sock_diag_has_destroy_listeners(const struct sock *sk) +{ + const struct net *n = sock_net(sk); + const enum sknetlink_groups group = sock_diag_destroy_group(sk); + + return group != SKNLGRP_NONE && n->diag_nlsk && + netlink_has_listeners(n->diag_nlsk, group); +} +void sock_diag_broadcast_destroy(struct sock *sk); + #endif diff --git a/include/net/sock.h b/include/net/sock.h index 26c1c3171e00..3e8258699270 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1518,6 +1518,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); void sk_free(struct sock *sk); +void sk_destruct(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index c7093c75bdd6..b629fc53b109 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -111,9 +111,10 @@ enum { INET_DIAG_SKMEMINFO, INET_DIAG_SHUTDOWN, INET_DIAG_DCTCPINFO, + INET_DIAG_PROTOCOL, /* response attribute only */ }; -#define INET_DIAG_MAX INET_DIAG_DCTCPINFO +#define INET_DIAG_MAX INET_DIAG_PROTOCOL /* INET_DIAG_MEM */ diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h index b00e29efb161..49230d36f9ce 100644 --- a/include/uapi/linux/sock_diag.h +++ b/include/uapi/linux/sock_diag.h @@ -23,4 +23,14 @@ enum { SK_MEMINFO_VARS, }; +enum sknetlink_groups { + SKNLGRP_NONE, + SKNLGRP_INET_TCP_DESTROY, + SKNLGRP_INET_UDP_DESTROY, + SKNLGRP_INET6_TCP_DESTROY, + SKNLGRP_INET6_UDP_DESTROY, + __SKNLGRP_MAX, +}; +#define SKNLGRP_MAX (__SKNLGRP_MAX - 1) + #endif /* _UAPI__SOCK_DIAG_H__ */ diff --git a/net/core/sock.c b/net/core/sock.c index 7063c329c1b6..1e1fe9a68d83 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -131,6 +131,7 @@ #include <linux/ipsec.h> #include <net/cls_cgroup.h> #include <net/netprio_cgroup.h> +#include <linux/sock_diag.h> #include <linux/filter.h> @@ -1423,7 +1424,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, } EXPORT_SYMBOL(sk_alloc); -static void __sk_free(struct sock *sk) +void sk_destruct(struct sock *sk) { struct sk_filter *filter; @@ -1451,6 +1452,14 @@ static void __sk_free(struct sock *sk) sk_prot_free(sk->sk_prot_creator, sk); } +static void __sk_free(struct sock *sk) +{ + if (unlikely(sock_diag_has_destroy_listeners(sk))) + sock_diag_broadcast_destroy(sk); + else + sk_destruct(sk); +} + void sk_free(struct sock *sk) { /* diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 74dddf84adcd..d79866c5f8bc 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -5,6 +5,9 @@ #include <net/net_namespace.h> #include <linux/module.h> #include <net/sock.h> +#include <linux/kernel.h> +#include <linux/tcp.h> +#include <linux/workqueue.h> #include <linux/inet_diag.h> #include <linux/sock_diag.h> @@ -12,6 +15,7 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); +static struct workqueue_struct *broadcast_wq; static u64 sock_gen_cookie(struct sock *sk) { @@ -101,6 +105,62 @@ out: } EXPORT_SYMBOL(sock_diag_put_filterinfo); +struct broadcast_sk { + struct sock *sk; + struct work_struct work; +}; + +static size_t sock_diag_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct inet_diag_msg) + + nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */ + + nla_total_size(sizeof(struct tcp_info))); /* INET_DIAG_INFO */ +} + +static void sock_diag_broadcast_destroy_work(struct work_struct *work) +{ + struct broadcast_sk *bsk = + container_of(work, struct broadcast_sk, work); + struct sock *sk = bsk->sk; + const struct sock_diag_handler *hndl; + struct sk_buff *skb; + const enum sknetlink_groups group = sock_diag_destroy_group(sk); + int err = -1; + + WARN_ON(group == SKNLGRP_NONE); + + skb = nlmsg_new(sock_diag_nlmsg_size(), GFP_KERNEL); + if (!skb) + goto out; + + mutex_lock(&sock_diag_table_mutex); + hndl = sock_diag_handlers[sk->sk_family]; + if (hndl && hndl->get_info) + err = hndl->get_info(skb, sk); + mutex_unlock(&sock_diag_table_mutex); + + if (!err) + nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group, + GFP_KERNEL); + else + kfree_skb(skb); +out: + sk_destruct(sk); + kfree(bsk); +} + +void sock_diag_broadcast_destroy(struct sock *sk) +{ + /* Note, this function is often called from an interrupt context. */ + struct broadcast_sk *bsk = + kmalloc(sizeof(struct broadcast_sk), GFP_ATOMIC); + if (!bsk) + return sk_destruct(sk); + bsk->sk = sk; + INIT_WORK(&bsk->work, sock_diag_broadcast_destroy_work); + queue_work(broadcast_wq, &bsk->work); +} + void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)) { mutex_lock(&sock_diag_table_mutex); @@ -211,10 +271,32 @@ static void sock_diag_rcv(struct sk_buff *skb) mutex_unlock(&sock_diag_mutex); } +static int sock_diag_bind(struct net *net, int group) +{ + switch (group) { + case SKNLGRP_INET_TCP_DESTROY: + case SKNLGRP_INET_UDP_DESTROY: + if (!sock_diag_handlers[AF_INET]) + request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, AF_INET); + break; + case SKNLGRP_INET6_TCP_DESTROY: + case SKNLGRP_INET6_UDP_DESTROY: + if (!sock_diag_handlers[AF_INET6]) + request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, AF_INET); + break; + } + return 0; +} + static int __net_init diag_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { + .groups = SKNLGRP_MAX, .input = sock_diag_rcv, + .bind = sock_diag_bind, + .flags = NL_CFG_F_NONROOT_RECV, }; net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg); @@ -234,12 +316,15 @@ static struct pernet_operations diag_net_ops = { static int __init sock_diag_init(void) { + broadcast_wq = alloc_workqueue("sock_diag_events", 0, 0); + BUG_ON(!broadcast_wq); return register_pernet_subsys(&diag_net_ops); } static void __exit sock_diag_exit(void) { unregister_pernet_subsys(&diag_net_ops); + destroy_workqueue(broadcast_wq); } module_init(sock_diag_init); diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 5a45f8de5d99..2d84303ea6bf 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -66,6 +66,7 @@ static const struct inet_diag_handler dccp_diag_handler = { .dump_one = dccp_diag_dump_one, .idiag_get_info = dccp_diag_get_info, .idiag_type = IPPROTO_DCCP, + .idiag_info_size = sizeof(struct tcp_info), }; static int __init dccp_diag_init(void) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 4d32262c7502..21985d8d41e7 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -200,9 +200,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, } #undef EXPIRES_IN_MS - if (ext & (1 << (INET_DIAG_INFO - 1))) { + if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { attr = nla_reserve(skb, INET_DIAG_INFO, - sizeof(struct tcp_info)); + handler->idiag_info_size); if (!attr) goto errout; @@ -1078,14 +1078,60 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) return inet_diag_get_exact(skb, h, nlmsg_data(h)); } +static +int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) +{ + const struct inet_diag_handler *handler; + struct nlmsghdr *nlh; + struct nlattr *attr; + struct inet_diag_msg *r; + void *info = NULL; + int err = 0; + + nlh = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*r), 0); + if (!nlh) + return -ENOMEM; + + r = nlmsg_data(nlh); + memset(r, 0, sizeof(*r)); + inet_diag_msg_common_fill(r, sk); + r->idiag_state = sk->sk_state; + + if ((err = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))) { + nlmsg_cancel(skb, nlh); + return err; + } + + handler = inet_diag_lock_handler(sk->sk_protocol); + if (IS_ERR(handler)) { + inet_diag_unlock_handler(handler); + nlmsg_cancel(skb, nlh); + return PTR_ERR(handler); + } + + attr = handler->idiag_info_size + ? nla_reserve(skb, INET_DIAG_INFO, handler->idiag_info_size) + : NULL; + if (attr) + info = nla_data(attr); + + handler->idiag_get_info(sk, r, info); + inet_diag_unlock_handler(handler); + + nlmsg_end(skb, nlh); + return 0; +} + static const struct sock_diag_handler inet_diag_handler = { .family = AF_INET, .dump = inet_diag_handler_dump, + .get_info = inet_diag_handler_get_info, }; static const struct sock_diag_handler inet6_diag_handler = { .family = AF_INET6, .dump = inet_diag_handler_dump, + .get_info = inet_diag_handler_get_info, }; int inet_diag_register(const struct inet_diag_handler *h) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 65f791f74845..697b86dd45b3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2624,13 +2624,15 @@ EXPORT_SYMBOL(compat_tcp_setsockopt); /* Return information about state of tcp endpoint in API format. */ void tcp_get_info(struct sock *sk, struct tcp_info *info) { - const struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; unsigned int start; u32 rate; memset(info, 0, sizeof(*info)); + if (sk->sk_type != SOCK_STREAM) + return; info->tcpi_state = sk->sk_state; info->tcpi_ca_state = icsk->icsk_ca_state; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 79b34a0f4a4a..479f34946177 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -19,13 +19,14 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *_info) { - const struct tcp_sock *tp = tcp_sk(sk); struct tcp_info *info = _info; if (sk->sk_state == TCP_LISTEN) { r->idiag_rqueue = sk->sk_ack_backlog; r->idiag_wqueue = sk->sk_max_ack_backlog; - } else { + } else if (sk->sk_type == SOCK_STREAM) { + const struct tcp_sock *tp = tcp_sk(sk); + r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); r->idiag_wqueue = tp->write_seq - tp->snd_una; } @@ -50,6 +51,7 @@ static const struct inet_diag_handler tcp_diag_handler = { .dump_one = tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, .idiag_type = IPPROTO_TCP, + .idiag_info_size = sizeof(struct tcp_info), }; static int __init tcp_diag_init(void) diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index b763c39ae1d7..6116604bf6e8 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -170,6 +170,7 @@ static const struct inet_diag_handler udp_diag_handler = { .dump_one = udp_diag_dump_one, .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDP, + .idiag_info_size = 0, }; static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, @@ -190,6 +191,7 @@ static const struct inet_diag_handler udplite_diag_handler = { .dump_one = udplite_diag_dump_one, .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDPLITE, + .idiag_info_size = 0, }; static int __init udp_diag_init(void) |