From 496053f488fc2d859e41574f3421993826d2d0eb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 11 Jan 2012 16:46:32 -0800
Subject: ipv4: Remove bogus checks of rt_gateway being zero.

It can never actually happen.  rt_gateway is either the fully resolved
flow lookup key's destination address, or the non-zero FIB entry gateway
address.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 8 +++-----
 net/ipv4/ipip.c   | 2 --
 2 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2b53a1f7abf6..fc213350a6ed 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -724,11 +724,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			goto tx_error;
 		}
 
-		if (skb->protocol == htons(ETH_P_IP)) {
+		if (skb->protocol == htons(ETH_P_IP))
 			rt = skb_rtable(skb);
-			if ((dst = rt->rt_gateway) == 0)
-				goto tx_error_icmp;
-		}
 #if IS_ENABLED(CONFIG_IPV6)
 		else if (skb->protocol == htons(ETH_P_IPV6)) {
 			struct neighbour *neigh = dst_get_neighbour_noref(skb_dst(skb));
@@ -910,9 +907,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	__IPTUNNEL_XMIT(tstats, &dev->stats);
 	return NETDEV_TX_OK;
 
+#if IS_ENABLED(CONFIG_IPV6)
 tx_error_icmp:
 	dst_link_failure(skb);
-
+#endif
 tx_error:
 	dev->stats.tx_errors++;
 	dev_kfree_skb(skb);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 22a199315309..87c63b67f8ce 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -454,8 +454,6 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			dev->stats.tx_fifo_errors++;
 			goto tx_error;
 		}
-		if ((dst = rt->rt_gateway) == 0)
-			goto tx_error_icmp;
 	}
 
 	rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
-- 
cgit v1.2.3


From 658c8d964eb3cdb7e4230a59ba09c75a3359ee4a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 24 Jan 2012 18:18:05 -0500
Subject: ipip: Fix bug added to ipip_tunnel_xmit().

We can remove the rt_gateway == 0 check but we shouldn't
remove the 'dst' initialization too.

Noticed by Eric Dumazet.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipip.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 87c63b67f8ce..f84ebff5cdb0 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -454,6 +454,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			dev->stats.tx_fifo_errors++;
 			goto tx_error;
 		}
+		dst = rt->rt_gateway;
 	}
 
 	rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
-- 
cgit v1.2.3


From 61d57f87f3fb04a305f22befabd042ffbec8b852 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 24 Jan 2012 18:23:30 -0500
Subject: ip_gre: Fix bug added to ipgre_tunnel_xmit().

We can remove the rt_gateway == 0 check but we shouldn't
remove the 'dst' initialization too.

Noticed by Eric Dumazet.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index fc213350a6ed..05f7419ed7c5 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -724,8 +724,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			goto tx_error;
 		}
 
-		if (skb->protocol == htons(ETH_P_IP))
+		if (skb->protocol == htons(ETH_P_IP)) {
 			rt = skb_rtable(skb);
+			dst = rt->rt_gateway;
+		}
 #if IS_ENABLED(CONFIG_IPV6)
 		else if (skb->protocol == htons(ETH_P_IPV6)) {
 			struct neighbour *neigh = dst_get_neighbour_noref(skb_dst(skb));
-- 
cgit v1.2.3


From 09e9b813d34d9a09d64a64580a9959d8bae1f4f5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 25 Jan 2012 04:44:20 +0000
Subject: tcp: add LINUX_MIB_TCPRETRANSFAIL counter

It might be useful to get a counter of failed tcp_retransmit_skb()
calls.

Reported-by: Satoru Moriya <satoru.moriya@hds.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/proc.c       | 1 +
 net/ipv4/tcp_output.c | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 6afc807ee2ad..02d61079f08b 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -256,6 +256,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW),
 	SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES),
 	SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP),
+	SNMP_MIB_ITEM("TCPRetransFail", LINUX_MIB_TCPRETRANSFAIL),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8c8de2780c7a..561550ab3c30 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2308,8 +2308,10 @@ begin_fwd:
 		if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
 			continue;
 
-		if (tcp_retransmit_skb(sk, skb))
+		if (tcp_retransmit_skb(sk, skb)) {
+			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
 			return;
+		}
 		NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
 		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
-- 
cgit v1.2.3


From 39232973b779ab0c02cb6dcd8f819b7cb0fcd09a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 26 Jan 2012 15:22:32 -0500
Subject: ipv4/ipv6: Prepare for new route gateway semantics.

In the future the ipv4/ipv6 route gateway will take on two types
of values:

1) INADDR_ANY/IN6ADDR_ANY, for local network routes, and in this case
   the neighbour must be obtained using the destination address in
   ipv4/ipv6 header as the lookup key.

2) Everything else, the actual nexthop route address.

So if the gateway is not inaddr-any we use it, otherwise we must use
the packet's destination address.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net/ipv4')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index bcacf54e5418..4eeb8ce856e2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1117,10 +1117,15 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const vo
 	static const __be32 inaddr_any = 0;
 	struct net_device *dev = dst->dev;
 	const __be32 *pkey = daddr;
+	const struct rtable *rt;
 	struct neighbour *n;
 
+	rt = (const struct rtable *) dst;
+
 	if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
 		pkey = &inaddr_any;
+	else if (rt->rt_gateway)
+		pkey = (const __be32 *) &rt->rt_gateway;
 
 	n = __ipv4_neigh_lookup(&arp_tbl, dev, *(__force u32 *)pkey);
 	if (n)
-- 
cgit v1.2.3


From 0ec88662041e172acf33d7a15a2020841ee82afb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 27 Jan 2012 15:01:08 -0800
Subject: ipv4: ip_gre: Convert to dst_neigh_lookup()

The conversion is very similar to that made to ipv6's SIT code.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 05f7419ed7c5..bf9541648b96 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -730,10 +730,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 		}
 #if IS_ENABLED(CONFIG_IPV6)
 		else if (skb->protocol == htons(ETH_P_IPV6)) {
-			struct neighbour *neigh = dst_get_neighbour_noref(skb_dst(skb));
 			const struct in6_addr *addr6;
+			struct neighbour *neigh;
+			bool do_tx_error_icmp;
 			int addr_type;
 
+			neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
 			if (neigh == NULL)
 				goto tx_error;
 
@@ -746,9 +748,14 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			}
 
 			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
+				do_tx_error_icmp = true;
+			else {
+				do_tx_error_icmp = false;
+				dst = addr6->s6_addr32[3];
+			}
+			neigh_release(neigh);
+			if (do_tx_error_icmp)
 				goto tx_error_icmp;
-
-			dst = addr6->s6_addr32[3];
 		}
 #endif
 		else
-- 
cgit v1.2.3


From a2d91241a80ec9bbc5ab24b9a2c4d730b3fa5730 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 31 Jan 2012 01:04:42 +0000
Subject: tcp: md5: remove obsolete md5_add() method

We no longer use md5_add() method from struct tcp_sock_af_ops

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 337ba4cca052..345e24928fa6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -967,13 +967,6 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
 }
 EXPORT_SYMBOL(tcp_v4_md5_do_add);
 
-static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
-			       u8 *newkey, u8 newkeylen)
-{
-	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
-				 newkey, newkeylen);
-}
-
 int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -1853,7 +1846,6 @@ EXPORT_SYMBOL(ipv4_specific);
 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
 	.md5_lookup		= tcp_v4_md5_lookup,
 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
-	.md5_add		= tcp_v4_md5_add_func,
 	.md5_parse		= tcp_v4_parse_md5_keys,
 };
 #endif
-- 
cgit v1.2.3


From a915da9b69273815527ccb3789421cb7027b545b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 31 Jan 2012 05:18:33 +0000
Subject: tcp: md5: rcu conversion

In order to be able to support proper RST messages for TCP MD5 flows, we
need to allow access to MD5 keys without locking listener socket.

This conversion is a nice cleanup, and shrinks size of timewait sockets
by 80 bytes.

IPv6 code reuses generic code found in IPv4 instead of duplicating it.

Control path uses GFP_KERNEL allocations instead of GFP_ATOMIC.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Shawn Lu <shawn.lu@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c      | 227 +++++++++++++++++++----------------------------
 net/ipv4/tcp_minisocks.c |  12 +--
 2 files changed, 99 insertions(+), 140 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 345e24928fa6..1d5fd82c5c08 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -90,16 +90,8 @@ EXPORT_SYMBOL(sysctl_tcp_low_latency);
 
 
 #ifdef CONFIG_TCP_MD5SIG
-static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
-						   __be32 addr);
-static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
-#else
-static inline
-struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
-{
-	return NULL;
-}
 #endif
 
 struct inet_hashinfo tcp_hashinfo;
@@ -631,7 +623,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	arg.iov[0].iov_len  = sizeof(rep.th);
 
 #ifdef CONFIG_TCP_MD5SIG
-	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
+	key = sk ? tcp_md5_do_lookup(sk,
+				     (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
+				     AF_INET) : NULL;
 	if (key) {
 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 				   (TCPOPT_NOP << 16) |
@@ -759,7 +753,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 			req->ts_recent,
 			0,
-			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
+			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
+					  AF_INET),
 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
 			ip_hdr(skb)->tos);
 }
@@ -876,146 +871,124 @@ static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
  */
 
 /* Find the Key structure for an address.  */
-static struct tcp_md5sig_key *
-			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
+struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
+					 const union tcp_md5_addr *addr,
+					 int family)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int i;
+	struct tcp_md5sig_key *key;
+	struct hlist_node *pos;
+	unsigned int size = sizeof(struct in_addr);
 
-	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
+	if (!tp->md5sig_info)
 		return NULL;
-	for (i = 0; i < tp->md5sig_info->entries4; i++) {
-		if (tp->md5sig_info->keys4[i].addr == addr)
-			return &tp->md5sig_info->keys4[i].base;
+#if IS_ENABLED(CONFIG_IPV6)
+	if (family == AF_INET6)
+		size = sizeof(struct in6_addr);
+#endif
+	hlist_for_each_entry_rcu(key, pos, &tp->md5sig_info->head, node) {
+		if (key->family != family)
+			continue;
+		if (!memcmp(&key->addr, addr, size))
+			return key;
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(tcp_md5_do_lookup);
 
 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
 					 struct sock *addr_sk)
 {
-	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
+	union tcp_md5_addr *addr;
+
+	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
+	return tcp_md5_do_lookup(sk, addr, AF_INET);
 }
 EXPORT_SYMBOL(tcp_v4_md5_lookup);
 
 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
 						      struct request_sock *req)
 {
-	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
+	union tcp_md5_addr *addr;
+
+	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
+	return tcp_md5_do_lookup(sk, addr, AF_INET);
 }
 
 /* This can be called on a newly created socket, from other files */
-int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
-		      u8 *newkey, u8 newkeylen)
+int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
+		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
 {
 	/* Add Key to the list */
 	struct tcp_md5sig_key *key;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp4_md5sig_key *keys;
+	struct tcp_md5sig_info *md5sig;
 
-	key = tcp_v4_md5_do_lookup(sk, addr);
+	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
 	if (key) {
 		/* Pre-existing entry - just update that one. */
-		kfree(key->key);
-		key->key = newkey;
+		memcpy(key->key, newkey, newkeylen);
 		key->keylen = newkeylen;
-	} else {
-		struct tcp_md5sig_info *md5sig;
-
-		if (!tp->md5sig_info) {
-			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
-						  GFP_ATOMIC);
-			if (!tp->md5sig_info) {
-				kfree(newkey);
-				return -ENOMEM;
-			}
-			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
-		}
+		return 0;
+	}
 
-		md5sig = tp->md5sig_info;
-		if (md5sig->entries4 == 0 &&
-		    tcp_alloc_md5sig_pool(sk) == NULL) {
-			kfree(newkey);
+	md5sig = tp->md5sig_info;
+	if (!md5sig) {
+		md5sig = kmalloc(sizeof(*md5sig), gfp);
+		if (!md5sig)
 			return -ENOMEM;
-		}
-
-		if (md5sig->alloced4 == md5sig->entries4) {
-			keys = kmalloc((sizeof(*keys) *
-					(md5sig->entries4 + 1)), GFP_ATOMIC);
-			if (!keys) {
-				kfree(newkey);
-				if (md5sig->entries4 == 0)
-					tcp_free_md5sig_pool();
-				return -ENOMEM;
-			}
 
-			if (md5sig->entries4)
-				memcpy(keys, md5sig->keys4,
-				       sizeof(*keys) * md5sig->entries4);
+		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
+		INIT_HLIST_HEAD(&md5sig->head);
+		tp->md5sig_info = md5sig;
+	}
 
-			/* Free old key list, and reference new one */
-			kfree(md5sig->keys4);
-			md5sig->keys4 = keys;
-			md5sig->alloced4++;
-		}
-		md5sig->entries4++;
-		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
-		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
-		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
+	key = kmalloc(sizeof(*key), gfp);
+	if (!key)
+		return -ENOMEM;
+	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
+		kfree(key);
+		return -ENOMEM;
 	}
+
+	memcpy(key->key, newkey, newkeylen);
+	key->keylen = newkeylen;
+	key->family = family;
+	memcpy(&key->addr, addr,
+	       (family == AF_INET6) ? sizeof(struct in6_addr) :
+				      sizeof(struct in_addr));
+	hlist_add_head_rcu(&key->node, &md5sig->head);
 	return 0;
 }
-EXPORT_SYMBOL(tcp_v4_md5_do_add);
+EXPORT_SYMBOL(tcp_md5_do_add);
 
-int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
+int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int i;
-
-	for (i = 0; i < tp->md5sig_info->entries4; i++) {
-		if (tp->md5sig_info->keys4[i].addr == addr) {
-			/* Free the key */
-			kfree(tp->md5sig_info->keys4[i].base.key);
-			tp->md5sig_info->entries4--;
-
-			if (tp->md5sig_info->entries4 == 0) {
-				kfree(tp->md5sig_info->keys4);
-				tp->md5sig_info->keys4 = NULL;
-				tp->md5sig_info->alloced4 = 0;
-				tcp_free_md5sig_pool();
-			} else if (tp->md5sig_info->entries4 != i) {
-				/* Need to do some manipulation */
-				memmove(&tp->md5sig_info->keys4[i],
-					&tp->md5sig_info->keys4[i+1],
-					(tp->md5sig_info->entries4 - i) *
-					 sizeof(struct tcp4_md5sig_key));
-			}
-			return 0;
-		}
-	}
-	return -ENOENT;
+	struct tcp_md5sig_key *key;
+
+	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
+	if (!key)
+		return -ENOENT;
+	hlist_del_rcu(&key->node);
+	kfree_rcu(key, rcu);
+	if (hlist_empty(&tp->md5sig_info->head))
+		tcp_free_md5sig_pool();
+	return 0;
 }
-EXPORT_SYMBOL(tcp_v4_md5_do_del);
+EXPORT_SYMBOL(tcp_md5_do_del);
 
-static void tcp_v4_clear_md5_list(struct sock *sk)
+void tcp_clear_md5_list(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_md5sig_key *key;
+	struct hlist_node *pos, *n;
 
-	/* Free each key, then the set of key keys,
-	 * the crypto element, and then decrement our
-	 * hold on the last resort crypto.
-	 */
-	if (tp->md5sig_info->entries4) {
-		int i;
-		for (i = 0; i < tp->md5sig_info->entries4; i++)
-			kfree(tp->md5sig_info->keys4[i].base.key);
-		tp->md5sig_info->entries4 = 0;
+	if (!hlist_empty(&tp->md5sig_info->head))
 		tcp_free_md5sig_pool();
-	}
-	if (tp->md5sig_info->keys4) {
-		kfree(tp->md5sig_info->keys4);
-		tp->md5sig_info->keys4 = NULL;
-		tp->md5sig_info->alloced4  = 0;
+	hlist_for_each_entry_safe(key, pos, n, &tp->md5sig_info->head, node) {
+		hlist_del_rcu(&key->node);
+		kfree_rcu(key, rcu);
 	}
 }
 
@@ -1024,7 +997,6 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 {
 	struct tcp_md5sig cmd;
 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
-	u8 *newkey;
 
 	if (optlen < sizeof(cmd))
 		return -EINVAL;
@@ -1038,29 +1010,16 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
 		if (!tcp_sk(sk)->md5sig_info)
 			return -ENOENT;
-		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
+		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
+				      AF_INET);
 	}
 
 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
 		return -EINVAL;
 
-	if (!tcp_sk(sk)->md5sig_info) {
-		struct tcp_sock *tp = tcp_sk(sk);
-		struct tcp_md5sig_info *p;
-
-		p = kzalloc(sizeof(*p), sk->sk_allocation);
-		if (!p)
-			return -EINVAL;
-
-		tp->md5sig_info = p;
-		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
-	}
-
-	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
-	if (!newkey)
-		return -ENOMEM;
-	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
-				 newkey, cmd.tcpm_keylen);
+	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
+			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
+			      GFP_KERNEL);
 }
 
 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
@@ -1086,7 +1045,7 @@ static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
 }
 
-static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
 {
 	struct tcp_md5sig_pool *hp;
@@ -1186,7 +1145,8 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
 	int genhash;
 	unsigned char newhash[16];
 
-	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
+	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
+					  AF_INET);
 	hash_location = tcp_parse_md5sig_option(th);
 
 	/* We've parsed the options - do we have a hash? */
@@ -1474,7 +1434,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Copy over the MD5 key from the original socket */
-	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
+	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
+				AF_INET);
 	if (key != NULL) {
 		/*
 		 * We're using one, so create a matching key
@@ -1482,10 +1443,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		 * memory, then we end up not copying the key
 		 * across. Shucks.
 		 */
-		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
-		if (newkey != NULL)
-			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
-					  newkey, key->keylen);
+		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
+			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
 	}
 #endif
@@ -1934,7 +1893,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 #ifdef CONFIG_TCP_MD5SIG
 	/* Clean up the MD5 key list, if any */
 	if (tp->md5sig_info) {
-		tcp_v4_clear_md5_list(sk);
+		tcp_clear_md5_list(sk);
 		kfree(tp->md5sig_info);
 		tp->md5sig_info = NULL;
 	}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 550e755747e0..3cabafb5cdd1 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -359,13 +359,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		 */
 		do {
 			struct tcp_md5sig_key *key;
-			memset(tcptw->tw_md5_key, 0, sizeof(tcptw->tw_md5_key));
-			tcptw->tw_md5_keylen = 0;
+			tcptw->tw_md5_key = NULL;
 			key = tp->af_specific->md5_lookup(sk, sk);
 			if (key != NULL) {
-				memcpy(&tcptw->tw_md5_key, key->key, key->keylen);
-				tcptw->tw_md5_keylen = key->keylen;
-				if (tcp_alloc_md5sig_pool(sk) == NULL)
+				tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
+				if (tcptw->tw_md5_key && tcp_alloc_md5sig_pool(sk) == NULL)
 					BUG();
 			}
 		} while (0);
@@ -405,8 +403,10 @@ void tcp_twsk_destructor(struct sock *sk)
 {
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_timewait_sock *twsk = tcp_twsk(sk);
-	if (twsk->tw_md5_keylen)
+	if (twsk->tw_md5_key) {
 		tcp_free_md5sig_pool();
+		kfree_rcu(twsk->tw_md5_key, rcu);
+	}
 #endif
 }
 EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
-- 
cgit v1.2.3


From 5f3d9cb2962967d9d7e03abb4a7ca275a9a3fea5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 31 Jan 2012 10:56:48 +0000
Subject: tcp: md5: use sock_kmalloc() to limit md5 keys

There is no limit on number of MD5 keys an application can attach to a
tcp socket.

This patch adds a per tcp socket limit based
on /proc/sys/net/core/optmem_max

With current default optmem_max values, this allows about 150 keys on
64bit arches, and 88 keys on 32bit arches.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1d5fd82c5c08..da5d3226771b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -943,11 +943,11 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 		tp->md5sig_info = md5sig;
 	}
 
-	key = kmalloc(sizeof(*key), gfp);
+	key = sock_kmalloc(sk, sizeof(*key), gfp);
 	if (!key)
 		return -ENOMEM;
 	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
-		kfree(key);
+		sock_kfree_s(sk, key, sizeof(*key));
 		return -ENOMEM;
 	}
 
@@ -971,6 +971,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
 	if (!key)
 		return -ENOENT;
 	hlist_del_rcu(&key->node);
+	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
 	kfree_rcu(key, rcu);
 	if (hlist_empty(&tp->md5sig_info->head))
 		tcp_free_md5sig_pool();
@@ -988,6 +989,7 @@ void tcp_clear_md5_list(struct sock *sk)
 		tcp_free_md5sig_pool();
 	hlist_for_each_entry_safe(key, pos, n, &tp->md5sig_info->head, node) {
 		hlist_del_rcu(&key->node);
+		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
 		kfree_rcu(key, rcu);
 	}
 }
-- 
cgit v1.2.3


From a8afca032998850ec63e83d555cdcf0eb5680cd6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 31 Jan 2012 18:45:40 +0000
Subject: tcp: md5: protects md5sig_info with RCU

This patch makes sure we use appropriate memory barriers before
publishing tp->md5sig_info, allowing tcp_md5_do_lookup() being used from
tcp_v4_send_reset() without holding socket lock (upcoming patch from
Shawn Lu)

Note we also need to respect rcu grace period before its freeing, since
we can free socket without this grace period thanks to
SLAB_DESTROY_BY_RCU

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Shawn Lu <shawn.lu@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index da5d3226771b..567cca9b30df 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -879,14 +879,18 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
 	struct tcp_md5sig_key *key;
 	struct hlist_node *pos;
 	unsigned int size = sizeof(struct in_addr);
+	struct tcp_md5sig_info *md5sig;
 
-	if (!tp->md5sig_info)
+	/* caller either holds rcu_read_lock() or socket lock */
+	md5sig = rcu_dereference_check(tp->md5sig_info,
+				       sock_owned_by_user(sk));
+	if (!md5sig)
 		return NULL;
 #if IS_ENABLED(CONFIG_IPV6)
 	if (family == AF_INET6)
 		size = sizeof(struct in6_addr);
 #endif
-	hlist_for_each_entry_rcu(key, pos, &tp->md5sig_info->head, node) {
+	hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
 		if (key->family != family)
 			continue;
 		if (!memcmp(&key->addr, addr, size))
@@ -932,7 +936,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 		return 0;
 	}
 
-	md5sig = tp->md5sig_info;
+	md5sig = rcu_dereference_protected(tp->md5sig_info,
+					   sock_owned_by_user(sk));
 	if (!md5sig) {
 		md5sig = kmalloc(sizeof(*md5sig), gfp);
 		if (!md5sig)
@@ -940,7 +945,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 
 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 		INIT_HLIST_HEAD(&md5sig->head);
-		tp->md5sig_info = md5sig;
+		rcu_assign_pointer(tp->md5sig_info, md5sig);
 	}
 
 	key = sock_kmalloc(sk, sizeof(*key), gfp);
@@ -966,6 +971,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_md5sig_key *key;
+	struct tcp_md5sig_info *md5sig;
 
 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
 	if (!key)
@@ -973,7 +979,9 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
 	hlist_del_rcu(&key->node);
 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
 	kfree_rcu(key, rcu);
-	if (hlist_empty(&tp->md5sig_info->head))
+	md5sig = rcu_dereference_protected(tp->md5sig_info,
+					   sock_owned_by_user(sk));
+	if (hlist_empty(&md5sig->head))
 		tcp_free_md5sig_pool();
 	return 0;
 }
@@ -984,10 +992,13 @@ void tcp_clear_md5_list(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_md5sig_key *key;
 	struct hlist_node *pos, *n;
+	struct tcp_md5sig_info *md5sig;
 
-	if (!hlist_empty(&tp->md5sig_info->head))
+	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
+
+	if (!hlist_empty(&md5sig->head))
 		tcp_free_md5sig_pool();
-	hlist_for_each_entry_safe(key, pos, n, &tp->md5sig_info->head, node) {
+	hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
 		hlist_del_rcu(&key->node);
 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
 		kfree_rcu(key, rcu);
@@ -1009,12 +1020,9 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 	if (sin->sin_family != AF_INET)
 		return -EINVAL;
 
-	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
-		if (!tcp_sk(sk)->md5sig_info)
-			return -ENOENT;
+	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
 				      AF_INET);
-	}
 
 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
 		return -EINVAL;
@@ -1896,7 +1904,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 	/* Clean up the MD5 key list, if any */
 	if (tp->md5sig_info) {
 		tcp_clear_md5_list(sk);
-		kfree(tp->md5sig_info);
+		kfree_rcu(tp->md5sig_info, rcu);
 		tp->md5sig_info = NULL;
 	}
 #endif
-- 
cgit v1.2.3


From 658ddaaf6694adf63f67451dec9ddeb87a7cb2d7 Mon Sep 17 00:00:00 2001
From: Shawn Lu <shawn.lu@ericsson.com>
Date: Tue, 31 Jan 2012 22:35:48 +0000
Subject: tcp: md5: RST: getting md5 key from listener

TCP RST mechanism is broken in TCP md5(RFC2385). When
connection is gone, md5 key is lost, sending RST
without md5 hash is deem to ignored by peer. This can
be a problem since RST help protocal like bgp to fast
recove from peer crash.

In most case, users of tcp md5, such as bgp and ldp,
have listener on both sides to accept connection from peer.
md5 keys for peers are saved in listening socket.

There are two cases in finding md5 key when connection is
lost:
1.Passive receive RST: The message is send to well known port,
tcp will associate it with listner. md5 key is gotten from
listener.

2.Active receive RST (no sock): The message is send to ative
side, there is no socket associated with the message. In this
case, finding listener from source port, then find md5 key from
listener.

we are not loosing sercuriy here:
packet is checked with md5 hash. No RST is generated
if md5 hash doesn't match or no md5 key can be found.

Signed-off-by: Shawn Lu <shawn.lu@ericsson.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 45 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 42 insertions(+), 3 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 567cca9b30df..90e47931e217 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -593,6 +593,10 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	struct ip_reply_arg arg;
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key *key;
+	const __u8 *hash_location = NULL;
+	unsigned char newhash[16];
+	int genhash;
+	struct sock *sk1 = NULL;
 #endif
 	struct net *net;
 
@@ -623,9 +627,36 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	arg.iov[0].iov_len  = sizeof(rep.th);
 
 #ifdef CONFIG_TCP_MD5SIG
-	key = sk ? tcp_md5_do_lookup(sk,
-				     (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
-				     AF_INET) : NULL;
+	hash_location = tcp_parse_md5sig_option(th);
+	if (!sk && hash_location) {
+		/*
+		 * active side is lost. Try to find listening socket through
+		 * source port, and then find md5 key through listening socket.
+		 * we are not loose security here:
+		 * Incoming packet is checked with md5 hash with finding key,
+		 * no RST generated if md5 hash doesn't match.
+		 */
+		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
+					     &tcp_hashinfo, ip_hdr(skb)->daddr,
+					     ntohs(th->source), inet_iif(skb));
+		/* don't send rst if it can't find key */
+		if (!sk1)
+			return;
+		rcu_read_lock();
+		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
+					&ip_hdr(skb)->saddr, AF_INET);
+		if (!key)
+			goto release_sk1;
+
+		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
+		if (genhash || memcmp(hash_location, newhash, 16) != 0)
+			goto release_sk1;
+	} else {
+		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
+					     &ip_hdr(skb)->saddr,
+					     AF_INET) : NULL;
+	}
+
 	if (key) {
 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 				   (TCPOPT_NOP << 16) |
@@ -653,6 +684,14 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
+
+#ifdef CONFIG_TCP_MD5SIG
+release_sk1:
+	if (sk1) {
+		rcu_read_unlock();
+		sock_put(sk1);
+	}
+#endif
 }
 
 /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
-- 
cgit v1.2.3


From 76e21053b5bf33a07c76f99d27a74238310e3c71 Mon Sep 17 00:00:00 2001
From: "Erich E. Hoover" <ehoover@mines.edu>
Date: Wed, 8 Feb 2012 09:11:07 +0000
Subject: ipv4: Implement IP_UNICAST_IF socket option.

The IP_UNICAST_IF feature is needed by the Wine project.  This patch
implements the feature by setting the outgoing interface in a similar
fashion to that of IP_MULTICAST_IF.  A separate option is needed to
handle this feature since the existing options do not provide all of
the characteristics required by IP_UNICAST_IF, a summary is provided
below.

SO_BINDTODEVICE:
* SO_BINDTODEVICE requires administrative privileges, IP_UNICAST_IF
does not.  From reading some old mailing list articles my
understanding is that SO_BINDTODEVICE requires administrative
privileges because it can override the administrator's routing
settings.
* The SO_BINDTODEVICE option restricts both outbound and inbound
traffic, IP_UNICAST_IF only impacts outbound traffic.

IP_PKTINFO:
* Since IP_PKTINFO and IP_UNICAST_IF are independent options,
implementing IP_UNICAST_IF with IP_PKTINFO will likely break some
applications.
* Implementing IP_UNICAST_IF on top of IP_PKTINFO significantly
complicates the Wine codebase and reduces the socket performance
(doing this requires a lot of extra communication between the
"server" and "user" layers).

bind():
* bind() does not work on broadcast packets, IP_UNICAST_IF is
specifically intended to work with broadcast packets.
* Like SO_BINDTODEVICE, bind() restricts both outbound and inbound
traffic.

Signed-off-by: Erich E. Hoover <ehoover@mines.edu>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 33 +++++++++++++++++++++++++++++++++
 net/ipv4/ping.c        |  3 ++-
 net/ipv4/raw.c         |  3 ++-
 net/ipv4/udp.c         |  3 ++-
 4 files changed, 39 insertions(+), 3 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8aa87c19fa00..9125529dab95 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -469,6 +469,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			     (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
 			     (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
 			     (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
+	    optname == IP_UNICAST_IF ||
 	    optname == IP_MULTICAST_TTL ||
 	    optname == IP_MULTICAST_ALL ||
 	    optname == IP_MULTICAST_LOOP ||
@@ -628,6 +629,35 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			goto e_inval;
 		inet->mc_loop = !!val;
 		break;
+	case IP_UNICAST_IF:
+	{
+		struct net_device *dev = NULL;
+		int ifindex;
+
+		if (optlen != sizeof(int))
+			goto e_inval;
+
+		ifindex = (__force int)ntohl((__force __be32)val);
+		if (ifindex == 0) {
+			inet->uc_index = 0;
+			err = 0;
+			break;
+		}
+
+		dev = dev_get_by_index(sock_net(sk), ifindex);
+		err = -EADDRNOTAVAIL;
+		if (!dev)
+			break;
+		dev_put(dev);
+
+		err = -EINVAL;
+		if (sk->sk_bound_dev_if)
+			break;
+
+		inet->uc_index = ifindex;
+		err = 0;
+		break;
+	}
 	case IP_MULTICAST_IF:
 	{
 		struct ip_mreqn mreq;
@@ -1178,6 +1208,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 	case IP_MULTICAST_LOOP:
 		val = inet->mc_loop;
 		break;
+	case IP_UNICAST_IF:
+		val = (__force int)htonl((__u32) inet->uc_index);
+		break;
 	case IP_MULTICAST_IF:
 	{
 		struct in_addr addr;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index aea5a199c37a..cfc82cf339f6 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -556,7 +556,8 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			ipc.oif = inet->mc_index;
 		if (!saddr)
 			saddr = inet->mc_addr;
-	}
+	} else if (!ipc.oif)
+		ipc.oif = inet->uc_index;
 
 	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
 			   RT_SCOPE_UNIVERSE, sk->sk_protocol,
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 3ccda5ae8a27..ab466305b629 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -563,7 +563,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			ipc.oif = inet->mc_index;
 		if (!saddr)
 			saddr = inet->mc_addr;
-	}
+	} else if (!ipc.oif)
+		ipc.oif = inet->uc_index;
 
 	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
 			   RT_SCOPE_UNIVERSE,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5d075b5f70fc..cd99f1a0f59f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -917,7 +917,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		if (!saddr)
 			saddr = inet->mc_addr;
 		connected = 0;
-	}
+	} else if (!ipc.oif)
+		ipc.oif = inet->uc_index;
 
 	if (connected)
 		rt = (struct rtable *)sk_dst_check(sk, 0);
-- 
cgit v1.2.3


From 4c507d2897bd9be810b3403ade73b04cf6fdfd4a Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 9 Feb 2012 09:35:49 +0000
Subject: net: implement IP_RECVTOS for IP_PKTOPTIONS

Currently, it is not easily possible to get TOS/DSCP value of packets from
an incoming TCP stream. The mechanism is there, IP_PKTOPTIONS getsockopt
with IP_RECVTOS set, the same way as incoming TTL can be queried. This is
not actually implemented for TOS, though.

This patch adds this functionality, both for IPv4 (IP_PKTOPTIONS) and IPv6
(IPV6_2292PKTOPTIONS). For IPv4, like in the IP_RECVTTL case, the value of
the TOS field is stored from the other party's ACK.

This is needed for proxies which require DSCP transparency. One such example
is at http://zph.bratcheda.org/.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c     | 1 +
 net/ipv4/ip_sockglue.c | 4 ++++
 net/ipv4/tcp_ipv4.c    | 1 +
 3 files changed, 6 insertions(+)

(limited to 'net/ipv4')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f7b5670744f0..e588a34e85c2 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -381,6 +381,7 @@ lookup_protocol:
 	inet->mc_all	= 1;
 	inet->mc_index	= 0;
 	inet->mc_list	= NULL;
+	inet->rcv_tos	= 0;
 
 	sk_refcnt_debug_inc(sk);
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 9125529dab95..ca50d9f9f8c1 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1289,6 +1289,10 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 			int hlim = inet->mc_ttl;
 			put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
 		}
+		if (inet->cmsg_flags & IP_CMSG_TOS) {
+			int tos = inet->rcv_tos;
+			put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
+		}
 		len -= msg.msg_controllen;
 		return put_user(len, optlen);
 	}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4d6f81c818dc..94abee8cf563 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1463,6 +1463,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	ireq->opt	      = NULL;
 	newinet->mc_index     = inet_iif(skb);
 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
+	newinet->rcv_tos      = ip_hdr(skb)->tos;
 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
 	if (inet_opt)
 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
-- 
cgit v1.2.3


From f2cedb63df14342ad40a8b5b324fc5d94a60b665 Mon Sep 17 00:00:00 2001
From: Danny Kukawka <danny.kukawka@bisect.de>
Date: Wed, 15 Feb 2012 06:45:39 +0000
Subject: net: replace random_ether_addr() with eth_hw_addr_random()

Replace usage of random_ether_addr() with eth_hw_addr_random()
to set addr_assign_type correctly to NET_ADDR_RANDOM.

Change the trivial cases.

v2: adapt to renamed eth_hw_addr_random()

Signed-off-by: Danny Kukawka <danny.kukawka@bisect.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0286d78c589c..b59414a0c1ee 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1536,7 +1536,7 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nla
 		return -EEXIST;
 
 	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
-		random_ether_addr(dev->dev_addr);
+		eth_hw_addr_random(dev);
 
 	mtu = ipgre_tunnel_bind_dev(dev);
 	if (!tb[IFLA_MTU])
-- 
cgit v1.2.3


From 80703d265b7e8a801560d907b1bfe340e574dbca Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 15 Feb 2012 17:48:35 -0500
Subject: ipv4: Eliminate spurious argument to __ipv4_neigh_lookup

'tbl' is always arp_tbl, so specifying it is pointless.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4eeb8ce856e2..0489cedc1671 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1127,7 +1127,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const vo
 	else if (rt->rt_gateway)
 		pkey = (const __be32 *) &rt->rt_gateway;
 
-	n = __ipv4_neigh_lookup(&arp_tbl, dev, *(__force u32 *)pkey);
+	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
 	if (n)
 		return n;
 	return neigh_create(&arp_tbl, pkey, dev);
-- 
cgit v1.2.3


From 3f518bf745cbd6007d8069100fb9cb09e960c872 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 21 Feb 2012 07:30:58 +0000
Subject: datagram: Add offset argument to __skb_recv_datagram

This one is only considered for MSG_PEEK flag and the value pointed by
it specifies where to start peeking bytes from. If the offset happens to
point into the middle of the returned skb, the offset within this skb is
put back to this very argument.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cd99f1a0f59f..7c41ab84e72e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1167,7 +1167,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
 	struct sk_buff *skb;
 	unsigned int ulen, copied;
-	int peeked;
+	int peeked, off = 0;
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
 	bool slow;
@@ -1183,7 +1183,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 try_again:
 	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
-				  &peeked, &err);
+				  &peeked, &off, &err);
 	if (!skb)
 		goto out;
 
-- 
cgit v1.2.3


From 80d326fab534a5380e8f6e509a0b9076655a9670 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 24 Feb 2012 14:30:15 +0000
Subject: netlink: add netlink_dump_control structure for netlink_dump_start()

Davem considers that the argument list of this interface is getting
out of control. This patch tries to address this issue following
his proposal:

struct netlink_dump_control c = { .dump = dump, .done = done, ... };

netlink_dump_start(..., &c);

Suggested by David S. Miller.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_diag.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index fcf281819cd4..8d25a1c557eb 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -960,9 +960,12 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 			    inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
 				return -EINVAL;
 		}
-
-		return netlink_dump_start(sock_diag_nlsk, skb, nlh,
-					  inet_diag_dump_compat, NULL, 0);
+		{
+			struct netlink_dump_control c = {
+				.dump = inet_diag_dump_compat,
+			};
+			return netlink_dump_start(sock_diag_nlsk, skb, nlh, &c);
+		}
 	}
 
 	return inet_diag_get_exact_compat(skb, nlh);
@@ -985,9 +988,12 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 			    inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
 				return -EINVAL;
 		}
-
-		return netlink_dump_start(sock_diag_nlsk, skb, h,
-					  inet_diag_dump, NULL, 0);
+		{
+			struct netlink_dump_control c = {
+				.dump = inet_diag_dump,
+			};
+			return netlink_dump_start(sock_diag_nlsk, skb, h, &c);
+		}
 	}
 
 	return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h));
-- 
cgit v1.2.3


From 544d5c7d9f4d1ec4f170bc5bcc522012cb7704bc Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 5 Feb 2012 03:44:51 +0100
Subject: netfilter: ctnetlink: allow to set expectfn for expectations

This patch allows you to set expectfn which is specifically used
by the NAT side of most of the existing conntrack helpers.

I have added a symbol map that uses a string as key to look up for
the function that is attached to the expectation object. This is
the best solution I came out with to solve this issue.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_nat_core.c |  8 ++++++++
 net/ipv4/netfilter/nf_nat_h323.c | 14 ++++++++++++++
 net/ipv4/netfilter/nf_nat_sip.c  |  7 +++++++
 3 files changed, 29 insertions(+)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index a708933dc230..abb52adf5acd 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -686,6 +686,11 @@ static struct pernet_operations nf_nat_net_ops = {
 	.exit = nf_nat_net_exit,
 };
 
+static struct nf_ct_helper_expectfn follow_master_nat = {
+	.name		= "nat-follow-master",
+	.expectfn	= nf_nat_follow_master,
+};
+
 static int __init nf_nat_init(void)
 {
 	size_t i;
@@ -717,6 +722,8 @@ static int __init nf_nat_init(void)
 
 	l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
 
+	nf_ct_helper_expectfn_register(&follow_master_nat);
+
 	BUG_ON(nf_nat_seq_adjust_hook != NULL);
 	RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
 	BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
@@ -736,6 +743,7 @@ static void __exit nf_nat_cleanup(void)
 	unregister_pernet_subsys(&nf_nat_net_ops);
 	nf_ct_l3proto_put(l3proto);
 	nf_ct_extend_unregister(&nat_extend);
+	nf_ct_helper_expectfn_unregister(&follow_master_nat);
 	RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL);
 	RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
 	RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index dc1dd912baf4..82536701e3a3 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -568,6 +568,16 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
 	return 0;
 }
 
+static struct nf_ct_helper_expectfn q931_nat = {
+	.name		= "Q.931",
+	.expectfn	= ip_nat_q931_expect,
+};
+
+static struct nf_ct_helper_expectfn callforwarding_nat = {
+	.name		= "callforwarding",
+	.expectfn	= ip_nat_callforwarding_expect,
+};
+
 /****************************************************************************/
 static int __init init(void)
 {
@@ -590,6 +600,8 @@ static int __init init(void)
 	RCU_INIT_POINTER(nat_h245_hook, nat_h245);
 	RCU_INIT_POINTER(nat_callforwarding_hook, nat_callforwarding);
 	RCU_INIT_POINTER(nat_q931_hook, nat_q931);
+	nf_ct_helper_expectfn_register(&q931_nat);
+	nf_ct_helper_expectfn_register(&callforwarding_nat);
 	return 0;
 }
 
@@ -605,6 +617,8 @@ static void __exit fini(void)
 	RCU_INIT_POINTER(nat_h245_hook, NULL);
 	RCU_INIT_POINTER(nat_callforwarding_hook, NULL);
 	RCU_INIT_POINTER(nat_q931_hook, NULL);
+	nf_ct_helper_expectfn_unregister(&q931_nat);
+	nf_ct_helper_expectfn_unregister(&callforwarding_nat);
 	synchronize_rcu();
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index d0319f96269f..57932c43960e 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -526,6 +526,11 @@ err1:
 	return NF_DROP;
 }
 
+static struct nf_ct_helper_expectfn sip_nat = {
+        .name           = "sip",
+        .expectfn       = ip_nat_sip_expected,
+};
+
 static void __exit nf_nat_sip_fini(void)
 {
 	RCU_INIT_POINTER(nf_nat_sip_hook, NULL);
@@ -535,6 +540,7 @@ static void __exit nf_nat_sip_fini(void)
 	RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL);
 	RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL);
 	RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL);
+	nf_ct_helper_expectfn_unregister(&sip_nat);
 	synchronize_rcu();
 }
 
@@ -554,6 +560,7 @@ static int __init nf_nat_sip_init(void)
 	RCU_INIT_POINTER(nf_nat_sdp_port_hook, ip_nat_sdp_port);
 	RCU_INIT_POINTER(nf_nat_sdp_session_hook, ip_nat_sdp_session);
 	RCU_INIT_POINTER(nf_nat_sdp_media_hook, ip_nat_sdp_media);
+	nf_ct_helper_expectfn_register(&sip_nat);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 6939c33a757bd006c5e0b8b5fd429fc587a4d0f4 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Fri, 10 Feb 2012 23:10:52 +0100
Subject: netfilter: merge ipt_LOG and ip6_LOG into xt_LOG

ipt_LOG and ip6_LOG have a lot of common code, merge them
to reduce duplicate code.

Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/Kconfig   |   9 -
 net/ipv4/netfilter/Makefile  |   1 -
 net/ipv4/netfilter/ipt_LOG.c | 516 -------------------------------------------
 3 files changed, 526 deletions(-)
 delete mode 100644 net/ipv4/netfilter/ipt_LOG.c

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 74dfc9e5211f..fcc543cd987a 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -123,15 +123,6 @@ config IP_NF_TARGET_REJECT
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_LOG
-	tristate "LOG target support"
-	default m if NETFILTER_ADVANCED=n
-	help
-	  This option adds a `LOG' target, which allows you to create rules in
-	  any iptables table which records the packet header to the syslog.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_TARGET_ULOG
 	tristate "ULOG target support"
 	default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 213a462b739b..240b68469a7a 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -54,7 +54,6 @@ obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o
 # targets
 obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
 obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
-obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
 obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
 obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
 obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
deleted file mode 100644
index d76d6c9ed946..000000000000
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ /dev/null
@@ -1,516 +0,0 @@
-/*
- * This is a module which is used for logging packets.
- */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/skbuff.h>
-#include <linux/if_arp.h>
-#include <linux/ip.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/tcp.h>
-#include <net/route.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv4/ipt_LOG.h>
-#include <net/netfilter/nf_log.h>
-#include <net/netfilter/xt_log.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("Xtables: IPv4 packet logging to syslog");
-
-/* One level of recursion won't kill us */
-static void dump_packet(struct sbuff *m,
-			const struct nf_loginfo *info,
-			const struct sk_buff *skb,
-			unsigned int iphoff)
-{
-	struct iphdr _iph;
-	const struct iphdr *ih;
-	unsigned int logflags;
-
-	if (info->type == NF_LOG_TYPE_LOG)
-		logflags = info->u.log.logflags;
-	else
-		logflags = NF_LOG_MASK;
-
-	ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
-	if (ih == NULL) {
-		sb_add(m, "TRUNCATED");
-		return;
-	}
-
-	/* Important fields:
-	 * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
-	/* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
-	sb_add(m, "SRC=%pI4 DST=%pI4 ",
-	       &ih->saddr, &ih->daddr);
-
-	/* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
-	sb_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
-	       ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
-	       ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
-
-	/* Max length: 6 "CE DF MF " */
-	if (ntohs(ih->frag_off) & IP_CE)
-		sb_add(m, "CE ");
-	if (ntohs(ih->frag_off) & IP_DF)
-		sb_add(m, "DF ");
-	if (ntohs(ih->frag_off) & IP_MF)
-		sb_add(m, "MF ");
-
-	/* Max length: 11 "FRAG:65535 " */
-	if (ntohs(ih->frag_off) & IP_OFFSET)
-		sb_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
-
-	if ((logflags & IPT_LOG_IPOPT) &&
-	    ih->ihl * 4 > sizeof(struct iphdr)) {
-		const unsigned char *op;
-		unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
-		unsigned int i, optsize;
-
-		optsize = ih->ihl * 4 - sizeof(struct iphdr);
-		op = skb_header_pointer(skb, iphoff+sizeof(_iph),
-					optsize, _opt);
-		if (op == NULL) {
-			sb_add(m, "TRUNCATED");
-			return;
-		}
-
-		/* Max length: 127 "OPT (" 15*4*2chars ") " */
-		sb_add(m, "OPT (");
-		for (i = 0; i < optsize; i++)
-			sb_add(m, "%02X", op[i]);
-		sb_add(m, ") ");
-	}
-
-	switch (ih->protocol) {
-	case IPPROTO_TCP: {
-		struct tcphdr _tcph;
-		const struct tcphdr *th;
-
-		/* Max length: 10 "PROTO=TCP " */
-		sb_add(m, "PROTO=TCP ");
-
-		if (ntohs(ih->frag_off) & IP_OFFSET)
-			break;
-
-		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
-		th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
-					sizeof(_tcph), &_tcph);
-		if (th == NULL) {
-			sb_add(m, "INCOMPLETE [%u bytes] ",
-			       skb->len - iphoff - ih->ihl*4);
-			break;
-		}
-
-		/* Max length: 20 "SPT=65535 DPT=65535 " */
-		sb_add(m, "SPT=%u DPT=%u ",
-		       ntohs(th->source), ntohs(th->dest));
-		/* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
-		if (logflags & IPT_LOG_TCPSEQ)
-			sb_add(m, "SEQ=%u ACK=%u ",
-			       ntohl(th->seq), ntohl(th->ack_seq));
-		/* Max length: 13 "WINDOW=65535 " */
-		sb_add(m, "WINDOW=%u ", ntohs(th->window));
-		/* Max length: 9 "RES=0x3F " */
-		sb_add(m, "RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
-		/* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
-		if (th->cwr)
-			sb_add(m, "CWR ");
-		if (th->ece)
-			sb_add(m, "ECE ");
-		if (th->urg)
-			sb_add(m, "URG ");
-		if (th->ack)
-			sb_add(m, "ACK ");
-		if (th->psh)
-			sb_add(m, "PSH ");
-		if (th->rst)
-			sb_add(m, "RST ");
-		if (th->syn)
-			sb_add(m, "SYN ");
-		if (th->fin)
-			sb_add(m, "FIN ");
-		/* Max length: 11 "URGP=65535 " */
-		sb_add(m, "URGP=%u ", ntohs(th->urg_ptr));
-
-		if ((logflags & IPT_LOG_TCPOPT) &&
-		    th->doff * 4 > sizeof(struct tcphdr)) {
-			unsigned char _opt[4 * 15 - sizeof(struct tcphdr)];
-			const unsigned char *op;
-			unsigned int i, optsize;
-
-			optsize = th->doff * 4 - sizeof(struct tcphdr);
-			op = skb_header_pointer(skb,
-						iphoff+ih->ihl*4+sizeof(_tcph),
-						optsize, _opt);
-			if (op == NULL) {
-				sb_add(m, "TRUNCATED");
-				return;
-			}
-
-			/* Max length: 127 "OPT (" 15*4*2chars ") " */
-			sb_add(m, "OPT (");
-			for (i = 0; i < optsize; i++)
-				sb_add(m, "%02X", op[i]);
-			sb_add(m, ") ");
-		}
-		break;
-	}
-	case IPPROTO_UDP:
-	case IPPROTO_UDPLITE: {
-		struct udphdr _udph;
-		const struct udphdr *uh;
-
-		if (ih->protocol == IPPROTO_UDP)
-			/* Max length: 10 "PROTO=UDP "     */
-			sb_add(m, "PROTO=UDP " );
-		else	/* Max length: 14 "PROTO=UDPLITE " */
-			sb_add(m, "PROTO=UDPLITE ");
-
-		if (ntohs(ih->frag_off) & IP_OFFSET)
-			break;
-
-		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
-		uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
-					sizeof(_udph), &_udph);
-		if (uh == NULL) {
-			sb_add(m, "INCOMPLETE [%u bytes] ",
-			       skb->len - iphoff - ih->ihl*4);
-			break;
-		}
-
-		/* Max length: 20 "SPT=65535 DPT=65535 " */
-		sb_add(m, "SPT=%u DPT=%u LEN=%u ",
-		       ntohs(uh->source), ntohs(uh->dest),
-		       ntohs(uh->len));
-		break;
-	}
-	case IPPROTO_ICMP: {
-		struct icmphdr _icmph;
-		const struct icmphdr *ich;
-		static const size_t required_len[NR_ICMP_TYPES+1]
-			= { [ICMP_ECHOREPLY] = 4,
-			    [ICMP_DEST_UNREACH]
-			    = 8 + sizeof(struct iphdr),
-			    [ICMP_SOURCE_QUENCH]
-			    = 8 + sizeof(struct iphdr),
-			    [ICMP_REDIRECT]
-			    = 8 + sizeof(struct iphdr),
-			    [ICMP_ECHO] = 4,
-			    [ICMP_TIME_EXCEEDED]
-			    = 8 + sizeof(struct iphdr),
-			    [ICMP_PARAMETERPROB]
-			    = 8 + sizeof(struct iphdr),
-			    [ICMP_TIMESTAMP] = 20,
-			    [ICMP_TIMESTAMPREPLY] = 20,
-			    [ICMP_ADDRESS] = 12,
-			    [ICMP_ADDRESSREPLY] = 12 };
-
-		/* Max length: 11 "PROTO=ICMP " */
-		sb_add(m, "PROTO=ICMP ");
-
-		if (ntohs(ih->frag_off) & IP_OFFSET)
-			break;
-
-		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
-		ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
-					 sizeof(_icmph), &_icmph);
-		if (ich == NULL) {
-			sb_add(m, "INCOMPLETE [%u bytes] ",
-			       skb->len - iphoff - ih->ihl*4);
-			break;
-		}
-
-		/* Max length: 18 "TYPE=255 CODE=255 " */
-		sb_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code);
-
-		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
-		if (ich->type <= NR_ICMP_TYPES &&
-		    required_len[ich->type] &&
-		    skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
-			sb_add(m, "INCOMPLETE [%u bytes] ",
-			       skb->len - iphoff - ih->ihl*4);
-			break;
-		}
-
-		switch (ich->type) {
-		case ICMP_ECHOREPLY:
-		case ICMP_ECHO:
-			/* Max length: 19 "ID=65535 SEQ=65535 " */
-			sb_add(m, "ID=%u SEQ=%u ",
-			       ntohs(ich->un.echo.id),
-			       ntohs(ich->un.echo.sequence));
-			break;
-
-		case ICMP_PARAMETERPROB:
-			/* Max length: 14 "PARAMETER=255 " */
-			sb_add(m, "PARAMETER=%u ",
-			       ntohl(ich->un.gateway) >> 24);
-			break;
-		case ICMP_REDIRECT:
-			/* Max length: 24 "GATEWAY=255.255.255.255 " */
-			sb_add(m, "GATEWAY=%pI4 ", &ich->un.gateway);
-			/* Fall through */
-		case ICMP_DEST_UNREACH:
-		case ICMP_SOURCE_QUENCH:
-		case ICMP_TIME_EXCEEDED:
-			/* Max length: 3+maxlen */
-			if (!iphoff) { /* Only recurse once. */
-				sb_add(m, "[");
-				dump_packet(m, info, skb,
-					    iphoff + ih->ihl*4+sizeof(_icmph));
-				sb_add(m, "] ");
-			}
-
-			/* Max length: 10 "MTU=65535 " */
-			if (ich->type == ICMP_DEST_UNREACH &&
-			    ich->code == ICMP_FRAG_NEEDED)
-				sb_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu));
-		}
-		break;
-	}
-	/* Max Length */
-	case IPPROTO_AH: {
-		struct ip_auth_hdr _ahdr;
-		const struct ip_auth_hdr *ah;
-
-		if (ntohs(ih->frag_off) & IP_OFFSET)
-			break;
-
-		/* Max length: 9 "PROTO=AH " */
-		sb_add(m, "PROTO=AH ");
-
-		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
-		ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
-					sizeof(_ahdr), &_ahdr);
-		if (ah == NULL) {
-			sb_add(m, "INCOMPLETE [%u bytes] ",
-			       skb->len - iphoff - ih->ihl*4);
-			break;
-		}
-
-		/* Length: 15 "SPI=0xF1234567 " */
-		sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
-		break;
-	}
-	case IPPROTO_ESP: {
-		struct ip_esp_hdr _esph;
-		const struct ip_esp_hdr *eh;
-
-		/* Max length: 10 "PROTO=ESP " */
-		sb_add(m, "PROTO=ESP ");
-
-		if (ntohs(ih->frag_off) & IP_OFFSET)
-			break;
-
-		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
-		eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
-					sizeof(_esph), &_esph);
-		if (eh == NULL) {
-			sb_add(m, "INCOMPLETE [%u bytes] ",
-			       skb->len - iphoff - ih->ihl*4);
-			break;
-		}
-
-		/* Length: 15 "SPI=0xF1234567 " */
-		sb_add(m, "SPI=0x%x ", ntohl(eh->spi));
-		break;
-	}
-	/* Max length: 10 "PROTO 255 " */
-	default:
-		sb_add(m, "PROTO=%u ", ih->protocol);
-	}
-
-	/* Max length: 15 "UID=4294967295 " */
-	if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
-		read_lock_bh(&skb->sk->sk_callback_lock);
-		if (skb->sk->sk_socket && skb->sk->sk_socket->file)
-			sb_add(m, "UID=%u GID=%u ",
-				skb->sk->sk_socket->file->f_cred->fsuid,
-				skb->sk->sk_socket->file->f_cred->fsgid);
-		read_unlock_bh(&skb->sk->sk_callback_lock);
-	}
-
-	/* Max length: 16 "MARK=0xFFFFFFFF " */
-	if (!iphoff && skb->mark)
-		sb_add(m, "MARK=0x%x ", skb->mark);
-
-	/* Proto    Max log string length */
-	/* IP:      40+46+6+11+127 = 230 */
-	/* TCP:     10+max(25,20+30+13+9+32+11+127) = 252 */
-	/* UDP:     10+max(25,20) = 35 */
-	/* UDPLITE: 14+max(25,20) = 39 */
-	/* ICMP:    11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
-	/* ESP:     10+max(25)+15 = 50 */
-	/* AH:      9+max(25)+15 = 49 */
-	/* unknown: 10 */
-
-	/* (ICMP allows recursion one level deep) */
-	/* maxlen =  IP + ICMP +  IP + max(TCP,UDP,ICMP,unknown) */
-	/* maxlen = 230+   91  + 230 + 252 = 803 */
-}
-
-static void dump_mac_header(struct sbuff *m,
-			    const struct nf_loginfo *info,
-			    const struct sk_buff *skb)
-{
-	struct net_device *dev = skb->dev;
-	unsigned int logflags = 0;
-
-	if (info->type == NF_LOG_TYPE_LOG)
-		logflags = info->u.log.logflags;
-
-	if (!(logflags & IPT_LOG_MACDECODE))
-		goto fallback;
-
-	switch (dev->type) {
-	case ARPHRD_ETHER:
-		sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
-		       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
-		       ntohs(eth_hdr(skb)->h_proto));
-		return;
-	default:
-		break;
-	}
-
-fallback:
-	sb_add(m, "MAC=");
-	if (dev->hard_header_len &&
-	    skb->mac_header != skb->network_header) {
-		const unsigned char *p = skb_mac_header(skb);
-		unsigned int i;
-
-		sb_add(m, "%02x", *p++);
-		for (i = 1; i < dev->hard_header_len; i++, p++)
-			sb_add(m, ":%02x", *p);
-	}
-	sb_add(m, " ");
-}
-
-static struct nf_loginfo default_loginfo = {
-	.type	= NF_LOG_TYPE_LOG,
-	.u = {
-		.log = {
-			.level    = 5,
-			.logflags = NF_LOG_MASK,
-		},
-	},
-};
-
-static void
-ipt_log_packet(u_int8_t pf,
-	       unsigned int hooknum,
-	       const struct sk_buff *skb,
-	       const struct net_device *in,
-	       const struct net_device *out,
-	       const struct nf_loginfo *loginfo,
-	       const char *prefix)
-{
-	struct sbuff *m = sb_open();
-
-	if (!loginfo)
-		loginfo = &default_loginfo;
-
-	sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
-	       prefix,
-	       in ? in->name : "",
-	       out ? out->name : "");
-#ifdef CONFIG_BRIDGE_NETFILTER
-	if (skb->nf_bridge) {
-		const struct net_device *physindev;
-		const struct net_device *physoutdev;
-
-		physindev = skb->nf_bridge->physindev;
-		if (physindev && in != physindev)
-			sb_add(m, "PHYSIN=%s ", physindev->name);
-		physoutdev = skb->nf_bridge->physoutdev;
-		if (physoutdev && out != physoutdev)
-			sb_add(m, "PHYSOUT=%s ", physoutdev->name);
-	}
-#endif
-
-	if (in != NULL)
-		dump_mac_header(m, loginfo, skb);
-
-	dump_packet(m, loginfo, skb, 0);
-
-	sb_close(m);
-}
-
-static unsigned int
-log_tg(struct sk_buff *skb, const struct xt_action_param *par)
-{
-	const struct ipt_log_info *loginfo = par->targinfo;
-	struct nf_loginfo li;
-
-	li.type = NF_LOG_TYPE_LOG;
-	li.u.log.level = loginfo->level;
-	li.u.log.logflags = loginfo->logflags;
-
-	ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, par->out, &li,
-		       loginfo->prefix);
-	return XT_CONTINUE;
-}
-
-static int log_tg_check(const struct xt_tgchk_param *par)
-{
-	const struct ipt_log_info *loginfo = par->targinfo;
-
-	if (loginfo->level >= 8) {
-		pr_debug("level %u >= 8\n", loginfo->level);
-		return -EINVAL;
-	}
-	if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
-		pr_debug("prefix is not null-terminated\n");
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static struct xt_target log_tg_reg __read_mostly = {
-	.name		= "LOG",
-	.family		= NFPROTO_IPV4,
-	.target		= log_tg,
-	.targetsize	= sizeof(struct ipt_log_info),
-	.checkentry	= log_tg_check,
-	.me		= THIS_MODULE,
-};
-
-static struct nf_logger ipt_log_logger __read_mostly = {
-	.name		= "ipt_LOG",
-	.logfn		= &ipt_log_packet,
-	.me		= THIS_MODULE,
-};
-
-static int __init log_tg_init(void)
-{
-	int ret;
-
-	ret = xt_register_target(&log_tg_reg);
-	if (ret < 0)
-		return ret;
-	nf_log_register(NFPROTO_IPV4, &ipt_log_logger);
-	return 0;
-}
-
-static void __exit log_tg_exit(void)
-{
-	nf_log_unregister(&ipt_log_logger);
-	xt_unregister_target(&log_tg_reg);
-}
-
-module_init(log_tg_init);
-module_exit(log_tg_exit);
-- 
cgit v1.2.3


From 2c8503f55fbdfbeff4164f133df804cf4d316290 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 28 Feb 2012 18:23:31 +0100
Subject: netfilter: nf_conntrack: pass timeout array to l4->new and l4->packet

This patch defines a new interface for l4 protocol trackers:

unsigned int *(*get_timeouts)(struct net *net);

that is used to return the array of unsigned int that contains
the timeouts that will be applied for this flow. This is passed
to the l4proto->new(...) and l4proto->packet(...) functions to
specify the timeout policy.

This interface allows per-net global timeout configuration
(although only DCCP supports this by now) and it will allow
custom custom timeout configuration by means of follow-up
patches.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index ab5b27a2916f..6b801124b31f 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -75,25 +75,31 @@ static int icmp_print_tuple(struct seq_file *s,
 			  ntohs(tuple->src.u.icmp.id));
 }
 
+static unsigned int *icmp_get_timeouts(struct net *net)
+{
+	return &nf_ct_icmp_timeout;
+}
+
 /* Returns verdict for packet, or -1 for invalid. */
 static int icmp_packet(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       unsigned int dataoff,
 		       enum ip_conntrack_info ctinfo,
 		       u_int8_t pf,
-		       unsigned int hooknum)
+		       unsigned int hooknum,
+		       unsigned int *timeout)
 {
 	/* Do not immediately delete the connection after the first
 	   successful reply to avoid excessive conntrackd traffic
 	   and also to handle correctly ICMP echo reply duplicates. */
-	nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
+	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
 
 	return NF_ACCEPT;
 }
 
 /* Called when a new connection for this protocol found. */
 static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
-		     unsigned int dataoff)
+		     unsigned int dataoff, unsigned int *timeouts)
 {
 	static const u_int8_t valid_new[] = {
 		[ICMP_ECHO] = 1,
@@ -298,6 +304,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
 	.invert_tuple		= icmp_invert_tuple,
 	.print_tuple		= icmp_print_tuple,
 	.packet			= icmp_packet,
+	.get_timeouts		= icmp_get_timeouts,
 	.new			= icmp_new,
 	.error			= icmp_error,
 	.destroy		= NULL,
-- 
cgit v1.2.3


From 50978462300f74dc48aea4a38471cb69bdf741a5 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 28 Feb 2012 19:13:48 +0100
Subject: netfilter: add cttimeout infrastructure for fine timeout tuning

This patch adds the infrastructure to add fine timeout tuning
over nfnetlink. Now you can use the NFNL_SUBSYS_CTNETLINK_TIMEOUT
subsystem to create/delete/dump timeout objects that contain some
specific timeout policy for one flow.

The follow up patches will allow you attach timeout policy object
to conntrack via the CT target and the conntrack extension
infrastructure.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 47 ++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 6b801124b31f..7cbe9cb261c2 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -269,6 +269,44 @@ static int icmp_nlattr_tuple_size(void)
 }
 #endif
 
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data)
+{
+	unsigned int *timeout = data;
+
+	if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
+		*timeout =
+			ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
+	} else {
+		/* Set default ICMP timeout. */
+		*timeout = nf_ct_icmp_timeout;
+	}
+	return 0;
+}
+
+static int
+icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+	const unsigned int *timeout = data;
+
+	NLA_PUT_BE32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ));
+
+	return 0;
+
+nla_put_failure:
+	return -ENOSPC;
+}
+
+static const struct nla_policy
+icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
+	[CTA_TIMEOUT_ICMP_TIMEOUT]	= { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+
 #ifdef CONFIG_SYSCTL
 static struct ctl_table_header *icmp_sysctl_header;
 static struct ctl_table icmp_sysctl_table[] = {
@@ -315,6 +353,15 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
 	.nlattr_to_tuple	= icmp_nlattr_to_tuple,
 	.nla_policy		= icmp_nla_policy,
 #endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+	.ctnl_timeout		= {
+		.nlattr_to_obj	= icmp_timeout_nlattr_to_obj,
+		.obj_to_nlattr	= icmp_timeout_obj_to_nlattr,
+		.nlattr_max	= CTA_TIMEOUT_ICMP_MAX,
+		.obj_size	= sizeof(unsigned int),
+		.nla_policy	= icmp_timeout_nla_policy,
+	},
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
 #ifdef CONFIG_SYSCTL
 	.ctl_table_header	= &icmp_sysctl_header,
 	.ctl_table		= icmp_sysctl_table,
-- 
cgit v1.2.3


From b4fb05ea402cb6930b40d3152d8acabc391b23e2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 7 Mar 2012 04:45:43 +0000
Subject: tcp: md5: correct a RCU lockdep splat

commit a8afca0329 (tcp: md5: protects md5sig_info with RCU) added a
lockdep splat in tcp_md5_do_lookup() in case a timer fires a tcp
retransmit.

At this point, socket lock is owned by the sofirq handler, not the user,
so we should adjust a bit the lockdep condition, as we dont hold
rcu_read_lock().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Reported-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 94abee8cf563..507924b640ef 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -927,7 +927,8 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
 
 	/* caller either holds rcu_read_lock() or socket lock */
 	md5sig = rcu_dereference_check(tp->md5sig_info,
-				       sock_owned_by_user(sk));
+				       sock_owned_by_user(sk) ||
+				       lockdep_is_held(&sk->sk_lock.slock));
 	if (!md5sig)
 		return NULL;
 #if IS_ENABLED(CONFIG_IPV6)
-- 
cgit v1.2.3


From ba57b4db2624793c6eb8f2c051c9f7b8a6e7b6a6 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 7 Mar 2012 20:45:32 -0500
Subject: ipv4: Make ip_call_ra_chain() return bool.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_input.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 073a9b01c40c..70afe5bf1945 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -148,7 +148,7 @@
 /*
  *	Process Router Attention IP option (RFC 2113)
  */
-int ip_call_ra_chain(struct sk_buff *skb)
+bool ip_call_ra_chain(struct sk_buff *skb)
 {
 	struct ip_ra_chain *ra;
 	u8 protocol = ip_hdr(skb)->protocol;
@@ -167,7 +167,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
 		    net_eq(sock_net(sk), dev_net(dev))) {
 			if (ip_is_fragment(ip_hdr(skb))) {
 				if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN))
-					return 1;
+					return true;
 			}
 			if (last) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
@@ -180,9 +180,9 @@ int ip_call_ra_chain(struct sk_buff *skb)
 
 	if (last) {
 		raw_rcv(last, skb);
-		return 1;
+		return true;
 	}
-	return 0;
+	return false;
 }
 
 static int ip_local_deliver_finish(struct sk_buff *skb)
-- 
cgit v1.2.3


From 6a91395f20119d696330e2604451614a64369d1b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 7 Mar 2012 20:48:08 -0500
Subject: ipv4: Make ip_rcv_options() return bool.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_input.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 70afe5bf1945..bdaa2c5c28e4 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -265,7 +265,7 @@ int ip_local_deliver(struct sk_buff *skb)
 		       ip_local_deliver_finish);
 }
 
-static inline int ip_rcv_options(struct sk_buff *skb)
+static inline bool ip_rcv_options(struct sk_buff *skb)
 {
 	struct ip_options *opt;
 	const struct iphdr *iph;
@@ -309,9 +309,9 @@ static inline int ip_rcv_options(struct sk_buff *skb)
 			goto drop;
 	}
 
-	return 0;
+	return false;
 drop:
-	return -1;
+	return true;
 }
 
 static int ip_rcv_finish(struct sk_buff *skb)
-- 
cgit v1.2.3


From 058bd4d2a4ff0aaa4a5381c67e776729d840c785 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 11 Mar 2012 18:36:11 +0000
Subject: net: Convert printks to pr_<level>

Use a more current kernel messaging style.

Convert a printk block to print_hex_dump.
Coalesce formats, align arguments.
Use %s, __func__ instead of embedding function names.

Some messages that were prefixed with <foo>_close are
now prefixed with <foo>_fini.  Some ah4 and esp messages
are now not prefixed with "ip ".

The intent of this patch is to later add something like
  #define pr_fmt(fmt) "IPv4: " fmt.
to standardize the output messages.

Text size is trivially reduced. (x86-32 allyesconfig)

$ size net/ipv4/built-in.o*
   text	   data	    bss	    dec	    hex	filename
 887888	  31558	 249696	1169142	 11d6f6	net/ipv4/built-in.o.new
 887934	  31558	 249800	1169292	 11d78c	net/ipv4/built-in.o.old

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c       |  25 +++++------
 net/ipv4/ah4.c           |  14 +++---
 net/ipv4/esp4.c          |   8 ++--
 net/ipv4/fib_frontend.c  |   6 +--
 net/ipv4/fib_semantics.c |   2 +-
 net/ipv4/fib_trie.c      |   5 +--
 net/ipv4/icmp.c          |  19 ++++----
 net/ipv4/ip_fragment.c   |   3 +-
 net/ipv4/ip_gre.c        |   6 +--
 net/ipv4/ip_input.c      |   4 +-
 net/ipv4/ip_options.c    |   2 +-
 net/ipv4/ipcomp.c        |   8 ++--
 net/ipv4/ipconfig.c      | 110 +++++++++++++++++++++++------------------------
 net/ipv4/ipip.c          |   4 +-
 net/ipv4/ipmr.c          |   4 +-
 net/ipv4/ping.c          |  18 ++++----
 net/ipv4/raw.c           |   7 +--
 net/ipv4/route.c         |  43 ++++++++----------
 net/ipv4/tcp.c           |   7 ++-
 net/ipv4/tcp_cong.c      |   7 ++-
 net/ipv4/tcp_input.c     |   8 ++--
 net/ipv4/tcp_ipv4.c      |   8 ++--
 net/ipv4/tunnel4.c       |   8 ++--
 net/ipv4/udplite.c       |   4 +-
 net/ipv4/xfrm4_tunnel.c  |  14 +++---
 25 files changed, 163 insertions(+), 181 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e588a34e85c2..b7a946611f2f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1085,13 +1085,11 @@ out:
 	return;
 
 out_permanent:
-	printk(KERN_ERR "Attempt to override permanent protocol %d.\n",
-	       protocol);
+	pr_err("Attempt to override permanent protocol %d\n", protocol);
 	goto out;
 
 out_illegal:
-	printk(KERN_ERR
-	       "Ignoring attempt to register invalid socket type %d.\n",
+	pr_err("Ignoring attempt to register invalid socket type %d\n",
 	       p->type);
 	goto out;
 }
@@ -1100,8 +1098,7 @@ EXPORT_SYMBOL(inet_register_protosw);
 void inet_unregister_protosw(struct inet_protosw *p)
 {
 	if (INET_PROTOSW_PERMANENT & p->flags) {
-		printk(KERN_ERR
-		       "Attempt to unregister permanent protocol %d.\n",
+		pr_err("Attempt to unregister permanent protocol %d\n",
 		       p->protocol);
 	} else {
 		spin_lock_bh(&inetsw_lock);
@@ -1150,8 +1147,8 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 		return 0;
 
 	if (sysctl_ip_dynaddr > 1) {
-		printk(KERN_INFO "%s(): shifting inet->saddr from %pI4 to %pI4\n",
-		       __func__, &old_saddr, &new_saddr);
+		pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
+			__func__, &old_saddr, &new_saddr);
 	}
 
 	inet->inet_saddr = inet->inet_rcv_saddr = new_saddr;
@@ -1680,14 +1677,14 @@ static int __init inet_init(void)
 	 */
 
 	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
-		printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");
+		pr_crit("%s: Cannot add ICMP protocol\n", __func__);
 	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
-		printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");
+		pr_crit("%s: Cannot add UDP protocol\n", __func__);
 	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
-		printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
+		pr_crit("%s: Cannot add TCP protocol\n", __func__);
 #ifdef CONFIG_IP_MULTICAST
 	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
-		printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
+		pr_crit("%s: Cannot add IGMP protocol\n", __func__);
 #endif
 
 	/* Register the socket-side information for inet_create. */
@@ -1734,14 +1731,14 @@ static int __init inet_init(void)
 	 */
 #if defined(CONFIG_IP_MROUTE)
 	if (ip_mr_init())
-		printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n");
+		pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
 #endif
 	/*
 	 *	Initialise per-cpu ipv4 mibs
 	 */
 
 	if (init_ipv4_mibs())
-		printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n");
+		pr_crit("%s: Cannot init ipv4 mibs\n", __func__);
 
 	ipv4_proc_init();
 
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 36d14406261e..b5524660c81d 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -445,9 +445,9 @@ static int ah_init_state(struct xfrm_state *x)
 
 	if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
 	    crypto_ahash_digestsize(ahash)) {
-		printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
-		       x->aalg->alg_name, crypto_ahash_digestsize(ahash),
-		       aalg_desc->uinfo.auth.icv_fullbits/8);
+		pr_info("AH: %s digestsize %u != %hu\n",
+			x->aalg->alg_name, crypto_ahash_digestsize(ahash),
+			aalg_desc->uinfo.auth.icv_fullbits/8);
 		goto error;
 	}
 
@@ -510,11 +510,11 @@ static const struct net_protocol ah4_protocol = {
 static int __init ah4_init(void)
 {
 	if (xfrm_register_type(&ah_type, AF_INET) < 0) {
-		printk(KERN_INFO "ip ah init: can't add xfrm type\n");
+		pr_info("%s: can't add xfrm type\n", __func__);
 		return -EAGAIN;
 	}
 	if (inet_add_protocol(&ah4_protocol, IPPROTO_AH) < 0) {
-		printk(KERN_INFO "ip ah init: can't add protocol\n");
+		pr_info("%s: can't add protocol\n", __func__);
 		xfrm_unregister_type(&ah_type, AF_INET);
 		return -EAGAIN;
 	}
@@ -524,9 +524,9 @@ static int __init ah4_init(void)
 static void __exit ah4_fini(void)
 {
 	if (inet_del_protocol(&ah4_protocol, IPPROTO_AH) < 0)
-		printk(KERN_INFO "ip ah close: can't remove protocol\n");
+		pr_info("%s: can't remove protocol\n", __func__);
 	if (xfrm_unregister_type(&ah_type, AF_INET) < 0)
-		printk(KERN_INFO "ip ah close: can't remove xfrm type\n");
+		pr_info("%s: can't remove xfrm type\n", __func__);
 }
 
 module_init(ah4_init);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index a5b413416da3..0a86dbe9454b 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -706,11 +706,11 @@ static const struct net_protocol esp4_protocol = {
 static int __init esp4_init(void)
 {
 	if (xfrm_register_type(&esp_type, AF_INET) < 0) {
-		printk(KERN_INFO "ip esp init: can't add xfrm type\n");
+		pr_info("%s: can't add xfrm type\n", __func__);
 		return -EAGAIN;
 	}
 	if (inet_add_protocol(&esp4_protocol, IPPROTO_ESP) < 0) {
-		printk(KERN_INFO "ip esp init: can't add protocol\n");
+		pr_info("%s: can't add protocol\n", __func__);
 		xfrm_unregister_type(&esp_type, AF_INET);
 		return -EAGAIN;
 	}
@@ -720,9 +720,9 @@ static int __init esp4_init(void)
 static void __exit esp4_fini(void)
 {
 	if (inet_del_protocol(&esp4_protocol, IPPROTO_ESP) < 0)
-		printk(KERN_INFO "ip esp close: can't remove protocol\n");
+		pr_info("%s: can't remove protocol\n", __func__);
 	if (xfrm_unregister_type(&esp_type, AF_INET) < 0)
-		printk(KERN_INFO "ip esp close: can't remove xfrm type\n");
+		pr_info("%s: can't remove xfrm type\n", __func__);
 }
 
 module_init(esp4_init);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 92fc5f69f5da..76e72bacc217 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -695,7 +695,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
 	if (ifa->ifa_flags & IFA_F_SECONDARY) {
 		prim = inet_ifa_byprefix(in_dev, prefix, mask);
 		if (prim == NULL) {
-			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
+			pr_warn("%s: bug: prim == NULL\n", __func__);
 			return;
 		}
 	}
@@ -749,11 +749,11 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
 	if (ifa->ifa_flags & IFA_F_SECONDARY) {
 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
 		if (prim == NULL) {
-			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
+			pr_warn("%s: bug: prim == NULL\n", __func__);
 			return;
 		}
 		if (iprim && iprim != prim) {
-			printk(KERN_WARNING "fib_del_ifaddr: bug: iprim != prim\n");
+			pr_warn("%s: bug: iprim != prim\n", __func__);
 			return;
 		}
 	} else if (!ipv4_is_zeronet(any) &&
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 80106d89d548..a8c5c1d6715b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -154,7 +154,7 @@ static void free_fib_info_rcu(struct rcu_head *head)
 void free_fib_info(struct fib_info *fi)
 {
 	if (fi->fib_dead == 0) {
-		pr_warning("Freeing alive fib_info %p\n", fi);
+		pr_warn("Freeing alive fib_info %p\n", fi);
 		return;
 	}
 	change_nexthops(fi) {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2b555a5521e0..da9b9cb2282d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1170,9 +1170,8 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
 	}
 
 	if (tp && tp->pos + tp->bits > 32)
-		pr_warning("fib_trie"
-			   " tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
-			   tp, tp->pos, tp->bits, key, plen);
+		pr_warn("fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
+			tp, tp->pos, tp->bits, key, plen);
 
 	/* Rebalance the trie */
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ab188ae12fd9..5c7323b7810f 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -670,7 +670,7 @@ static void icmp_unreach(struct sk_buff *skb)
 			break;
 		case ICMP_FRAG_NEEDED:
 			if (ipv4_config.no_pmtu_disc) {
-				LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: fragmentation needed and DF set.\n",
+				LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: fragmentation needed and DF set\n",
 					       &iph->daddr);
 			} else {
 				info = ip_rt_frag_needed(net, iph,
@@ -681,7 +681,7 @@ static void icmp_unreach(struct sk_buff *skb)
 			}
 			break;
 		case ICMP_SR_FAILED:
-			LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: Source Route Failed.\n",
+			LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: Source Route Failed\n",
 				       &iph->daddr);
 			break;
 		default:
@@ -713,13 +713,10 @@ static void icmp_unreach(struct sk_buff *skb)
 	if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
 	    inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
 		if (net_ratelimit())
-			printk(KERN_WARNING "%pI4 sent an invalid ICMP "
-					    "type %u, code %u "
-					    "error to a broadcast: %pI4 on %s\n",
-			       &ip_hdr(skb)->saddr,
-			       icmph->type, icmph->code,
-			       &iph->daddr,
-			       skb->dev->name);
+			pr_warn("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
+				&ip_hdr(skb)->saddr,
+				icmph->type, icmph->code,
+				&iph->daddr, skb->dev->name);
 		goto out;
 	}
 
@@ -946,8 +943,8 @@ static void icmp_address_reply(struct sk_buff *skb)
 				break;
 		}
 		if (!ifa && net_ratelimit()) {
-			printk(KERN_INFO "Wrong address mask %pI4 from %s/%pI4\n",
-			       mp, dev->name, &ip_hdr(skb)->saddr);
+			pr_info("Wrong address mask %pI4 from %s/%pI4\n",
+				mp, dev->name, &ip_hdr(skb)->saddr);
 		}
 	}
 }
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 1f23a57aa9e6..b2fd333abd59 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -643,8 +643,7 @@ out_nomem:
 	goto out_fail;
 out_oversize:
 	if (net_ratelimit())
-		printk(KERN_INFO "Oversized IP packet from %pI4.\n",
-			&qp->saddr);
+		pr_info("Oversized IP packet from %pI4\n", &qp->saddr);
 out_fail:
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
 	return err;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 6ef66af12291..185e0a75ecec 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1716,7 +1716,7 @@ static int __init ipgre_init(void)
 {
 	int err;
 
-	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
+	pr_info("GRE over IPv4 tunneling driver\n");
 
 	err = register_pernet_device(&ipgre_net_ops);
 	if (err < 0)
@@ -1724,7 +1724,7 @@ static int __init ipgre_init(void)
 
 	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
 	if (err < 0) {
-		printk(KERN_INFO "ipgre init: can't add protocol\n");
+		pr_info("%s: can't add protocol\n", __func__);
 		goto add_proto_failed;
 	}
 
@@ -1753,7 +1753,7 @@ static void __exit ipgre_fini(void)
 	rtnl_link_unregister(&ipgre_tap_ops);
 	rtnl_link_unregister(&ipgre_link_ops);
 	if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
-		printk(KERN_INFO "ipgre close: can't remove protocol\n");
+		pr_info("%s: can't remove protocol\n", __func__);
 	unregister_pernet_device(&ipgre_net_ops);
 }
 
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index bdaa2c5c28e4..de8e9be4aac8 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -299,8 +299,8 @@ static inline bool ip_rcv_options(struct sk_buff *skb)
 			if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
 				if (IN_DEV_LOG_MARTIANS(in_dev) &&
 				    net_ratelimit())
-					printk(KERN_INFO "source route option %pI4 -> %pI4\n",
-					       &iph->saddr, &iph->daddr);
+					pr_info("source route option %pI4 -> %pI4\n",
+						&iph->saddr, &iph->daddr);
 				goto drop;
 			}
 		}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 42dd1a90edea..e25ee7c8bdde 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -577,7 +577,7 @@ void ip_forward_options(struct sk_buff *skb)
 			ip_rt_get_source(&optptr[srrptr-1], skb, rt);
 			optptr[2] = srrptr+4;
 		} else if (net_ratelimit())
-			printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
+			pr_crit("%s(): Argh! Destination lost!\n", __func__);
 		if (opt->ts_needaddr) {
 			optptr = raw + opt->ts;
 			ip_rt_get_source(&optptr[optptr[2]-9], skb, rt);
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index c857f6f49b03..63b64c45a826 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -156,11 +156,11 @@ static const struct net_protocol ipcomp4_protocol = {
 static int __init ipcomp4_init(void)
 {
 	if (xfrm_register_type(&ipcomp_type, AF_INET) < 0) {
-		printk(KERN_INFO "ipcomp init: can't add xfrm type\n");
+		pr_info("%s: can't add xfrm type\n", __func__);
 		return -EAGAIN;
 	}
 	if (inet_add_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0) {
-		printk(KERN_INFO "ipcomp init: can't add protocol\n");
+		pr_info("%s: can't add protocol\n", __func__);
 		xfrm_unregister_type(&ipcomp_type, AF_INET);
 		return -EAGAIN;
 	}
@@ -170,9 +170,9 @@ static int __init ipcomp4_init(void)
 static void __exit ipcomp4_fini(void)
 {
 	if (inet_del_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0)
-		printk(KERN_INFO "ip ipcomp close: can't remove protocol\n");
+		pr_info("%s: can't remove protocol\n", __func__);
 	if (xfrm_unregister_type(&ipcomp_type, AF_INET) < 0)
-		printk(KERN_INFO "ip ipcomp close: can't remove xfrm type\n");
+		pr_info("%s: can't remove xfrm type\n", __func__);
 }
 
 module_init(ipcomp4_init);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 6e412a60a91f..92ac7e7363a0 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -214,7 +214,7 @@ static int __init ic_open_devs(void)
 		if (!(dev->flags & IFF_LOOPBACK))
 			continue;
 		if (dev_change_flags(dev, dev->flags | IFF_UP) < 0)
-			printk(KERN_ERR "IP-Config: Failed to open %s\n", dev->name);
+			pr_err("IP-Config: Failed to open %s\n", dev->name);
 	}
 
 	for_each_netdev(&init_net, dev) {
@@ -223,7 +223,8 @@ static int __init ic_open_devs(void)
 			if (dev->mtu >= 364)
 				able |= IC_BOOTP;
 			else
-				printk(KERN_WARNING "DHCP/BOOTP: Ignoring device %s, MTU %d too small", dev->name, dev->mtu);
+				pr_warn("DHCP/BOOTP: Ignoring device %s, MTU %d too small",
+					dev->name, dev->mtu);
 			if (!(dev->flags & IFF_NOARP))
 				able |= IC_RARP;
 			able &= ic_proto_enabled;
@@ -231,7 +232,8 @@ static int __init ic_open_devs(void)
 				continue;
 			oflags = dev->flags;
 			if (dev_change_flags(dev, oflags | IFF_UP) < 0) {
-				printk(KERN_ERR "IP-Config: Failed to open %s\n", dev->name);
+				pr_err("IP-Config: Failed to open %s\n",
+				       dev->name);
 				continue;
 			}
 			if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) {
@@ -273,9 +275,10 @@ have_carrier:
 
 	if (!ic_first_dev) {
 		if (user_dev_name[0])
-			printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name);
+			pr_err("IP-Config: Device `%s' not found\n",
+			       user_dev_name);
 		else
-			printk(KERN_ERR "IP-Config: No network devices available.\n");
+			pr_err("IP-Config: No network devices available\n");
 		return -ENODEV;
 	}
 	return 0;
@@ -359,17 +362,20 @@ static int __init ic_setup_if(void)
 	strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name);
 	set_sockaddr(sin, ic_myaddr, 0);
 	if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) {
-		printk(KERN_ERR "IP-Config: Unable to set interface address (%d).\n", err);
+		pr_err("IP-Config: Unable to set interface address (%d)\n",
+		       err);
 		return -1;
 	}
 	set_sockaddr(sin, ic_netmask, 0);
 	if ((err = ic_devinet_ioctl(SIOCSIFNETMASK, &ir)) < 0) {
-		printk(KERN_ERR "IP-Config: Unable to set interface netmask (%d).\n", err);
+		pr_err("IP-Config: Unable to set interface netmask (%d)\n",
+		       err);
 		return -1;
 	}
 	set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0);
 	if ((err = ic_devinet_ioctl(SIOCSIFBRDADDR, &ir)) < 0) {
-		printk(KERN_ERR "IP-Config: Unable to set interface broadcast address (%d).\n", err);
+		pr_err("IP-Config: Unable to set interface broadcast address (%d)\n",
+		       err);
 		return -1;
 	}
 	/* Handle the case where we need non-standard MTU on the boot link (a network
@@ -380,8 +386,8 @@ static int __init ic_setup_if(void)
 		strcpy(ir.ifr_name, ic_dev->name);
 		ir.ifr_mtu = ic_dev_mtu;
 		if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0)
-			printk(KERN_ERR "IP-Config: Unable to set interface mtu to %d (%d).\n",
-			                 ic_dev_mtu, err);
+			pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n",
+			       ic_dev_mtu, err);
 	}
 	return 0;
 }
@@ -396,7 +402,7 @@ static int __init ic_setup_routes(void)
 
 		memset(&rm, 0, sizeof(rm));
 		if ((ic_gateway ^ ic_myaddr) & ic_netmask) {
-			printk(KERN_ERR "IP-Config: Gateway not on directly connected network.\n");
+			pr_err("IP-Config: Gateway not on directly connected network\n");
 			return -1;
 		}
 		set_sockaddr((struct sockaddr_in *) &rm.rt_dst, 0, 0);
@@ -404,7 +410,8 @@ static int __init ic_setup_routes(void)
 		set_sockaddr((struct sockaddr_in *) &rm.rt_gateway, ic_gateway, 0);
 		rm.rt_flags = RTF_UP | RTF_GATEWAY;
 		if ((err = ic_route_ioctl(SIOCADDRT, &rm)) < 0) {
-			printk(KERN_ERR "IP-Config: Cannot add default route (%d).\n", err);
+			pr_err("IP-Config: Cannot add default route (%d)\n",
+			       err);
 			return -1;
 		}
 	}
@@ -437,8 +444,8 @@ static int __init ic_defaults(void)
 		else if (IN_CLASSC(ntohl(ic_myaddr)))
 			ic_netmask = htonl(IN_CLASSC_NET);
 		else {
-			printk(KERN_ERR "IP-Config: Unable to guess netmask for address %pI4\n",
-				&ic_myaddr);
+			pr_err("IP-Config: Unable to guess netmask for address %pI4\n",
+			       &ic_myaddr);
 			return -1;
 		}
 		printk("IP-Config: Guessing netmask %pI4\n", &ic_netmask);
@@ -688,8 +695,8 @@ ic_dhcp_init_options(u8 *options)
 			e += len;
 		}
 		if (*vendor_class_identifier) {
-			printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n",
-			       vendor_class_identifier);
+			pr_info("DHCP: sending class identifier \"%s\"\n",
+				vendor_class_identifier);
 			*e++ = 60;	/* Class-identifier */
 			len = strlen(vendor_class_identifier);
 			*e++ = len;
@@ -949,8 +956,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 	/* Fragments are not supported */
 	if (ip_is_fragment(h)) {
 		if (net_ratelimit())
-			printk(KERN_ERR "DHCP/BOOTP: Ignoring fragmented "
-			       "reply.\n");
+			pr_err("DHCP/BOOTP: Ignoring fragmented reply\n");
 		goto drop;
 	}
 
@@ -999,8 +1005,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 	if (b->op != BOOTP_REPLY ||
 	    b->xid != d->xid) {
 		if (net_ratelimit())
-			printk(KERN_ERR "DHCP/BOOTP: Reply not for us, "
-			       "op[%x] xid[%x]\n",
+			pr_err("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n",
 			       b->op, b->xid);
 		goto drop_unlock;
 	}
@@ -1008,7 +1013,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 	/* Is it a reply for the device we are configuring? */
 	if (b->xid != ic_dev_xid) {
 		if (net_ratelimit())
-			printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet\n");
+			pr_err("DHCP/BOOTP: Ignoring delayed packet\n");
 		goto drop_unlock;
 	}
 
@@ -1146,17 +1151,17 @@ static int __init ic_dynamic(void)
 	 * are missing, and without DHCP/BOOTP/RARP we are unable to get it.
 	 */
 	if (!ic_proto_enabled) {
-		printk(KERN_ERR "IP-Config: Incomplete network configuration information.\n");
+		pr_err("IP-Config: Incomplete network configuration information\n");
 		return -1;
 	}
 
 #ifdef IPCONFIG_BOOTP
 	if ((ic_proto_enabled ^ ic_proto_have_if) & IC_BOOTP)
-		printk(KERN_ERR "DHCP/BOOTP: No suitable device found.\n");
+		pr_err("DHCP/BOOTP: No suitable device found\n");
 #endif
 #ifdef IPCONFIG_RARP
 	if ((ic_proto_enabled ^ ic_proto_have_if) & IC_RARP)
-		printk(KERN_ERR "RARP: No suitable device found.\n");
+		pr_err("RARP: No suitable device found\n");
 #endif
 
 	if (!ic_proto_have_if)
@@ -1183,11 +1188,11 @@ static int __init ic_dynamic(void)
 	 * [Actually we could now, but the nothing else running note still
 	 *  applies.. - AC]
 	 */
-	printk(KERN_NOTICE "Sending %s%s%s requests .",
-	       do_bootp
-		? ((ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP") : "",
-	       (do_bootp && do_rarp) ? " and " : "",
-	       do_rarp ? "RARP" : "");
+	pr_notice("Sending %s%s%s requests .",
+		  do_bootp
+		  ? ((ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP") : "",
+		  (do_bootp && do_rarp) ? " and " : "",
+		  do_rarp ? "RARP" : "");
 
 	start_jiffies = jiffies;
 	d = ic_first_dev;
@@ -1216,13 +1221,13 @@ static int __init ic_dynamic(void)
 		    (ic_proto_enabled & IC_USE_DHCP) &&
 		    ic_dhcp_msgtype != DHCPACK) {
 			ic_got_reply = 0;
-			printk(KERN_CONT ",");
+			pr_cont(",");
 			continue;
 		}
 #endif /* IPCONFIG_DHCP */
 
 		if (ic_got_reply) {
-			printk(KERN_CONT " OK\n");
+			pr_cont(" OK\n");
 			break;
 		}
 
@@ -1230,7 +1235,7 @@ static int __init ic_dynamic(void)
 			continue;
 
 		if (! --retries) {
-			printk(KERN_CONT " timed out!\n");
+			pr_cont(" timed out!\n");
 			break;
 		}
 
@@ -1240,7 +1245,7 @@ static int __init ic_dynamic(void)
 		if (timeout > CONF_TIMEOUT_MAX)
 			timeout = CONF_TIMEOUT_MAX;
 
-		printk(KERN_CONT ".");
+		pr_cont(".");
 	}
 
 #ifdef IPCONFIG_BOOTP
@@ -1260,8 +1265,8 @@ static int __init ic_dynamic(void)
 	printk("IP-Config: Got %s answer from %pI4, ",
 		((ic_got_reply & IC_RARP) ? "RARP"
 		 : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"),
-		&ic_servaddr);
-	printk(KERN_CONT "my address is %pI4\n", &ic_myaddr);
+	       &ic_servaddr);
+	pr_cont("my address is %pI4\n", &ic_myaddr);
 
 	return 0;
 }
@@ -1437,24 +1442,22 @@ static int __init ip_auto_config(void)
 			 */
 #ifdef CONFIG_ROOT_NFS
 			if (ROOT_DEV ==  Root_NFS) {
-				printk(KERN_ERR
-					"IP-Config: Retrying forever (NFS root)...\n");
+				pr_err("IP-Config: Retrying forever (NFS root)...\n");
 				goto try_try_again;
 			}
 #endif
 
 			if (--retries) {
-				printk(KERN_ERR
-				       "IP-Config: Reopening network devices...\n");
+				pr_err("IP-Config: Reopening network devices...\n");
 				goto try_try_again;
 			}
 
 			/* Oh, well.  At least we tried. */
-			printk(KERN_ERR "IP-Config: Auto-configuration of network failed.\n");
+			pr_err("IP-Config: Auto-configuration of network failed\n");
 			return -1;
 		}
 #else /* !DYNAMIC */
-		printk(KERN_ERR "IP-Config: Incomplete network configuration information.\n");
+		pr_err("IP-Config: Incomplete network configuration information\n");
 		ic_close_devs();
 		return -1;
 #endif /* IPCONFIG_DYNAMIC */
@@ -1492,19 +1495,16 @@ static int __init ip_auto_config(void)
 	/*
 	 * Clue in the operator.
 	 */
-	printk("IP-Config: Complete:\n");
-	printk("     device=%s", ic_dev->name);
-	printk(KERN_CONT ", addr=%pI4", &ic_myaddr);
-	printk(KERN_CONT ", mask=%pI4", &ic_netmask);
-	printk(KERN_CONT ", gw=%pI4", &ic_gateway);
-	printk(KERN_CONT ",\n     host=%s, domain=%s, nis-domain=%s",
-	       utsname()->nodename, ic_domain, utsname()->domainname);
-	printk(KERN_CONT ",\n     bootserver=%pI4", &ic_servaddr);
-	printk(KERN_CONT ", rootserver=%pI4", &root_server_addr);
-	printk(KERN_CONT ", rootpath=%s", root_server_path);
+	pr_info("IP-Config: Complete:\n");
+	pr_info("     device=%s, addr=%pI4, mask=%pI4, gw=%pI4\n",
+		ic_dev->name, &ic_myaddr, &ic_netmask, &ic_gateway);
+	pr_info("     host=%s, domain=%s, nis-domain=%s\n",
+		utsname()->nodename, ic_domain, utsname()->domainname);
+	pr_info("     bootserver=%pI4, rootserver=%pI4, rootpath=%s",
+		&ic_servaddr, &root_server_addr, root_server_path);
 	if (ic_dev_mtu)
-		printk(KERN_CONT ", mtu=%d", ic_dev_mtu);
-	printk(KERN_CONT "\n");
+		pr_cont(", mtu=%d", ic_dev_mtu);
+	pr_cont("\n");
 #endif /* !SILENT */
 
 	return 0;
@@ -1637,8 +1637,8 @@ static int __init vendor_class_identifier_setup(char *addrs)
 	if (strlcpy(vendor_class_identifier, addrs,
 		    sizeof(vendor_class_identifier))
 	    >= sizeof(vendor_class_identifier))
-		printk(KERN_WARNING "DHCP: vendorclass too long, truncated to \"%s\"",
-		       vendor_class_identifier);
+		pr_warn("DHCP: vendorclass too long, truncated to \"%s\"",
+			vendor_class_identifier);
 	return 1;
 }
 
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index f84ebff5cdb0..ae1413e3f2f8 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -892,7 +892,7 @@ static int __init ipip_init(void)
 	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
 	if (err < 0) {
 		unregister_pernet_device(&ipip_net_ops);
-		printk(KERN_INFO "ipip init: can't register tunnel\n");
+		pr_info("%s: can't register tunnel\n", __func__);
 	}
 	return err;
 }
@@ -900,7 +900,7 @@ static int __init ipip_init(void)
 static void __exit ipip_fini(void)
 {
 	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
-		printk(KERN_INFO "ipip close: can't deregister tunnel\n");
+		pr_info("%s: can't deregister tunnel\n", __func__);
 
 	unregister_pernet_device(&ipip_net_ops);
 }
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 7bc2db6db8d4..0518a4fb177b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -951,7 +951,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 	rcu_read_unlock();
 	if (ret < 0) {
 		if (net_ratelimit())
-			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
+			pr_warn("mroute: pending queue full, dropping entries\n");
 		kfree_skb(skb);
 	}
 
@@ -2538,7 +2538,7 @@ int __init ip_mr_init(void)
 		goto reg_notif_fail;
 #ifdef CONFIG_IP_PIMSM_V2
 	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
-		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
+		pr_err("%s: can't add PIM protocol\n", __func__);
 		err = -EAGAIN;
 		goto add_proto_fail;
 	}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 4398a45a9600..ab6b36e6da15 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -156,7 +156,7 @@ static struct sock *ping_v4_lookup(struct net *net, __be32 saddr, __be32 daddr,
 	struct hlist_nulls_node *hnode;
 
 	pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
-			 (int)ident, &daddr, dif);
+		 (int)ident, &daddr, dif);
 	read_lock_bh(&ping_table.lock);
 
 	ping_portaddr_for_each_entry(sk, hnode, hslot) {
@@ -229,7 +229,7 @@ static int ping_init_sock(struct sock *sk)
 static void ping_close(struct sock *sk, long timeout)
 {
 	pr_debug("ping_close(sk=%p,sk->num=%u)\n",
-		inet_sk(sk), inet_sk(sk)->inet_num);
+		 inet_sk(sk), inet_sk(sk)->inet_num);
 	pr_debug("isk->refcnt = %d\n", sk->sk_refcnt.counter);
 
 	sk_common_release(sk);
@@ -252,7 +252,7 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		return -EINVAL;
 
 	pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n",
-		sk, addr->sin_addr.s_addr, ntohs(addr->sin_port));
+		 sk, addr->sin_addr.s_addr, ntohs(addr->sin_port));
 
 	chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
 	if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
@@ -280,9 +280,9 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	}
 
 	pr_debug("after bind(): num = %d, daddr = %pI4, dif = %d\n",
-		(int)isk->inet_num,
-		&isk->inet_rcv_saddr,
-		(int)sk->sk_bound_dev_if);
+		 (int)isk->inet_num,
+		 &isk->inet_rcv_saddr,
+		 (int)sk->sk_bound_dev_if);
 
 	err = 0;
 	if (isk->inet_rcv_saddr)
@@ -335,7 +335,7 @@ void ping_err(struct sk_buff *skb, u32 info)
 		return;
 
 	pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type,
-		code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
+		 code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
 
 	sk = ping_v4_lookup(net, iph->daddr, iph->saddr,
 			    ntohs(icmph->un.echo.id), skb->dev->ifindex);
@@ -679,7 +679,7 @@ out:
 static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
-		inet_sk(sk), inet_sk(sk)->inet_num, skb);
+		 inet_sk(sk), inet_sk(sk)->inet_num, skb);
 	if (sock_queue_rcv_skb(sk, skb) < 0) {
 		kfree_skb(skb);
 		pr_debug("ping_queue_rcv_skb -> failed\n");
@@ -705,7 +705,7 @@ void ping_rcv(struct sk_buff *skb)
 	/* We assume the packet has already been checked by icmp_rcv */
 
 	pr_debug("ping_rcv(skb=%p,id=%04x,seq=%04x)\n",
-		skb, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
+		 skb, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
 
 	/* Push ICMP header back */
 	skb_push(skb, skb->data - (u8 *)icmph);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ab466305b629..bbd604c68e68 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -491,11 +491,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		if (msg->msg_namelen < sizeof(*usin))
 			goto out;
 		if (usin->sin_family != AF_INET) {
-			static int complained;
-			if (!complained++)
-				printk(KERN_INFO "%s forgot to set AF_INET in "
-						 "raw sendmsg. Fix it!\n",
-						 current->comm);
+			pr_info_once("%s: %s forgot to set AF_INET. Fix it!\n",
+				     __func__, current->comm);
 			err = -EAFNOSUPPORT;
 			if (usin->sin_family)
 				goto out;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 815989b90dea..cbda6de0a77c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -959,7 +959,7 @@ void rt_cache_flush_batch(struct net *net)
 static void rt_emergency_hash_rebuild(struct net *net)
 {
 	if (net_ratelimit())
-		printk(KERN_WARNING "Route hash chain too long!\n");
+		pr_warn("Route hash chain too long!\n");
 	rt_cache_invalidate(net);
 }
 
@@ -1083,7 +1083,7 @@ static int rt_garbage_collect(struct dst_ops *ops)
 	if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size)
 		goto out;
 	if (net_ratelimit())
-		printk(KERN_WARNING "dst cache overflow\n");
+		pr_warn("dst cache overflow\n");
 	RT_CACHE_STAT_INC(gc_dst_overflow);
 	return 1;
 
@@ -1181,8 +1181,7 @@ restart:
 			int err = rt_bind_neighbour(rt);
 			if (err) {
 				if (net_ratelimit())
-					printk(KERN_WARNING
-					    "Neighbour table failure & not caching routes.\n");
+					pr_warn("Neighbour table failure & not caching routes\n");
 				ip_rt_put(rt);
 				return ERR_PTR(err);
 			}
@@ -1258,7 +1257,7 @@ restart:
 			struct net *net = dev_net(rt->dst.dev);
 			int num = ++net->ipv4.current_rt_cache_rebuild_count;
 			if (!rt_caching(net)) {
-				printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n",
+				pr_warn("%s: %d rebuilds is over limit, route caching disabled\n",
 					rt->dst.dev->name, num);
 			}
 			rt_emergency_hash_rebuild(net);
@@ -1299,7 +1298,7 @@ restart:
 			}
 
 			if (net_ratelimit())
-				printk(KERN_WARNING "ipv4: Neighbour table overflow.\n");
+				pr_warn("ipv4: Neighbour table overflow\n");
 			rt_drop(rt);
 			return ERR_PTR(-ENOBUFS);
 		}
@@ -1503,10 +1502,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 reject_redirect:
 #ifdef CONFIG_IP_ROUTE_VERBOSE
 	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
-		printk(KERN_INFO "Redirect from %pI4 on %s about %pI4 ignored.\n"
+		pr_info("Redirect from %pI4 on %s about %pI4 ignored\n"
 			"  Advised path = %pI4 -> %pI4\n",
-		       &old_gw, dev->name, &new_gw,
-		       &saddr, &daddr);
+			&old_gw, dev->name, &new_gw,
+			&saddr, &daddr);
 #endif
 	;
 }
@@ -1618,8 +1617,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 		if (log_martians &&
 		    peer->rate_tokens == ip_rt_redirect_number &&
 		    net_ratelimit())
-			printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
-			       &ip_hdr(skb)->saddr, rt->rt_iif,
+			pr_warn("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
+				&ip_hdr(skb)->saddr, rt->rt_iif,
 				&rt->rt_dst, &rt->rt_gateway);
 #endif
 	}
@@ -2105,18 +2104,13 @@ static void ip_handle_martian_source(struct net_device *dev,
 		 *	RFC1812 recommendation, if source is martian,
 		 *	the only hint is MAC header.
 		 */
-		printk(KERN_WARNING "martian source %pI4 from %pI4, on dev %s\n",
+		pr_warn("martian source %pI4 from %pI4, on dev %s\n",
 			&daddr, &saddr, dev->name);
 		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
-			int i;
-			const unsigned char *p = skb_mac_header(skb);
-			printk(KERN_WARNING "ll header: ");
-			for (i = 0; i < dev->hard_header_len; i++, p++) {
-				printk("%02x", *p);
-				if (i < (dev->hard_header_len - 1))
-					printk(":");
-			}
-			printk("\n");
+			print_hex_dump(KERN_WARNING, "ll header: ",
+				       DUMP_PREFIX_OFFSET, 16, 1,
+				       skb_mac_header(skb),
+				       dev->hard_header_len, true);
 		}
 	}
 #endif
@@ -2140,8 +2134,7 @@ static int __mkroute_input(struct sk_buff *skb,
 	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
 	if (out_dev == NULL) {
 		if (net_ratelimit())
-			printk(KERN_CRIT "Bug in ip_route_input" \
-			       "_slow(). Please, report\n");
+			pr_crit("Bug in ip_route_input_slow(). Please report.\n");
 		return -EINVAL;
 	}
 
@@ -2413,7 +2406,7 @@ martian_destination:
 	RT_CACHE_STAT_INC(in_martian_dst);
 #ifdef CONFIG_IP_ROUTE_VERBOSE
 	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
-		printk(KERN_WARNING "martian destination %pI4 from %pI4, dev %s\n",
+		pr_warn("martian destination %pI4 from %pI4, dev %s\n",
 			&daddr, &saddr, dev->name);
 #endif
 
@@ -3490,7 +3483,7 @@ int __init ip_rt_init(void)
 		net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
 
 	if (ip_rt_proc_init())
-		printk(KERN_ERR "Unable to create route proc files\n");
+		pr_err("Unable to create route proc files\n");
 #ifdef CONFIG_XFRM
 	xfrm_init();
 	xfrm4_init(ip_rt_max_size);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 22ef5f9fd2ff..2baba1bed810 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1675,7 +1675,7 @@ do_prequeue:
 
 				if (tp->ucopy.dma_cookie < 0) {
 
-					printk(KERN_ALERT "dma_cookie < 0\n");
+					pr_alert("dma_cookie < 0\n");
 
 					/* Exception. Bailout! */
 					if (!copied)
@@ -3311,9 +3311,8 @@ void __init tcp_init(void)
 	sysctl_tcp_rmem[1] = 87380;
 	sysctl_tcp_rmem[2] = max(87380, max_share);
 
-	printk(KERN_INFO "TCP: Hash tables configured "
-	       "(established %u bind %u)\n",
-	       tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
+	pr_info("TCP: Hash tables configured (established %u bind %u)\n",
+		tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
 
 	tcp_register_congestion_control(&tcp_reno);
 
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index fc6d475f488f..67bab75f441f 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -41,18 +41,17 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
 
 	/* all algorithms must implement ssthresh and cong_avoid ops */
 	if (!ca->ssthresh || !ca->cong_avoid) {
-		printk(KERN_ERR "TCP %s does not implement required ops\n",
-		       ca->name);
+		pr_err("TCP %s does not implement required ops\n", ca->name);
 		return -EINVAL;
 	}
 
 	spin_lock(&tcp_cong_list_lock);
 	if (tcp_ca_find(ca->name)) {
-		printk(KERN_NOTICE "TCP %s already registered\n", ca->name);
+		pr_notice("TCP %s already registered\n", ca->name);
 		ret = -EEXIST;
 	} else {
 		list_add_tail_rcu(&ca->list, &tcp_cong_list);
-		printk(KERN_INFO "TCP %s registered\n", ca->name);
+		pr_info("TCP %s registered\n", ca->name);
 	}
 	spin_unlock(&tcp_cong_list_lock);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b5e315f13641..23ca3663d1e8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3867,9 +3867,9 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
 					opt_rx->wscale_ok = 1;
 					if (snd_wscale > 14) {
 						if (net_ratelimit())
-							printk(KERN_INFO "tcp_parse_options: Illegal window "
-							       "scaling value %d >14 received.\n",
-							       snd_wscale);
+							pr_info("%s: Illegal window scaling value %d >14 received\n",
+								__func__,
+								snd_wscale);
 						snd_wscale = 14;
 					}
 					opt_rx->snd_wscale = snd_wscale;
@@ -4191,7 +4191,7 @@ static void tcp_fin(struct sock *sk)
 		/* Only TCP_LISTEN and TCP_CLOSE are left, in these
 		 * cases we should never reach this piece of code.
 		 */
-		printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
+		pr_err("%s: Impossible, sk->sk_state=%d\n",
 		       __func__, sk->sk_state);
 		break;
 	}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 507924b640ef..257dba66eaca 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1227,10 +1227,10 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
 
 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
 		if (net_ratelimit()) {
-			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
-			       &iph->saddr, ntohs(th->source),
-			       &iph->daddr, ntohs(th->dest),
-			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
+			pr_info("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
+				&iph->saddr, ntohs(th->source),
+				&iph->daddr, ntohs(th->dest),
+				genhash ? " tcp_v4_calc_md5_hash failed" : "");
 		}
 		return 1;
 	}
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 01775983b997..0d0171830620 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -164,12 +164,12 @@ static const struct net_protocol tunnel64_protocol = {
 static int __init tunnel4_init(void)
 {
 	if (inet_add_protocol(&tunnel4_protocol, IPPROTO_IPIP)) {
-		printk(KERN_ERR "tunnel4 init: can't add protocol\n");
+		pr_err("%s: can't add protocol\n", __func__);
 		return -EAGAIN;
 	}
 #if IS_ENABLED(CONFIG_IPV6)
 	if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) {
-		printk(KERN_ERR "tunnel64 init: can't add protocol\n");
+		pr_err("tunnel64 init: can't add protocol\n");
 		inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
 		return -EAGAIN;
 	}
@@ -181,10 +181,10 @@ static void __exit tunnel4_fini(void)
 {
 #if IS_ENABLED(CONFIG_IPV6)
 	if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6))
-		printk(KERN_ERR "tunnel64 close: can't remove protocol\n");
+		pr_err("tunnel64 close: can't remove protocol\n");
 #endif
 	if (inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP))
-		printk(KERN_ERR "tunnel4 close: can't remove protocol\n");
+		pr_err("tunnel4 close: can't remove protocol\n");
 }
 
 module_init(tunnel4_init);
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 12e9499a1a6c..280bfb12adcc 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -129,11 +129,11 @@ void __init udplite4_register(void)
 	inet_register_protosw(&udplite4_protosw);
 
 	if (udplite4_proc_init())
-		printk(KERN_ERR "%s: Cannot register /proc!\n", __func__);
+		pr_err("%s: Cannot register /proc!\n", __func__);
 	return;
 
 out_unregister_proto:
 	proto_unregister(&udplite_prot);
 out_register_err:
-	printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__);
+	pr_crit("%s: Cannot add UDP-Lite protocol\n", __func__);
 }
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 9247d9d70e9d..5b48f49df0ba 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -75,18 +75,18 @@ static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = {
 static int __init ipip_init(void)
 {
 	if (xfrm_register_type(&ipip_type, AF_INET) < 0) {
-		printk(KERN_INFO "ipip init: can't add xfrm type\n");
+		pr_info("%s: can't add xfrm type\n", __func__);
 		return -EAGAIN;
 	}
 
 	if (xfrm4_tunnel_register(&xfrm_tunnel_handler, AF_INET)) {
-		printk(KERN_INFO "ipip init: can't add xfrm handler for AF_INET\n");
+		pr_info("%s: can't add xfrm handler for AF_INET\n", __func__);
 		xfrm_unregister_type(&ipip_type, AF_INET);
 		return -EAGAIN;
 	}
 #if IS_ENABLED(CONFIG_IPV6)
 	if (xfrm4_tunnel_register(&xfrm64_tunnel_handler, AF_INET6)) {
-		printk(KERN_INFO "ipip init: can't add xfrm handler for AF_INET6\n");
+		pr_info("%s: can't add xfrm handler for AF_INET6\n", __func__);
 		xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET);
 		xfrm_unregister_type(&ipip_type, AF_INET);
 		return -EAGAIN;
@@ -99,12 +99,14 @@ static void __exit ipip_fini(void)
 {
 #if IS_ENABLED(CONFIG_IPV6)
 	if (xfrm4_tunnel_deregister(&xfrm64_tunnel_handler, AF_INET6))
-		printk(KERN_INFO "ipip close: can't remove xfrm handler for AF_INET6\n");
+		pr_info("%s: can't remove xfrm handler for AF_INET6\n",
+			__func__);
 #endif
 	if (xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET))
-		printk(KERN_INFO "ipip close: can't remove xfrm handler for AF_INET\n");
+		pr_info("%s: can't remove xfrm handler for AF_INET\n",
+			__func__);
 	if (xfrm_unregister_type(&ipip_type, AF_INET) < 0)
-		printk(KERN_INFO "ipip close: can't remove xfrm type\n");
+		pr_info("%s: can't remove xfrm type\n", __func__);
 }
 
 module_init(ipip_init);
-- 
cgit v1.2.3


From afd465030acb4098abcb6b965a5aebc7ea2209e0 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 12 Mar 2012 07:03:32 +0000
Subject: net: ipv4: Standardize prefixes for message logging

Add #define pr_fmt(fmt) as appropriate.

Add "IPv4: ", "TCP: ", and "IPsec: " to appropriate files.
Standardize on "UDPLite: " for appropriate uses.
Some prefixes were previously "UDPLITE: " and "UDP-Lite: ".

Add KBUILD_MODNAME ": " to icmp and gre.
Remove embedded prefixes as appropriate.

Add missing "\n" to pr_info in gre.c.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c      |  2 ++
 net/ipv4/ah4.c          |  9 ++++++---
 net/ipv4/esp4.c         |  2 ++
 net/ipv4/gre.c          |  6 ++++--
 net/ipv4/icmp.c         |  6 ++++--
 net/ipv4/ip_fragment.c  |  8 +++++---
 net/ipv4/ip_gre.c       |  2 ++
 net/ipv4/ip_input.c     |  2 ++
 net/ipv4/ip_options.c   |  2 ++
 net/ipv4/route.c        |  4 +++-
 net/ipv4/tcp.c          | 11 +++++++----
 net/ipv4/tcp_cong.c     |  8 +++++---
 net/ipv4/tcp_input.c    |  2 ++
 net/ipv4/tcp_ipv4.c     |  6 +++---
 net/ipv4/tcp_probe.c    |  4 +++-
 net/ipv4/tcp_timer.c    | 14 ++++++++------
 net/ipv4/udp.c          | 34 ++++++++++++++--------------------
 net/ipv4/udplite.c      |  3 +++
 net/ipv4/xfrm4_tunnel.c |  2 ++
 19 files changed, 79 insertions(+), 48 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b7a946611f2f..fdf49fd44bb4 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -65,6 +65,8 @@
  *		2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt) "IPv4: " fmt
+
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/types.h>
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index b5524660c81d..fd508b526014 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) "IPsec: " fmt
+
 #include <crypto/hash.h>
 #include <linux/err.h>
 #include <linux/module.h>
@@ -445,9 +447,10 @@ static int ah_init_state(struct xfrm_state *x)
 
 	if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
 	    crypto_ahash_digestsize(ahash)) {
-		pr_info("AH: %s digestsize %u != %hu\n",
-			x->aalg->alg_name, crypto_ahash_digestsize(ahash),
-			aalg_desc->uinfo.auth.icv_fullbits/8);
+		pr_info("%s: %s digestsize %u != %hu\n",
+			__func__, x->aalg->alg_name,
+			crypto_ahash_digestsize(ahash),
+			aalg_desc->uinfo.auth.icv_fullbits / 8);
 		goto error;
 	}
 
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 0a86dbe9454b..89a47b35905d 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) "IPsec: " fmt
+
 #include <crypto/aead.h>
 #include <crypto/authenc.h>
 #include <linux/err.h>
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index 8cb1ebb7cd74..42a491055c76 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -10,6 +10,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/kmod.h>
@@ -118,10 +120,10 @@ static const struct net_protocol net_gre_protocol = {
 
 static int __init gre_init(void)
 {
-	pr_info("GRE over IPv4 demultiplexor driver");
+	pr_info("GRE over IPv4 demultiplexor driver\n");
 
 	if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
-		pr_err("gre: can't add protocol\n");
+		pr_err("can't add protocol\n");
 		return -EAGAIN;
 	}
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 5c7323b7810f..9664d353ccd8 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -62,6 +62,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/jiffies.h>
@@ -670,7 +672,7 @@ static void icmp_unreach(struct sk_buff *skb)
 			break;
 		case ICMP_FRAG_NEEDED:
 			if (ipv4_config.no_pmtu_disc) {
-				LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: fragmentation needed and DF set\n",
+				LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"),
 					       &iph->daddr);
 			} else {
 				info = ip_rt_frag_needed(net, iph,
@@ -681,7 +683,7 @@ static void icmp_unreach(struct sk_buff *skb)
 			}
 			break;
 		case ICMP_SR_FAILED:
-			LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: Source Route Failed\n",
+			LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: Source Route Failed\n"),
 				       &iph->daddr);
 			break;
 		default:
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b2fd333abd59..3727e234c884 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -20,6 +20,8 @@
  *		Patrick McHardy :	LRU queue of frag heads for evictor.
  */
 
+#define pr_fmt(fmt) "IPv4: " fmt
+
 #include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -299,7 +301,7 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
 	return container_of(q, struct ipq, q);
 
 out_nomem:
-	LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n");
+	LIMIT_NETDEBUG(KERN_ERR pr_fmt("ip_frag_create: no memory left !\n"));
 	return NULL;
 }
 
@@ -637,8 +639,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	return 0;
 
 out_nomem:
-	LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
-			      "queue %p\n", qp);
+	LIMIT_NETDEBUG(KERN_ERR pr_fmt("queue_glue: no memory for gluing queue %p\n"),
+		       qp);
 	err = -ENOMEM;
 	goto out_fail;
 out_oversize:
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 185e0a75ecec..b57532d4742c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -10,6 +10,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/types.h>
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index de8e9be4aac8..f3f1108940f5 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -113,6 +113,8 @@
  *		2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt) "IPv4: " fmt
+
 #include <asm/system.h>
 #include <linux/module.h>
 #include <linux/types.h>
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index e25ee7c8bdde..a0d0d9d9b870 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -9,6 +9,8 @@
  *
  */
 
+#define pr_fmt(fmt) "IPv4: " fmt
+
 #include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/slab.h>
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cbda6de0a77c..12ccf880eb88 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -62,6 +62,8 @@
  *		2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt) "IPv4: " fmt
+
 #include <linux/module.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -1298,7 +1300,7 @@ restart:
 			}
 
 			if (net_ratelimit())
-				pr_warn("ipv4: Neighbour table overflow\n");
+				pr_warn("Neighbour table overflow\n");
 			rt_drop(rt);
 			return ERR_PTR(-ENOBUFS);
 		}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2baba1bed810..cfd7edda0a8e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -245,6 +245,8 @@
  *	TCP_CLOSE		socket is finished
  */
 
+#define pr_fmt(fmt) "TCP: " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -1675,7 +1677,8 @@ do_prequeue:
 
 				if (tp->ucopy.dma_cookie < 0) {
 
-					pr_alert("dma_cookie < 0\n");
+					pr_alert("%s: dma_cookie < 0\n",
+						 __func__);
 
 					/* Exception. Bailout! */
 					if (!copied)
@@ -1884,9 +1887,9 @@ bool tcp_check_oom(struct sock *sk, int shift)
 	out_of_socket_memory = tcp_out_of_memory(sk);
 
 	if (too_many_orphans && net_ratelimit())
-		pr_info("TCP: too many orphaned sockets\n");
+		pr_info("too many orphaned sockets\n");
 	if (out_of_socket_memory && net_ratelimit())
-		pr_info("TCP: out of memory -- consider tuning tcp_mem\n");
+		pr_info("out of memory -- consider tuning tcp_mem\n");
 	return too_many_orphans || out_of_socket_memory;
 }
 
@@ -3311,7 +3314,7 @@ void __init tcp_init(void)
 	sysctl_tcp_rmem[1] = 87380;
 	sysctl_tcp_rmem[2] = max(87380, max_share);
 
-	pr_info("TCP: Hash tables configured (established %u bind %u)\n",
+	pr_info("Hash tables configured (established %u bind %u)\n",
 		tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
 
 	tcp_register_congestion_control(&tcp_reno);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 67bab75f441f..272a84593c85 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -6,6 +6,8 @@
  * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
  */
 
+#define pr_fmt(fmt) "TCP: " fmt
+
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/types.h>
@@ -41,17 +43,17 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
 
 	/* all algorithms must implement ssthresh and cong_avoid ops */
 	if (!ca->ssthresh || !ca->cong_avoid) {
-		pr_err("TCP %s does not implement required ops\n", ca->name);
+		pr_err("%s does not implement required ops\n", ca->name);
 		return -EINVAL;
 	}
 
 	spin_lock(&tcp_cong_list_lock);
 	if (tcp_ca_find(ca->name)) {
-		pr_notice("TCP %s already registered\n", ca->name);
+		pr_notice("%s already registered\n", ca->name);
 		ret = -EEXIST;
 	} else {
 		list_add_tail_rcu(&ca->list, &tcp_cong_list);
-		pr_info("TCP %s registered\n", ca->name);
+		pr_info("%s registered\n", ca->name);
 	}
 	spin_unlock(&tcp_cong_list_lock);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 23ca3663d1e8..68d4057cba00 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -61,6 +61,8 @@
  *		Pasi Sarolahti:		F-RTO for dealing with spurious RTOs
  */
 
+#define pr_fmt(fmt) "TCP: " fmt
+
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/module.h>
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 257dba66eaca..fe9f604ed1e2 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -50,6 +50,7 @@
  *					a single port at the same time.
  */
 
+#define pr_fmt(fmt) "TCP: " fmt
 
 #include <linux/bottom_half.h>
 #include <linux/types.h>
@@ -876,8 +877,7 @@ int tcp_syn_flood_action(struct sock *sk,
 	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
 	if (!lopt->synflood_warned) {
 		lopt->synflood_warned = 1;
-		pr_info("%s: Possible SYN flooding on port %d. %s. "
-			" Check SNMP counters.\n",
+		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
 			proto, ntohs(tcp_hdr(skb)->dest), msg);
 	}
 	return want_cookie;
@@ -1399,7 +1399,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 			 * to destinations, already remembered
 			 * to the moment of synflood.
 			 */
-			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
+			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
 				       &saddr, ntohs(tcp_hdr(skb)->source));
 			goto drop_and_release;
 		}
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 85ee7eb7e38e..a981cdc0a6e9 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -18,6 +18,8 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
 #include <linux/socket.h>
@@ -239,7 +241,7 @@ static __init int tcpprobe_init(void)
 	if (ret)
 		goto err1;
 
-	pr_info("TCP probe registered (port=%d) bufsize=%u\n", port, bufsize);
+	pr_info("probe registered (port=%d) bufsize=%u\n", port, bufsize);
 	return 0;
  err1:
 	proc_net_remove(&init_net, procname);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index cd2e0723266d..34d4a02c2f16 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -333,16 +333,18 @@ void tcp_retransmit_timer(struct sock *sk)
 		 */
 		struct inet_sock *inet = inet_sk(sk);
 		if (sk->sk_family == AF_INET) {
-			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
-			       &inet->inet_daddr, ntohs(inet->inet_dport),
-			       inet->inet_num, tp->snd_una, tp->snd_nxt);
+			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"),
+				       &inet->inet_daddr,
+				       ntohs(inet->inet_dport), inet->inet_num,
+				       tp->snd_una, tp->snd_nxt);
 		}
 #if IS_ENABLED(CONFIG_IPV6)
 		else if (sk->sk_family == AF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
-			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
-			       &np->daddr, ntohs(inet->inet_dport),
-			       inet->inet_num, tp->snd_una, tp->snd_nxt);
+			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"),
+				       &np->daddr,
+				       ntohs(inet->inet_dport), inet->inet_num,
+				       tp->snd_una, tp->snd_nxt);
 		}
 #endif
 		if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7c41ab84e72e..d6f5feeb3eaf 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -77,6 +77,8 @@
  *		2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt) "UDP: " fmt
+
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -975,7 +977,7 @@ back_from_confirm:
 		/* ... which is an evident application bug. --ANK */
 		release_sock(sk);
 
-		LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
+		LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("cork app bug 2\n"));
 		err = -EINVAL;
 		goto out;
 	}
@@ -1054,7 +1056,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
 	if (unlikely(!up->pending)) {
 		release_sock(sk);
 
-		LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
+		LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("udp cork app bug 3\n"));
 		return -EINVAL;
 	}
 
@@ -1447,9 +1449,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		 * provided by the application."
 		 */
 		if (up->pcrlen == 0) {          /* full coverage was set  */
-			LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
-				"%d while full coverage %d requested\n",
-				UDP_SKB_CB(skb)->cscov, skb->len);
+			LIMIT_NETDEBUG(KERN_WARNING "UDPLite: partial coverage %d while full coverage %d requested\n",
+				       UDP_SKB_CB(skb)->cscov, skb->len);
 			goto drop;
 		}
 		/* The next case involves violating the min. coverage requested
@@ -1459,9 +1460,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		 * Therefore the above ...()->partial_cov statement is essential.
 		 */
 		if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
-			LIMIT_NETDEBUG(KERN_WARNING
-				"UDPLITE: coverage %d too small, need min %d\n",
-				UDP_SKB_CB(skb)->cscov, up->pcrlen);
+			LIMIT_NETDEBUG(KERN_WARNING "UDPLite: coverage %d too small, need min %d\n",
+				       UDP_SKB_CB(skb)->cscov, up->pcrlen);
 			goto drop;
 		}
 	}
@@ -1689,13 +1689,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 
 short_packet:
 	LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
-		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
-		       &saddr,
-		       ntohs(uh->source),
-		       ulen,
-		       skb->len,
-		       &daddr,
-		       ntohs(uh->dest));
+		       proto == IPPROTO_UDPLITE ? "Lite" : "",
+		       &saddr, ntohs(uh->source),
+		       ulen, skb->len,
+		       &daddr, ntohs(uh->dest));
 	goto drop;
 
 csum_error:
@@ -1704,11 +1701,8 @@ csum_error:
 	 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
 	 */
 	LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
-		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
-		       &saddr,
-		       ntohs(uh->source),
-		       &daddr,
-		       ntohs(uh->dest),
+		       proto == IPPROTO_UDPLITE ? "Lite" : "",
+		       &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
 		       ulen);
 drop:
 	UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 280bfb12adcc..2c46acd4cc36 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -10,6 +10,9 @@
  *		as published by the Free Software Foundation; either version
  *		2 of the License, or (at your option) any later version.
  */
+
+#define pr_fmt(fmt) "UDPLite: " fmt
+
 #include <linux/export.h>
 #include "udp_impl.h"
 
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 5b48f49df0ba..05a5df2febc9 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -3,6 +3,8 @@
  * Copyright (C) 2003 David S. Miller (davem@redhat.com)
  */
 
+#define pr_fmt(fmt) "IPsec: " fmt
+
 #include <linux/skbuff.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-- 
cgit v1.2.3


From 124d37e9f088a8f56494b0264d63d22555f53fef Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Thu, 15 Mar 2012 05:25:58 +0000
Subject: arp: allow arp processing to honor per interface arp_accept sysctl

I found recently that the arp_process function which handles all of our received
arp frames, is using IPV4_DEVCONF_ALL macro to check the state of the arp_process
flag.  This seems wrong, as it implies that either none or all of the network
interfaces accept gratuitous arps.  This patch corrects that, allowing
per-interface arp_accept configuration to deviate from the all setting.  Note
this also brings us into line with the way the arp_filter setting is handled
during arp_process execution.

Tested this myself on my home network, and confirmed it works as expected.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: "David S. Miller" <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 63e49890ad31..73f46d691abc 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -889,7 +889,7 @@ static int arp_process(struct sk_buff *skb)
 
 	n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
 
-	if (IPV4_DEVCONF_ALL(dev_net(dev), ARP_ACCEPT)) {
+	if (IN_DEV_ARP_ACCEPT(in_dev)) {
 		/* Unsolicited ARP is not accepted by default.
 		   It is possible, that this option should be enabled for some
 		   devices (strip is candidate)
-- 
cgit v1.2.3


From e86b291962cbf477e35d983d312428cf737bc0f8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 18 Mar 2012 11:06:44 +0000
Subject: tcp: introduce tcp_data_queue_ofo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Split tcp_data_queue() in two parts for better readability.

tcp_data_queue_ofo() is responsible for queueing incoming skb into out
of order queue.

Change code layout so that the skb_set_owner_r() is performed only if
skb is not dropped.

This is a preliminary patch before "reduce out_of_order memory use"
following patch.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: H.K. Jerry Chu <hkchu@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 214 +++++++++++++++++++++++++++------------------------
 1 file changed, 115 insertions(+), 99 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 68d4057cba00..fa7de12c4a52 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4446,6 +4446,120 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
 	return 0;
 }
 
+static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb1;
+	u32 seq, end_seq;
+
+	TCP_ECN_check_ce(tp, skb);
+
+	if (tcp_try_rmem_schedule(sk, skb->truesize)) {
+		/* TODO: should increment a counter */
+		__kfree_skb(skb);
+		return;
+	}
+
+	/* Disable header prediction. */
+	tp->pred_flags = 0;
+	inet_csk_schedule_ack(sk);
+
+	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
+		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+
+	skb1 = skb_peek_tail(&tp->out_of_order_queue);
+	if (!skb1) {
+		/* Initial out of order segment, build 1 SACK. */
+		if (tcp_is_sack(tp)) {
+			tp->rx_opt.num_sacks = 1;
+			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
+			tp->selective_acks[0].end_seq =
+						TCP_SKB_CB(skb)->end_seq;
+		}
+		__skb_queue_head(&tp->out_of_order_queue, skb);
+		goto end;
+	}
+
+	seq = TCP_SKB_CB(skb)->seq;
+	end_seq = TCP_SKB_CB(skb)->end_seq;
+
+	if (seq == TCP_SKB_CB(skb1)->end_seq) {
+		__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+
+		if (!tp->rx_opt.num_sacks ||
+		    tp->selective_acks[0].end_seq != seq)
+			goto add_sack;
+
+		/* Common case: data arrive in order after hole. */
+		tp->selective_acks[0].end_seq = end_seq;
+		goto end;
+	}
+
+	/* Find place to insert this segment. */
+	while (1) {
+		if (!after(TCP_SKB_CB(skb1)->seq, seq))
+			break;
+		if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
+			skb1 = NULL;
+			break;
+		}
+		skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
+	}
+
+	/* Do skb overlap to previous one? */
+	if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+		if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+			/* All the bits are present. Drop. */
+			__kfree_skb(skb);
+			skb = NULL;
+			tcp_dsack_set(sk, seq, end_seq);
+			goto add_sack;
+		}
+		if (after(seq, TCP_SKB_CB(skb1)->seq)) {
+			/* Partial overlap. */
+			tcp_dsack_set(sk, seq,
+				      TCP_SKB_CB(skb1)->end_seq);
+		} else {
+			if (skb_queue_is_first(&tp->out_of_order_queue,
+					       skb1))
+				skb1 = NULL;
+			else
+				skb1 = skb_queue_prev(
+					&tp->out_of_order_queue,
+					skb1);
+		}
+	}
+	if (!skb1)
+		__skb_queue_head(&tp->out_of_order_queue, skb);
+	else
+		__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+
+	/* And clean segments covered by new one as whole. */
+	while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
+		skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
+
+		if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
+			break;
+		if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+			tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
+					 end_seq);
+			break;
+		}
+		__skb_unlink(skb1, &tp->out_of_order_queue);
+		tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
+				 TCP_SKB_CB(skb1)->end_seq);
+		__kfree_skb(skb1);
+	}
+
+add_sack:
+	if (tcp_is_sack(tp))
+		tcp_sack_new_ofo_skb(sk, seq, end_seq);
+end:
+	if (skb)
+		skb_set_owner_r(skb, sk);
+}
+
+
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	const struct tcphdr *th = tcp_hdr(skb);
@@ -4561,105 +4675,7 @@ drop:
 		goto queue_and_out;
 	}
 
-	TCP_ECN_check_ce(tp, skb);
-
-	if (tcp_try_rmem_schedule(sk, skb->truesize))
-		goto drop;
-
-	/* Disable header prediction. */
-	tp->pred_flags = 0;
-	inet_csk_schedule_ack(sk);
-
-	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
-		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
-
-	skb_set_owner_r(skb, sk);
-
-	if (!skb_peek(&tp->out_of_order_queue)) {
-		/* Initial out of order segment, build 1 SACK. */
-		if (tcp_is_sack(tp)) {
-			tp->rx_opt.num_sacks = 1;
-			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
-			tp->selective_acks[0].end_seq =
-						TCP_SKB_CB(skb)->end_seq;
-		}
-		__skb_queue_head(&tp->out_of_order_queue, skb);
-	} else {
-		struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue);
-		u32 seq = TCP_SKB_CB(skb)->seq;
-		u32 end_seq = TCP_SKB_CB(skb)->end_seq;
-
-		if (seq == TCP_SKB_CB(skb1)->end_seq) {
-			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-
-			if (!tp->rx_opt.num_sacks ||
-			    tp->selective_acks[0].end_seq != seq)
-				goto add_sack;
-
-			/* Common case: data arrive in order after hole. */
-			tp->selective_acks[0].end_seq = end_seq;
-			return;
-		}
-
-		/* Find place to insert this segment. */
-		while (1) {
-			if (!after(TCP_SKB_CB(skb1)->seq, seq))
-				break;
-			if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
-				skb1 = NULL;
-				break;
-			}
-			skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
-		}
-
-		/* Do skb overlap to previous one? */
-		if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
-			if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-				/* All the bits are present. Drop. */
-				__kfree_skb(skb);
-				tcp_dsack_set(sk, seq, end_seq);
-				goto add_sack;
-			}
-			if (after(seq, TCP_SKB_CB(skb1)->seq)) {
-				/* Partial overlap. */
-				tcp_dsack_set(sk, seq,
-					      TCP_SKB_CB(skb1)->end_seq);
-			} else {
-				if (skb_queue_is_first(&tp->out_of_order_queue,
-						       skb1))
-					skb1 = NULL;
-				else
-					skb1 = skb_queue_prev(
-						&tp->out_of_order_queue,
-						skb1);
-			}
-		}
-		if (!skb1)
-			__skb_queue_head(&tp->out_of_order_queue, skb);
-		else
-			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-
-		/* And clean segments covered by new one as whole. */
-		while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
-			skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
-
-			if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
-				break;
-			if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-				tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
-						 end_seq);
-				break;
-			}
-			__skb_unlink(skb1, &tp->out_of_order_queue);
-			tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
-					 TCP_SKB_CB(skb1)->end_seq);
-			__kfree_skb(skb1);
-		}
-
-add_sack:
-		if (tcp_is_sack(tp))
-			tcp_sack_new_ofo_skb(sk, seq, end_seq);
-	}
+	tcp_data_queue_ofo(sk, skb);
 }
 
 static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
-- 
cgit v1.2.3


From c8628155ece363487b57d33441ea0359018c0fa7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 18 Mar 2012 11:07:47 +0000
Subject: tcp: reduce out_of_order memory use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With increasing receive window sizes, but speed of light not improved
that much, out of order queue can contain a huge number of skbs, waiting
to be moved to receive_queue when missing packets can fill the holes.

Some devices happen to use fat skbs (truesize of 4096 + sizeof(struct
sk_buff)) to store regular (MTU <= 1500) frames. This makes highly
probable sk_rmem_alloc hits sk_rcvbuf limit, which can be 4Mbytes in
many cases.

When limit is hit, tcp stack calls tcp_collapse_ofo_queue(), a true
latency killer and cpu cache blower.

Doing the coalescing attempt each time we add a frame in ofo queue
permits to keep memory use tight and in many cases avoid the
tcp_collapse() thing later.

Tested on various wireless setups (b43, ath9k, ...) known to use big skb
truesize, this patch removed the "packets collapsed in receive queue due
to low socket buffer" I had before.

This also reduced average memory used by tcp sockets.

With help from Neal Cardwell.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: H.K. Jerry Chu <hkchu@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/proc.c      |  1 +
 net/ipv4/tcp_input.c | 19 ++++++++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 02d61079f08b..8af0d44e4e22 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -257,6 +257,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES),
 	SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP),
 	SNMP_MIB_ITEM("TCPRetransFail", LINUX_MIB_TCPRETRANSFAIL),
+	SNMP_MIB_ITEM("TCPRcvCoalesce", LINUX_MIB_TCPRCVCOALESCE),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fa7de12c4a52..e886e2f7fa8d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4484,7 +4484,24 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	end_seq = TCP_SKB_CB(skb)->end_seq;
 
 	if (seq == TCP_SKB_CB(skb1)->end_seq) {
-		__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+		/* Packets in ofo can stay in queue a long time.
+		 * Better try to coalesce them right now
+		 * to avoid future tcp_collapse_ofo_queue(),
+		 * probably the most expensive function in tcp stack.
+		 */
+		if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) {
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPRCVCOALESCE);
+			BUG_ON(skb_copy_bits(skb, 0,
+					     skb_put(skb1, skb->len),
+					     skb->len));
+			TCP_SKB_CB(skb1)->end_seq = end_seq;
+			TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+			__kfree_skb(skb);
+			skb = NULL;
+		} else {
+			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+		}
 
 		if (!tp->rx_opt.num_sacks ||
 		    tp->selective_acks[0].end_seq != seq)
-- 
cgit v1.2.3