summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2012-03-05 09:20:08 +0100
committerIngo Molnar <mingo@elte.hu>2012-03-05 09:20:08 +0100
commit737f24bda723fdf89ecaacb99fa2bf5683c32799 (patch)
tree35495fff3e9956679cb5468e74e6814c8e44ee66 /net/ipv4
parent8eedce996556d7d06522cd3a0e6069141c8dffe0 (diff)
parentb7c924274c456499264d1cfa3d44063bb11eb5db (diff)
Merge branch 'perf/urgent' into perf/core
Conflicts: tools/perf/builtin-record.c tools/perf/builtin-top.c tools/perf/perf.h tools/perf/util/top.h Merge reason: resolve these cherry-picking conflicts. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig2
-rw-r--r--net/ipv4/arp.c3
-rw-r--r--net/ipv4/ip_gre.c10
-rw-r--r--net/ipv4/ip_options.c2
-rw-r--r--net/ipv4/ping.c1
-rw-r--r--net/ipv4/sysctl_net_ipv4.c6
-rw-r--r--net/ipv4/tcp.c28
-rw-r--r--net/ipv4/tcp_input.c45
-rw-r--r--net/ipv4/tcp_ipv4.c5
-rw-r--r--net/ipv4/tcp_timer.c5
-rw-r--r--net/ipv4/xfrm4_mode_beet.c5
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c6
12 files changed, 68 insertions, 50 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index aa2a2c79776f..d183262943d9 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -409,7 +409,7 @@ config INET_TCP_DIAG
config INET_UDP_DIAG
tristate "UDP: socket monitoring interface"
- depends on INET_DIAG
+ depends on INET_DIAG && (IPV6 || IPV6=n)
default n
---help---
Support for UDP socket monitoring interface used by the ss tool.
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 59402be133f0..63e49890ad31 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -863,7 +863,8 @@ static int arp_process(struct sk_buff *skb)
if (addr_type == RTN_UNICAST &&
(arp_fwd_proxy(in_dev, dev, rt) ||
arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
- pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
+ (rt->dst.dev != dev &&
+ pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) {
n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
if (n)
neigh_release(n);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 6b3ca5ba4450..38673d2860e2 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -65,7 +65,7 @@
it is infeasible task. The most general solutions would be
to keep skb->encapsulation counter (sort of local ttl),
and silently drop packet when it expires. It is a good
- solution, but it supposes maintaing new variable in ALL
+ solution, but it supposes maintaining new variable in ALL
skb, even if no tunneling is used.
Current solution: xmit_recursion breaks dead loops. This is a percpu
@@ -91,14 +91,14 @@
One of them is to parse packet trying to detect inner encapsulation
made by our node. It is difficult or even impossible, especially,
- taking into account fragmentation. TO be short, tt is not solution at all.
+ taking into account fragmentation. TO be short, ttl is not solution at all.
Current solution: The solution was UNEXPECTEDLY SIMPLE.
We force DF flag on tunnels with preconfigured hop limit,
that is ALL. :-) Well, it does not remove the problem completely,
but exponential growth of network traffic is changed to linear
(branches, that exceed pmtu are pruned) and tunnel mtu
- fastly degrades to value <68, where looping stops.
+ rapidly degrades to value <68, where looping stops.
Yes, it is not good if there exists a router in the loop,
which does not force DF, even when encapsulating packets have DF set.
But it is not our problem! Nobody could accuse us, we made
@@ -457,8 +457,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
GRE tunnels with enabled checksum. Tell them "thank you".
Well, I wonder, rfc1812 was written by Cisco employee,
- what the hell these idiots break standrads established
- by themself???
+ what the hell these idiots break standards established
+ by themselves???
*/
const struct iphdr *iph = (const struct iphdr *)skb->data;
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 1e60f7679075..42dd1a90edea 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -573,8 +573,8 @@ void ip_forward_options(struct sk_buff *skb)
}
if (srrptr + 3 <= srrspace) {
opt->is_changed = 1;
- ip_rt_get_source(&optptr[srrptr-1], skb, rt);
ip_hdr(skb)->daddr = opt->nexthop;
+ ip_rt_get_source(&optptr[srrptr-1], skb, rt);
optptr[2] = srrptr+4;
} else if (net_ratelimit())
printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index aea5a199c37a..b072386cee21 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -630,6 +630,7 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, isk->inet_num);
+ err = -EOPNOTSUPP;
if (flags & MSG_OOB)
goto out;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4cb9cd2f2c39..7a7724da9bff 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -778,7 +778,6 @@ EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
static __net_init int ipv4_sysctl_init_net(struct net *net)
{
struct ctl_table *table;
- unsigned long limit;
table = ipv4_net_table;
if (!net_eq(net, &init_net)) {
@@ -815,11 +814,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
net->ipv4.sysctl_rt_cache_rebuild_count = 4;
tcp_init_mem(net);
- limit = nr_free_buffer_pages() / 8;
- limit = max(limit, 128UL);
- net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
- net->ipv4.sysctl_tcp_mem[1] = limit;
- net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2;
net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
net_ipv4_ctl_path, table);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 06373b4a449a..22ef5f9fd2ff 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1876,6 +1876,20 @@ void tcp_shutdown(struct sock *sk, int how)
}
EXPORT_SYMBOL(tcp_shutdown);
+bool tcp_check_oom(struct sock *sk, int shift)
+{
+ bool too_many_orphans, out_of_socket_memory;
+
+ too_many_orphans = tcp_too_many_orphans(sk, shift);
+ out_of_socket_memory = tcp_out_of_memory(sk);
+
+ if (too_many_orphans && net_ratelimit())
+ pr_info("TCP: too many orphaned sockets\n");
+ if (out_of_socket_memory && net_ratelimit())
+ pr_info("TCP: out of memory -- consider tuning tcp_mem\n");
+ return too_many_orphans || out_of_socket_memory;
+}
+
void tcp_close(struct sock *sk, long timeout)
{
struct sk_buff *skb;
@@ -2015,10 +2029,7 @@ adjudge_to_death:
}
if (sk->sk_state != TCP_CLOSE) {
sk_mem_reclaim(sk);
- if (tcp_too_many_orphans(sk, 0)) {
- if (net_ratelimit())
- printk(KERN_INFO "TCP: too many of orphaned "
- "sockets\n");
+ if (tcp_check_oom(sk, 0)) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
NET_INC_STATS_BH(sock_net(sk),
@@ -3218,7 +3229,6 @@ __setup("thash_entries=", set_thash_entries);
void tcp_init_mem(struct net *net)
{
- /* Set per-socket limits to no more than 1/128 the pressure threshold */
unsigned long limit = nr_free_buffer_pages() / 8;
limit = max(limit, 128UL);
net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
@@ -3230,7 +3240,8 @@ void __init tcp_init(void)
{
struct sk_buff *skb = NULL;
unsigned long limit;
- int i, max_share, cnt;
+ int max_share, cnt;
+ unsigned int i;
unsigned long jiffy = jiffies;
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
@@ -3273,7 +3284,7 @@ void __init tcp_init(void)
&tcp_hashinfo.bhash_size,
NULL,
64 * 1024);
- tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size;
+ tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
spin_lock_init(&tcp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
@@ -3287,7 +3298,8 @@ void __init tcp_init(void)
sysctl_max_syn_backlog = max(128, cnt / 256);
tcp_init_mem(&init_net);
- limit = nr_free_buffer_pages() / 8;
+ /* Set per-socket limits to no more than 1/128 the pressure threshold */
+ limit = nr_free_buffer_pages() << (PAGE_SHIFT - 10);
limit = max(limit, 128UL);
max_share = min(4UL*1024*1024, limit);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 976034f82320..53c8ce4046b2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1307,25 +1307,26 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
return in_sack;
}
-static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
- struct tcp_sacktag_state *state,
+/* Mark the given newly-SACKed range as such, adjusting counters and hints. */
+static u8 tcp_sacktag_one(struct sock *sk,
+ struct tcp_sacktag_state *state, u8 sacked,
+ u32 start_seq, u32 end_seq,
int dup_sack, int pcount)
{
struct tcp_sock *tp = tcp_sk(sk);
- u8 sacked = TCP_SKB_CB(skb)->sacked;
int fack_count = state->fack_count;
/* Account D-SACK for retransmitted packet. */
if (dup_sack && (sacked & TCPCB_RETRANS)) {
if (tp->undo_marker && tp->undo_retrans &&
- after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+ after(end_seq, tp->undo_marker))
tp->undo_retrans--;
if (sacked & TCPCB_SACKED_ACKED)
state->reord = min(fack_count, state->reord);
}
/* Nothing to do; acked frame is about to be dropped (was ACKed). */
- if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+ if (!after(end_seq, tp->snd_una))
return sacked;
if (!(sacked & TCPCB_SACKED_ACKED)) {
@@ -1344,13 +1345,13 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
/* New sack for not retransmitted frame,
* which was in hole. It is reordering.
*/
- if (before(TCP_SKB_CB(skb)->seq,
+ if (before(start_seq,
tcp_highest_sack_seq(tp)))
state->reord = min(fack_count,
state->reord);
/* SACK enhanced F-RTO (RFC4138; Appendix B) */
- if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
+ if (!after(end_seq, tp->frto_highmark))
state->flag |= FLAG_ONLY_ORIG_SACKED;
}
@@ -1368,8 +1369,7 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
- before(TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(tp->lost_skb_hint)->seq))
+ before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
tp->lost_cnt_hint += pcount;
if (fack_count > tp->fackets_out)
@@ -1388,6 +1388,9 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
return sacked;
}
+/* Shift newly-SACKed bytes from this skb to the immediately previous
+ * already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
+ */
static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
struct tcp_sacktag_state *state,
unsigned int pcount, int shifted, int mss,
@@ -1395,10 +1398,13 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
+ u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */
+ u32 end_seq = start_seq + shifted; /* end of newly-SACKed */
BUG_ON(!pcount);
- if (skb == tp->lost_skb_hint)
+ /* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). */
+ if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint))
tp->lost_cnt_hint += pcount;
TCP_SKB_CB(prev)->end_seq += shifted;
@@ -1424,8 +1430,11 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
skb_shinfo(skb)->gso_type = 0;
}
- /* We discard results */
- tcp_sacktag_one(skb, sk, state, dup_sack, pcount);
+ /* Adjust counters and hints for the newly sacked sequence range but
+ * discard the return value since prev is already marked.
+ */
+ tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
+ start_seq, end_seq, dup_sack, pcount);
/* Difference in this won't matter, both ACKed by the same cumul. ACK */
TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
@@ -1664,10 +1673,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
break;
if (in_sack) {
- TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk,
- state,
- dup_sack,
- tcp_skb_pcount(skb));
+ TCP_SKB_CB(skb)->sacked =
+ tcp_sacktag_one(sk,
+ state,
+ TCP_SKB_CB(skb)->sacked,
+ TCP_SKB_CB(skb)->seq,
+ TCP_SKB_CB(skb)->end_seq,
+ dup_sack,
+ tcp_skb_pcount(skb));
if (!before(TCP_SKB_CB(skb)->seq,
tcp_highest_sack_seq(tp)))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 337ba4cca052..94d683a61cba 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -651,6 +651,11 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
arg.iov[0].iov_len, IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
+ /* When socket is gone, all binding information is lost.
+ * routing might fail in this case. using iif for oif to
+ * make sure we can deliver it
+ */
+ arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
net = dev_net(skb_dst(skb)->dev);
arg.tos = ip_hdr(skb)->tos;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index a516d1e399df..cd2e0723266d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -77,10 +77,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
if (sk->sk_err_soft)
shift++;
- if (tcp_too_many_orphans(sk, shift)) {
- if (net_ratelimit())
- printk(KERN_INFO "Out of socket memory\n");
-
+ if (tcp_check_oom(sk, shift)) {
/* Catch exceptional cases, when connection requires reset.
* 1. Last segment was sent recently. */
if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 63418185f524..e3db3f915114 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -110,10 +110,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
skb_push(skb, sizeof(*iph));
skb_reset_network_header(skb);
-
- memmove(skb->data - skb->mac_len, skb_mac_header(skb),
- skb->mac_len);
- skb_set_mac_header(skb, -skb->mac_len);
+ skb_mac_header_rebuild(skb);
xfrm4_beet_make_header(skb);
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 534972e114ac..ed4bf11ef9f4 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -66,7 +66,6 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
- const unsigned char *old_mac;
int err = -EINVAL;
if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
@@ -84,10 +83,9 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
if (!(x->props.flags & XFRM_STATE_NOECN))
ipip_ecn_decapsulate(skb);
- old_mac = skb_mac_header(skb);
- skb_set_mac_header(skb, -skb->mac_len);
- memmove(skb_mac_header(skb), old_mac, skb->mac_len);
skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+
err = 0;
out: