diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 18 | ||||
-rw-r--r-- | net/ipv4/inet_fragment.c | 3 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 1 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 25 | ||||
-rw-r--r-- | net/ipv4/ip_vti.c | 14 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 85 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 12 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_memcontrol.c | 90 | ||||
-rw-r--r-- | net/ipv4/tcp_metrics.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp_offload.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 12 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 6 | ||||
-rw-r--r-- | net/ipv4/xfrm4_policy.c | 1 |
15 files changed, 95 insertions, 183 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9433a6186f54..09d78d4a3cff 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1251,8 +1251,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; unsigned int offset = 0; + bool udpfrag, encap; struct iphdr *iph; - bool tunnel; int proto; int nhoff; int ihl; @@ -1265,6 +1265,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_GRE | SKB_GSO_IPIP | + SKB_GSO_SIT | SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | SKB_GSO_MPLS | @@ -1289,8 +1290,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, goto out; __skb_pull(skb, ihl); - tunnel = SKB_GSO_CB(skb)->encap_level > 0; - if (tunnel) + encap = SKB_GSO_CB(skb)->encap_level > 0; + if (encap) features = skb->dev->hw_enc_features & netif_skb_features(skb); SKB_GSO_CB(skb)->encap_level += ihl; @@ -1305,24 +1306,23 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (IS_ERR_OR_NULL(segs)) goto out; + udpfrag = !!skb->encapsulation && proto == IPPROTO_UDP; skb = segs; do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); - if (!tunnel && proto == IPPROTO_UDP) { + if (udpfrag) { iph->id = htons(id); iph->frag_off = htons(offset >> 3); if (skb->next != NULL) iph->frag_off |= htons(IP_MF); offset += skb->len - nhoff - ihl; - } else { + } else { iph->id = htons(id++); } iph->tot_len = htons(skb->len - nhoff); ip_send_check(iph); - if (tunnel) { + if (encap) skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } skb->network_header = (u8 *)iph - skb->head; } while ((skb = skb->next)); @@ -1697,8 +1697,6 @@ static int __init inet_init(void) ip_static_sysctl_init(); #endif - tcp_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem; - /* * Add all the base protocols. */ diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index c5313a9c019b..bb075fc9a14f 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -93,9 +93,6 @@ void inet_frags_init(struct inet_frags *f) } rwlock_init(&f->lock); - f->rnd = (u32) ((totalram_pages ^ (totalram_pages >> 7)) ^ - (jiffies ^ (jiffies >> 6))); - setup_timer(&f->secret_timer, inet_frag_secret_rebuild, (unsigned long)f); f->secret_timer.expires = jiffies + f->secret_interval; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b66910aaef4d..2481993a4970 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -106,6 +106,7 @@ struct ip4_create_arg { static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) { + net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd)); return jhash_3words((__force u32)id << 16 | prot, (__force u32)saddr, (__force u32)daddr, ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7d8357bb2ba6..51be64e18e32 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -772,15 +772,20 @@ static inline int ip_ufo_append_data(struct sock *sk, /* initialize protocol header pointer */ skb->transport_header = skb->network_header + fragheaderlen; - skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - /* specify the length of each IP datagram fragment */ - skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + __skb_queue_tail(queue, skb); + } else if (skb_is_gso(skb)) { + goto append; } + skb->ip_summed = CHECKSUM_PARTIAL; + /* specify the length of each IP datagram fragment */ + skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + +append: return skb_append_datato_frags(sk, skb, getfrag, from, (length - transhdrlen)); } @@ -805,7 +810,7 @@ static int __ip_append_data(struct sock *sk, int copy; int err; int offset = 0; - unsigned int maxfraglen, fragheaderlen; + unsigned int maxfraglen, fragheaderlen, maxnonfragsize; int csummode = CHECKSUM_NONE; struct rtable *rt = (struct rtable *)cork->dst; @@ -818,8 +823,10 @@ static int __ip_append_data(struct sock *sk, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; + maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ? + mtu : 0xFFFF; - if (cork->length + length > 0xFFFF - fragheaderlen) { + if (cork->length + length > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu-exthdrlen); return -EMSGSIZE; @@ -1117,7 +1124,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, int mtu; int len; int err; - unsigned int maxfraglen, fragheaderlen, fraggap; + unsigned int maxfraglen, fragheaderlen, fraggap, maxnonfragsize; if (inet->hdrincl) return -EPERM; @@ -1141,8 +1148,10 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; + maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ? + mtu : 0xFFFF; - if (cork->length + size > 0xFFFF - fragheaderlen) { + if (cork->length + size > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu); return -EMSGSIZE; } diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 91f69bc883fe..5d9c845d288a 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -61,8 +61,17 @@ static int vti_rcv(struct sk_buff *skb) iph->saddr, iph->daddr, 0); if (tunnel != NULL) { struct pcpu_tstats *tstats; + u32 oldmark = skb->mark; + int ret; - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + + /* temporarily mark the skb with the tunnel o_key, to + * only match policies with this mark. + */ + skb->mark = be32_to_cpu(tunnel->parms.o_key); + ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb); + skb->mark = oldmark; + if (!ret) return -1; tstats = this_cpu_ptr(tunnel->dev->tstats); @@ -71,7 +80,6 @@ static int vti_rcv(struct sk_buff *skb) tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); - skb->mark = 0; secpath_reset(skb); skb->dev = tunnel->dev; return 1; @@ -103,7 +111,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) memset(&fl4, 0, sizeof(fl4)); flowi4_init_output(&fl4, tunnel->parms.link, - be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos), + be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPIP, 0, dst, tiph->saddr, 0, 0); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4b161d5aba0b..d5b1390eebbe 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -200,49 +200,6 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl, return ret; } -static int ipv4_tcp_mem(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) -{ - int ret; - unsigned long vec[3]; - struct net *net = current->nsproxy->net_ns; -#ifdef CONFIG_MEMCG_KMEM - struct mem_cgroup *memcg; -#endif - - struct ctl_table tmp = { - .data = &vec, - .maxlen = sizeof(vec), - .mode = ctl->mode, - }; - - if (!write) { - ctl->data = &net->ipv4.sysctl_tcp_mem; - return proc_doulongvec_minmax(ctl, write, buffer, lenp, ppos); - } - - ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos); - if (ret) - return ret; - -#ifdef CONFIG_MEMCG_KMEM - rcu_read_lock(); - memcg = mem_cgroup_from_task(current); - - tcp_prot_mem(memcg, vec[0], 0); - tcp_prot_mem(memcg, vec[1], 1); - tcp_prot_mem(memcg, vec[2], 2); - rcu_read_unlock(); -#endif - - net->ipv4.sysctl_tcp_mem[0] = vec[0]; - net->ipv4.sysctl_tcp_mem[1] = vec[1]; - net->ipv4.sysctl_tcp_mem[2] = vec[2]; - - return 0; -} - static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -557,6 +514,13 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { + .procname = "tcp_mem", + .maxlen = sizeof(sysctl_tcp_mem), + .data = &sysctl_tcp_mem, + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, + { .procname = "tcp_wmem", .data = &sysctl_tcp_wmem, .maxlen = sizeof(sysctl_tcp_wmem), @@ -865,12 +829,6 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = ipv4_local_port_range, }, - { - .procname = "tcp_mem", - .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem), - .mode = 0644, - .proc_handler = ipv4_tcp_mem, - }, { } }; @@ -880,32 +838,15 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) table = ipv4_net_table; if (!net_eq(net, &init_net)) { + int i; + table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL); if (table == NULL) goto err_alloc; - table[0].data = - &net->ipv4.sysctl_icmp_echo_ignore_all; - table[1].data = - &net->ipv4.sysctl_icmp_echo_ignore_broadcasts; - table[2].data = - &net->ipv4.sysctl_icmp_ignore_bogus_error_responses; - table[3].data = - &net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr; - table[4].data = - &net->ipv4.sysctl_icmp_ratelimit; - table[5].data = - &net->ipv4.sysctl_icmp_ratemask; - table[6].data = - &net->ipv4.sysctl_ping_group_range; - table[7].data = - &net->ipv4.sysctl_tcp_ecn; - table[8].data = - &net->ipv4.sysctl_local_ports.range; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; + /* Update the variables to point into the current struct net */ + for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) + table[i].data += (void *)net - (void *)&init_net; } /* @@ -922,8 +863,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) net->ipv4.sysctl_local_ports.range[0] = 32768; net->ipv4.sysctl_local_ports.range[1] = 61000; - tcp_init_mem(net); - net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); if (net->ipv4.ipv4_hdr == NULL) goto err_reg; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index be4b161802e8..8e8529d3c8c9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -288,9 +288,11 @@ int sysctl_tcp_min_tso_segs __read_mostly = 2; struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); +long sysctl_tcp_mem[3] __read_mostly; int sysctl_tcp_wmem[3] __read_mostly; int sysctl_tcp_rmem[3] __read_mostly; +EXPORT_SYMBOL(sysctl_tcp_mem); EXPORT_SYMBOL(sysctl_tcp_rmem); EXPORT_SYMBOL(sysctl_tcp_wmem); @@ -3097,13 +3099,13 @@ static int __init set_thash_entries(char *str) } __setup("thash_entries=", set_thash_entries); -void tcp_init_mem(struct net *net) +static void tcp_init_mem(void) { unsigned long limit = nr_free_buffer_pages() / 8; limit = max(limit, 128UL); - net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3; - net->ipv4.sysctl_tcp_mem[1] = limit; - net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2; + sysctl_tcp_mem[0] = limit / 4 * 3; + sysctl_tcp_mem[1] = limit; + sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; } void __init tcp_init(void) @@ -3165,7 +3167,7 @@ void __init tcp_init(void) sysctl_tcp_max_orphans = cnt / 2; sysctl_max_syn_backlog = max(128, cnt / 256); - tcp_init_mem(&init_net); + tcp_init_mem(); /* Set per-socket limits to no more than 1/128 the pressure threshold */ limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); max_wshare = min(4UL*1024*1024, limit); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eb651a069a6c..b935397c703c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3338,7 +3338,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) tcp_init_cwnd_reduction(sk, true); tcp_set_ca_state(sk, TCP_CA_CWR); tcp_end_cwnd_reduction(sk); - tcp_set_ca_state(sk, TCP_CA_Open); + tcp_try_keep_open(sk); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSPROBERECOVERY); } @@ -5751,6 +5751,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } else tcp_init_metrics(sk); + tcp_update_pacing_rate(sk); + /* Prevent spurious tcp_cwnd_restart() on first data packet */ tp->lsndtime = tcp_time_stamp; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 114d1b748cbb..300ab2c93f29 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2749,6 +2749,7 @@ struct proto tcp_prot = { .orphan_count = &tcp_orphan_count, .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, + .sysctl_mem = sysctl_tcp_mem, .sysctl_wmem = sysctl_tcp_wmem, .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 559d4ae6ebf4..03e9154f7e68 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -6,15 +6,10 @@ #include <linux/memcontrol.h> #include <linux/module.h> -static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto) -{ - return container_of(cg_proto, struct tcp_memcontrol, cg_proto); -} - static void memcg_tcp_enter_memory_pressure(struct sock *sk) { if (sk->sk_cgrp->memory_pressure) - *sk->sk_cgrp->memory_pressure = 1; + sk->sk_cgrp->memory_pressure = 1; } EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); @@ -27,34 +22,24 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) */ struct res_counter *res_parent = NULL; struct cg_proto *cg_proto, *parent_cg; - struct tcp_memcontrol *tcp; struct mem_cgroup *parent = parent_mem_cgroup(memcg); - struct net *net = current->nsproxy->net_ns; cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) return 0; - tcp = tcp_from_cgproto(cg_proto); - - tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0]; - tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1]; - tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2]; - tcp->tcp_memory_pressure = 0; + cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0]; + cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1]; + cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2]; + cg_proto->memory_pressure = 0; + cg_proto->memcg = memcg; parent_cg = tcp_prot.proto_cgroup(parent); if (parent_cg) - res_parent = parent_cg->memory_allocated; - - res_counter_init(&tcp->tcp_memory_allocated, res_parent); - percpu_counter_init(&tcp->tcp_sockets_allocated, 0); + res_parent = &parent_cg->memory_allocated; - cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure; - cg_proto->memory_pressure = &tcp->tcp_memory_pressure; - cg_proto->sysctl_mem = tcp->tcp_prot_mem; - cg_proto->memory_allocated = &tcp->tcp_memory_allocated; - cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated; - cg_proto->memcg = memcg; + res_counter_init(&cg_proto->memory_allocated, res_parent); + percpu_counter_init(&cg_proto->sockets_allocated, 0); return 0; } @@ -63,21 +48,17 @@ EXPORT_SYMBOL(tcp_init_cgroup); void tcp_destroy_cgroup(struct mem_cgroup *memcg) { struct cg_proto *cg_proto; - struct tcp_memcontrol *tcp; cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) return; - tcp = tcp_from_cgproto(cg_proto); - percpu_counter_destroy(&tcp->tcp_sockets_allocated); + percpu_counter_destroy(&cg_proto->sockets_allocated); } EXPORT_SYMBOL(tcp_destroy_cgroup); static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) { - struct net *net = current->nsproxy->net_ns; - struct tcp_memcontrol *tcp; struct cg_proto *cg_proto; u64 old_lim; int i; @@ -90,16 +71,14 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) if (val > RES_COUNTER_MAX) val = RES_COUNTER_MAX; - tcp = tcp_from_cgproto(cg_proto); - - old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); - ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val); + old_lim = res_counter_read_u64(&cg_proto->memory_allocated, RES_LIMIT); + ret = res_counter_set_limit(&cg_proto->memory_allocated, val); if (ret) return ret; for (i = 0; i < 3; i++) - tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, - net->ipv4.sysctl_tcp_mem[i]); + cg_proto->sysctl_mem[i] = min_t(long, val >> PAGE_SHIFT, + sysctl_tcp_mem[i]); if (val == RES_COUNTER_MAX) clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); @@ -156,28 +135,24 @@ static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) { - struct tcp_memcontrol *tcp; struct cg_proto *cg_proto; cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) return default_val; - tcp = tcp_from_cgproto(cg_proto); - return res_counter_read_u64(&tcp->tcp_memory_allocated, type); + return res_counter_read_u64(&cg_proto->memory_allocated, type); } static u64 tcp_read_usage(struct mem_cgroup *memcg) { - struct tcp_memcontrol *tcp; struct cg_proto *cg_proto; cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT; - tcp = tcp_from_cgproto(cg_proto); - return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); + return res_counter_read_u64(&cg_proto->memory_allocated, RES_USAGE); } static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) @@ -205,54 +180,25 @@ static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event) { struct mem_cgroup *memcg; - struct tcp_memcontrol *tcp; struct cg_proto *cg_proto; memcg = mem_cgroup_from_css(css); cg_proto = tcp_prot.proto_cgroup(memcg); if (!cg_proto) return 0; - tcp = tcp_from_cgproto(cg_proto); switch (event) { case RES_MAX_USAGE: - res_counter_reset_max(&tcp->tcp_memory_allocated); + res_counter_reset_max(&cg_proto->memory_allocated); break; case RES_FAILCNT: - res_counter_reset_failcnt(&tcp->tcp_memory_allocated); + res_counter_reset_failcnt(&cg_proto->memory_allocated); break; } return 0; } -unsigned long long tcp_max_memory(const struct mem_cgroup *memcg) -{ - struct tcp_memcontrol *tcp; - struct cg_proto *cg_proto; - - cg_proto = tcp_prot.proto_cgroup((struct mem_cgroup *)memcg); - if (!cg_proto) - return 0; - - tcp = tcp_from_cgproto(cg_proto); - return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); -} - -void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx) -{ - struct tcp_memcontrol *tcp; - struct cg_proto *cg_proto; - - cg_proto = tcp_prot.proto_cgroup(memcg); - if (!cg_proto) - return; - - tcp = tcp_from_cgproto(cg_proto); - - tcp->tcp_prot_mem[idx] = val; -} - static struct cftype tcp_files[] = { { .name = "kmem.tcp.limit_in_bytes", diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 4a2a84110dfb..2ab09cbae74d 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -671,8 +671,9 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen; write_seqlock_bh(&fastopen_seqlock); - tfom->mss = mss; - if (cookie->len > 0) + if (mss) + tfom->mss = mss; + if (cookie && cookie->len > 0) tfom->cookie = *cookie; if (syn_lost) { ++tfom->syn_loss; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index dfc96b00673e..a7a5583eab04 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -57,6 +57,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, SKB_GSO_TCPV6 | SKB_GSO_GRE | SKB_GSO_IPIP | + SKB_GSO_SIT | SKB_GSO_MPLS | SKB_GSO_UDP_TUNNEL | 0) || diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ce7c4d9d9195..672854664ff5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -986,8 +986,10 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || !sk_can_gso(sk) || - skb->ip_summed == CHECKSUM_NONE) { + /* Make sure we own this skb before messing gso_size/gso_segs */ + WARN_ON_ONCE(skb_cloned(skb)); + + if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal * non-TSO case. */ @@ -1067,9 +1069,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, if (nsize < 0) nsize = 0; - if (skb_cloned(skb) && - skb_is_nonlinear(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; /* Get a new skb... force flag on. */ @@ -2344,6 +2344,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) int oldpcount = tcp_skb_pcount(skb); if (unlikely(oldpcount > 1)) { + if (skb_unclone(skb, GFP_ATOMIC)) + return -ENOMEM; tcp_init_tso_segs(sk, skb, cur_mss); tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb)); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index af07b5b23ebf..64f0354c84c7 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -156,12 +156,16 @@ static bool retransmits_timed_out(struct sock *sk, static int tcp_write_timeout(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); int retry_until; bool do_reset, syn_set = false; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { - if (icsk->icsk_retransmits) + if (icsk->icsk_retransmits) { dst_negative_advice(sk); + if (tp->syn_fastopen || tp->syn_data) + tcp_fastopen_cache_set(sk, 0, NULL, true); + } retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; syn_set = true; } else { diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 9a459be24af7..ccde54248c8c 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -107,6 +107,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) memset(fl4, 0, sizeof(struct flowi4)); fl4->flowi4_mark = skb->mark; + fl4->flowi4_oif = skb_dst(skb)->dev->ifindex; if (!ip_is_fragment(iph)) { switch (iph->protocol) { |