summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/devinet.c10
-rw-r--r--net/ipv4/fib_frontend.c20
-rw-r--r--net/ipv4/fib_rules.c2
-rw-r--r--net/ipv4/fib_trie.c8
-rw-r--r--net/ipv4/inet_connection_sock.c7
-rw-r--r--net/ipv4/inetpeer.c7
-rw-r--r--net/ipv4/ip_output.c10
-rw-r--r--net/ipv4/ipmr.c14
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c14
-rw-r--r--net/ipv4/raw.c14
-rw-r--r--net/ipv4/route.c54
-rw-r--r--net/ipv4/tcp.c27
-rw-r--r--net/ipv4/tcp_cong.c3
-rw-r--r--net/ipv4/tcp_input.c24
-rw-r--r--net/ipv4/tcp_ipv4.c23
-rw-r--r--net/ipv4/tcp_metrics.c12
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c37
-rw-r--r--net/ipv4/tcp_timer.c6
-rw-r--r--net/ipv4/udp.c7
21 files changed, 171 insertions, 132 deletions
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 77e87aff419a..47800459e4cb 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1225,7 +1225,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event,
switch (event) {
case NETDEV_CHANGEADDR:
neigh_changeaddr(&arp_tbl, dev);
- rt_cache_flush(dev_net(dev), 0);
+ rt_cache_flush(dev_net(dev));
break;
default:
break;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 44bf82e3aef7..e12fad773852 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -725,7 +725,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
break;
case SIOCSIFFLAGS:
- ret = -EACCES;
+ ret = -EPERM;
if (!capable(CAP_NET_ADMIN))
goto out;
break;
@@ -733,7 +733,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCSIFBRDADDR: /* Set the broadcast address */
case SIOCSIFDSTADDR: /* Set the destination address */
case SIOCSIFNETMASK: /* Set the netmask for the interface */
- ret = -EACCES;
+ ret = -EPERM;
if (!capable(CAP_NET_ADMIN))
goto out;
ret = -EINVAL;
@@ -1503,7 +1503,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
if ((new_value == 0) && (old_value != 0))
- rt_cache_flush(net, 0);
+ rt_cache_flush(net);
}
return ret;
@@ -1537,7 +1537,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
dev_disable_lro(idev->dev);
}
rtnl_unlock();
- rt_cache_flush(net, 0);
+ rt_cache_flush(net);
}
}
@@ -1554,7 +1554,7 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write,
struct net *net = ctl->extra2;
if (write && *valp != val)
- rt_cache_flush(net, 0);
+ rt_cache_flush(net);
return ret;
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c43ae3fba792..8e2b475da9fa 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -148,7 +148,7 @@ static void fib_flush(struct net *net)
}
if (flushed)
- rt_cache_flush(net, -1);
+ rt_cache_flush(net);
}
/*
@@ -999,11 +999,11 @@ static void nl_fib_lookup_exit(struct net *net)
net->ipv4.fibnl = NULL;
}
-static void fib_disable_ip(struct net_device *dev, int force, int delay)
+static void fib_disable_ip(struct net_device *dev, int force)
{
if (fib_sync_down_dev(dev, force))
fib_flush(dev_net(dev));
- rt_cache_flush(dev_net(dev), delay);
+ rt_cache_flush(dev_net(dev));
arp_ifdown(dev);
}
@@ -1020,7 +1020,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
fib_sync_up(dev);
#endif
atomic_inc(&net->ipv4.dev_addr_genid);
- rt_cache_flush(dev_net(dev), -1);
+ rt_cache_flush(dev_net(dev));
break;
case NETDEV_DOWN:
fib_del_ifaddr(ifa, NULL);
@@ -1029,9 +1029,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
/* Last address was deleted from this interface.
* Disable IP.
*/
- fib_disable_ip(dev, 1, 0);
+ fib_disable_ip(dev, 1);
} else {
- rt_cache_flush(dev_net(dev), -1);
+ rt_cache_flush(dev_net(dev));
}
break;
}
@@ -1045,7 +1045,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
struct net *net = dev_net(dev);
if (event == NETDEV_UNREGISTER) {
- fib_disable_ip(dev, 2, -1);
+ fib_disable_ip(dev, 2);
rt_flush_dev(dev);
return NOTIFY_DONE;
}
@@ -1062,14 +1062,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
fib_sync_up(dev);
#endif
atomic_inc(&net->ipv4.dev_addr_genid);
- rt_cache_flush(dev_net(dev), -1);
+ rt_cache_flush(dev_net(dev));
break;
case NETDEV_DOWN:
- fib_disable_ip(dev, 0, 0);
+ fib_disable_ip(dev, 0);
break;
case NETDEV_CHANGEMTU:
case NETDEV_CHANGE:
- rt_cache_flush(dev_net(dev), 0);
+ rt_cache_flush(dev_net(dev));
break;
case NETDEV_UNREGISTER_BATCH:
break;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index a83d74e498d2..274309d3aded 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -259,7 +259,7 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
{
- rt_cache_flush(ops->fro_net, -1);
+ rt_cache_flush(ops->fro_net);
}
static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f0cdb30921c0..d1b93595b4a7 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -367,7 +367,7 @@ static void __leaf_free_rcu(struct rcu_head *head)
static inline void free_leaf(struct leaf *l)
{
- call_rcu_bh(&l->rcu, __leaf_free_rcu);
+ call_rcu(&l->rcu, __leaf_free_rcu);
}
static inline void free_leaf_info(struct leaf_info *leaf)
@@ -1286,7 +1286,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
fib_release_info(fi_drop);
if (state & FA_S_ACCESSED)
- rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
@@ -1333,7 +1333,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
list_add_tail_rcu(&new_fa->fa_list,
(fa ? &fa->fa_list : fa_head));
- rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
succeeded:
@@ -1708,7 +1708,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
trie_leaf_remove(t, l);
if (fa->fa_state & FA_S_ACCESSED)
- rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index db0cf17c00f7..7f75f21d7b83 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -404,12 +404,15 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct inet_sock *newinet = inet_sk(newsk);
- struct ip_options_rcu *opt = ireq->opt;
+ struct ip_options_rcu *opt;
struct net *net = sock_net(sk);
struct flowi4 *fl4;
struct rtable *rt;
fl4 = &newinet->cork.fl.u.ip4;
+
+ rcu_read_lock();
+ opt = rcu_dereference(newinet->inet_opt);
flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -421,11 +424,13 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
goto no_route;
if (opt && opt->opt.is_strictroute && rt->rt_gateway)
goto route_err;
+ rcu_read_unlock();
return &rt->dst;
route_err:
ip_rt_put(rt);
no_route:
+ rcu_read_unlock();
IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index e1e0a4e8fd34..000e3d239d64 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -194,7 +194,7 @@ void __init inet_initpeers(void)
0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
NULL);
- INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker);
+ INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker);
}
static int addr_compare(const struct inetpeer_addr *a,
@@ -510,7 +510,10 @@ relookup:
secure_ipv6_id(daddr->addr.a6));
p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
p->rate_tokens = 0;
- p->rate_last = 0;
+ /* 60*HZ is arbitrary, but chosen enough high so that the first
+ * calculation of tokens is at its maximum.
+ */
+ p->rate_last = jiffies - 60*HZ;
INIT_LIST_HEAD(&p->gc_list);
/* Link the node. */
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ba39a52d18c1..c196d749daf2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -197,7 +197,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
- if (neigh) {
+ if (!IS_ERR(neigh)) {
int res = dst_neigh_output(dst, neigh, skb);
rcu_read_unlock_bh();
@@ -1338,10 +1338,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
iph->ihl = 5;
iph->tos = inet->tos;
iph->frag_off = df;
- ip_select_ident(iph, &rt->dst, sk);
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
ip_copy_addrs(iph, fl4);
+ ip_select_ident(iph, &rt->dst, sk);
if (opt) {
iph->ihl += opt->optlen>>2;
@@ -1366,9 +1366,8 @@ out:
return skb;
}
-int ip_send_skb(struct sk_buff *skb)
+int ip_send_skb(struct net *net, struct sk_buff *skb)
{
- struct net *net = sock_net(skb->sk);
int err;
err = ip_local_out(skb);
@@ -1391,7 +1390,7 @@ int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4)
return 0;
/* Netfilter gets whole the not fragmented skb. */
- return ip_send_skb(skb);
+ return ip_send_skb(sock_net(sk), skb);
}
/*
@@ -1536,6 +1535,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
arg->csumoffset) = csum_fold(csum_add(nskb->csum,
arg->csum));
nskb->ip_summed = CHECKSUM_NONE;
+ skb_orphan(nskb);
skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
ip_push_pending_frames(sk, &fl4);
}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8eec8f4a0536..ebdf06f938bf 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -124,6 +124,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
static struct kmem_cache *mrt_cachep __read_mostly;
static struct mr_table *ipmr_new_table(struct net *net, u32 id);
+static void ipmr_free_table(struct mr_table *mrt);
+
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc_cache *cache,
int local);
@@ -131,6 +133,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
struct mfc_cache *c, struct rtmsg *rtm);
+static void mroute_clean_tables(struct mr_table *mrt);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -271,7 +274,7 @@ static void __net_exit ipmr_rules_exit(struct net *net)
list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
list_del(&mrt->list);
- kfree(mrt);
+ ipmr_free_table(mrt);
}
fib_rules_unregister(net->ipv4.mr_rules_ops);
}
@@ -299,7 +302,7 @@ static int __net_init ipmr_rules_init(struct net *net)
static void __net_exit ipmr_rules_exit(struct net *net)
{
- kfree(net->ipv4.mrt);
+ ipmr_free_table(net->ipv4.mrt);
}
#endif
@@ -336,6 +339,13 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
return mrt;
}
+static void ipmr_free_table(struct mr_table *mrt)
+{
+ del_timer_sync(&mrt->ipmr_expire_timer);
+ mroute_clean_tables(mrt);
+ kfree(mrt);
+}
+
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index ea4a23813d26..9c87cde28ff8 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -148,7 +148,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
hdr, NULL, &matchoff, &matchlen,
&addr, &port) > 0) {
- unsigned int matchend, poff, plen, buflen, n;
+ unsigned int olen, matchend, poff, plen, buflen, n;
char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
/* We're only interested in headers related to this
@@ -163,17 +163,18 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
goto next;
}
+ olen = *datalen;
if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
&addr, port))
return NF_DROP;
- matchend = matchoff + matchlen;
+ matchend = matchoff + matchlen + *datalen - olen;
/* The maddr= parameter (RFC 2361) specifies where to send
* the reply. */
if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
"maddr=", &poff, &plen,
- &addr) > 0 &&
+ &addr, true) > 0 &&
addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
buflen = sprintf(buffer, "%pI4",
@@ -187,7 +188,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
* from which the server received the request. */
if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
"received=", &poff, &plen,
- &addr) > 0 &&
+ &addr, false) > 0 &&
addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
buflen = sprintf(buffer, "%pI4",
@@ -501,7 +502,10 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
ret = nf_ct_expect_related(rtcp_exp);
if (ret == 0)
break;
- else if (ret != -EBUSY) {
+ else if (ret == -EBUSY) {
+ nf_ct_unexpect_related(rtp_exp);
+ continue;
+ } else if (ret < 0) {
nf_ct_unexpect_related(rtp_exp);
port = 0;
break;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index f2425785d40a..73d1e4df4bf6 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -131,18 +131,20 @@ found:
* 0 - deliver
* 1 - block
*/
-static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
+static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
{
- int type;
+ struct icmphdr _hdr;
+ const struct icmphdr *hdr;
- if (!pskb_may_pull(skb, sizeof(struct icmphdr)))
+ hdr = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_hdr), &_hdr);
+ if (!hdr)
return 1;
- type = icmp_hdr(skb)->type;
- if (type < 32) {
+ if (hdr->type < 32) {
__u32 data = raw_sk(sk)->filter.data;
- return ((1 << type) & data) != 0;
+ return ((1U << hdr->type) & data) != 0;
}
/* Do not block unknown ICMP types */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c035251beb07..fd9af60397b5 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -70,7 +70,6 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/bootmem.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
@@ -80,7 +79,6 @@
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
-#include <linux/workqueue.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
@@ -88,11 +86,9 @@
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
-#include <linux/jhash.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
-#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
@@ -206,11 +202,6 @@ EXPORT_SYMBOL(ip_tos2prio);
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
-static inline int rt_genid(struct net *net)
-{
- return atomic_read(&net->ipv4.rt_genid);
-}
-
#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
@@ -451,27 +442,9 @@ static inline bool rt_is_expired(const struct rtable *rth)
return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}
-/*
- * Perturbation of rt_genid by a small quantity [1..256]
- * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
- * many times (2^24) without giving recent rt_genid.
- * Jenkins hash is strong enough that litle changes of rt_genid are OK.
- */
-static void rt_cache_invalidate(struct net *net)
-{
- unsigned char shuffle;
-
- get_random_bytes(&shuffle, sizeof(shuffle));
- atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
-}
-
-/*
- * delay < 0 : invalidate cache (fast : entries will be deleted later)
- * delay >= 0 : invalidate & flush cache (can be long)
- */
-void rt_cache_flush(struct net *net, int delay)
+void rt_cache_flush(struct net *net)
{
- rt_cache_invalidate(net);
+ rt_genid_bump(net);
}
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
@@ -938,12 +911,14 @@ static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
if (mtu < ip_rt_min_pmtu)
mtu = ip_rt_min_pmtu;
+ rcu_read_lock();
if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) {
struct fib_nh *nh = &FIB_RES_NH(res);
update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
jiffies + ip_rt_mtu_expires);
}
+ rcu_read_unlock();
return mtu;
}
@@ -960,7 +935,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
dst->obsolete = DST_OBSOLETE_KILL;
} else {
rt->rt_pmtu = mtu;
- dst_set_expires(&rt->dst, ip_rt_mtu_expires);
+ rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires);
}
}
@@ -1267,7 +1242,7 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
{
struct rtable *rt = (struct rtable *) dst;
- if (dst->flags & DST_NOCACHE) {
+ if (!list_empty(&rt->rt_uncached)) {
spin_lock_bh(&rt_uncached_lock);
list_del(&rt->rt_uncached);
spin_unlock_bh(&rt_uncached_lock);
@@ -2032,7 +2007,6 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
}
dev_out = net->loopback_dev;
fl4->flowi4_oif = dev_out->ifindex;
- res.fi = NULL;
flags |= RTCF_LOCAL;
goto make_route;
}
@@ -2348,7 +2322,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
void ip_rt_multicast_event(struct in_device *in_dev)
{
- rt_cache_flush(dev_net(in_dev->dev), 0);
+ rt_cache_flush(dev_net(in_dev->dev));
}
#ifdef CONFIG_SYSCTL
@@ -2357,16 +2331,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
size_t *lenp, loff_t *ppos)
{
if (write) {
- int flush_delay;
- ctl_table ctl;
- struct net *net;
-
- memcpy(&ctl, __ctl, sizeof(ctl));
- ctl.data = &flush_delay;
- proc_dointvec(&ctl, write, buffer, lenp, ppos);
-
- net = (struct net *)__ctl->extra1;
- rt_cache_flush(net, flush_delay);
+ rt_cache_flush((struct net *)__ctl->extra1);
return 0;
}
@@ -2536,8 +2501,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
static __net_init int rt_genid_init(struct net *net)
{
- get_random_bytes(&net->ipv4.rt_genid,
- sizeof(net->ipv4.rt_genid));
+ atomic_set(&net->rt_genid, 0);
get_random_bytes(&net->ipv4.dev_addr_genid,
sizeof(net->ipv4.dev_addr_genid));
return 0;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e7e6eeae49c0..5f6419341821 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -811,7 +811,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
old_size_goal + mss_now > xmit_size_goal)) {
xmit_size_goal = old_size_goal;
} else {
- tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
+ tp->xmit_size_goal_segs =
+ min_t(u16, xmit_size_goal / mss_now,
+ sk->sk_gso_max_segs);
xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
}
}
@@ -1760,8 +1762,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
}
#ifdef CONFIG_NET_DMA
- if (tp->ucopy.dma_chan)
- dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+ if (tp->ucopy.dma_chan) {
+ if (tp->rcv_wnd == 0 &&
+ !skb_queue_empty(&sk->sk_async_wait_queue)) {
+ tcp_service_net_dma(sk, true);
+ tcp_cleanup_rbuf(sk, copied);
+ } else
+ dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+ }
#endif
if (copied >= target) {
/* Do not sleep, just process backlog. */
@@ -2323,10 +2331,17 @@ static int tcp_repair_options_est(struct tcp_sock *tp,
tp->rx_opt.mss_clamp = opt.opt_val;
break;
case TCPOPT_WINDOW:
- if (opt.opt_val > 14)
- return -EFBIG;
+ {
+ u16 snd_wscale = opt.opt_val & 0xFFFF;
+ u16 rcv_wscale = opt.opt_val >> 16;
+
+ if (snd_wscale > 14 || rcv_wscale > 14)
+ return -EFBIG;
- tp->rx_opt.snd_wscale = opt.opt_val;
+ tp->rx_opt.snd_wscale = snd_wscale;
+ tp->rx_opt.rcv_wscale = rcv_wscale;
+ tp->rx_opt.wscale_ok = 1;
+ }
break;
case TCPOPT_SACK_PERM:
if (opt.opt_val != 0)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 4d4db16e336e..1432cdb0644c 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -291,7 +291,8 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
left = tp->snd_cwnd - in_flight;
if (sk_can_gso(sk) &&
left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
- left * tp->mss_cache < sk->sk_gso_max_size)
+ left * tp->mss_cache < sk->sk_gso_max_size &&
+ left < sk->sk_gso_max_segs)
return true;
return left <= tcp_max_tso_deferred_mss(tp);
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2fd2bc9e3c64..d377f4854cb8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2926,13 +2926,14 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
* tcp_xmit_retransmit_queue().
*/
static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
- int newly_acked_sacked, bool is_dupack,
+ int prior_sacked, bool is_dupack,
int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
(tcp_fackets_out(tp) > tp->reordering));
+ int newly_acked_sacked = 0;
int fast_rexmit = 0;
if (WARN_ON(!tp->packets_out && tp->sacked_out))
@@ -2992,6 +2993,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
tcp_add_reno_sack(sk);
} else
do_lost = tcp_try_undo_partial(sk, pkts_acked);
+ newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
break;
case TCP_CA_Loss:
if (flag & FLAG_DATA_ACKED)
@@ -3013,6 +3015,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
if (is_dupack)
tcp_add_reno_sack(sk);
}
+ newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
if (icsk->icsk_ca_state <= TCP_CA_Disorder)
tcp_try_undo_dsack(sk);
@@ -3590,7 +3593,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
int prior_packets;
int prior_sacked = tp->sacked_out;
int pkts_acked = 0;
- int newly_acked_sacked = 0;
bool frto_cwnd = false;
/* If the ack is older than previous acks
@@ -3666,8 +3668,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
pkts_acked = prior_packets - tp->packets_out;
- newly_acked_sacked = (prior_packets - prior_sacked) -
- (tp->packets_out - tp->sacked_out);
if (tp->frto_counter)
frto_cwnd = tcp_process_frto(sk, flag);
@@ -3681,7 +3681,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_may_raise_cwnd(sk, flag))
tcp_cong_avoid(sk, ack, prior_in_flight);
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
- tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+ tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
is_dupack, flag);
} else {
if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
@@ -3698,7 +3698,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK)
- tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+ tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
is_dupack, flag);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
@@ -3718,8 +3718,7 @@ old_ack:
*/
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
- newly_acked_sacked = tp->sacked_out - prior_sacked;
- tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+ tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
is_dupack, flag);
}
@@ -4662,7 +4661,7 @@ queue_and_out:
if (eaten > 0)
kfree_skb_partial(skb, fragstolen);
- else if (!sock_flag(sk, SOCK_DEAD))
+ if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk, 0);
return;
}
@@ -5392,6 +5391,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
{
struct tcp_sock *tp = tcp_sk(sk);
+ if (unlikely(sk->sk_rx_dst == NULL))
+ inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
/*
* Header prediction.
* The code loosely follows the one in the famous
@@ -5555,8 +5556,7 @@ no_ack:
#endif
if (eaten)
kfree_skb_partial(skb, fragstolen);
- else
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk, 0);
return 0;
}
}
@@ -5605,7 +5605,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
tcp_set_state(sk, TCP_ESTABLISHED);
if (skb != NULL) {
- inet_sk_rx_dst_set(sk, skb);
+ icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
security_inet_conn_established(sk, skb);
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 642be8a4c6a3..be23a0b7b89e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -417,10 +417,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
tp->mtu_info = info;
- if (!sock_owned_by_user(sk))
+ if (!sock_owned_by_user(sk)) {
tcp_v4_mtu_reduced(sk);
- else
- set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags);
+ } else {
+ if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
+ sock_hold(sk);
+ }
goto out;
}
@@ -1462,6 +1464,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
goto exit_nonewsk;
newsk->sk_gso_type = SKB_GSO_TCPV4;
+ inet_sk_rx_dst_set(newsk, skb);
newtp = tcp_sk(newsk);
newinet = inet_sk(newsk);
@@ -1627,9 +1630,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
sk->sk_rx_dst = NULL;
}
}
- if (unlikely(sk->sk_rx_dst == NULL))
- inet_sk_rx_dst_set(sk, skb);
-
if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
goto reset;
@@ -1872,10 +1872,21 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = {
.twsk_destructor= tcp_twsk_destructor,
};
+void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+
+ dst_hold(dst);
+ sk->sk_rx_dst = dst;
+ inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+}
+EXPORT_SYMBOL(inet_sk_rx_dst_set);
+
const struct inet_connection_sock_af_ops ipv4_specific = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
+ .sk_rx_dst_set = inet_sk_rx_dst_set,
.conn_request = tcp_v4_conn_request,
.syn_recv_sock = tcp_v4_syn_recv_sock,
.net_header_len = sizeof(struct iphdr),
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 2288a6399e1e..0abe67bb4d3a 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -731,6 +731,18 @@ static int __net_init tcp_net_metrics_init(struct net *net)
static void __net_exit tcp_net_metrics_exit(struct net *net)
{
+ unsigned int i;
+
+ for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) {
+ struct tcp_metrics_block *tm, *next;
+
+ tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1);
+ while (tm) {
+ next = rcu_dereference_protected(tm->tcpm_next, 1);
+ kfree(tm);
+ tm = next;
+ }
+ }
kfree(net->ipv4.tcp_metrics_hash);
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 232a90c3ec86..6ff7f10dce9d 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -387,8 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
struct tcp_sock *oldtp = tcp_sk(sk);
struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
- inet_sk_rx_dst_set(newsk, skb);
-
/* TCP Cookie Transactions require space for the cookie pair,
* as it differs for each connection. There is no need to
* copy any s_data_payload stored at the original socket.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3f1bcff0b10b..d04632673a9e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -910,14 +910,18 @@ void tcp_release_cb(struct sock *sk)
if (flags & (1UL << TCP_TSQ_DEFERRED))
tcp_tsq_handler(sk);
- if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED))
+ if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
tcp_write_timer_handler(sk);
-
- if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED))
+ __sock_put(sk);
+ }
+ if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
tcp_delack_timer_handler(sk);
-
- if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED))
+ __sock_put(sk);
+ }
+ if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
sk->sk_prot->mtu_reduced(sk);
+ __sock_put(sk);
+ }
}
EXPORT_SYMBOL(tcp_release_cb);
@@ -940,7 +944,7 @@ void __init tcp_tasklet_init(void)
* We cant xmit new skbs from this context, as we might already
* hold qdisc lock.
*/
-void tcp_wfree(struct sk_buff *skb)
+static void tcp_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
struct tcp_sock *tp = tcp_sk(sk);
@@ -1522,21 +1526,21 @@ static void tcp_cwnd_validate(struct sock *sk)
* when we would be allowed to send the split-due-to-Nagle skb fully.
*/
static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
- unsigned int mss_now, unsigned int cwnd)
+ unsigned int mss_now, unsigned int max_segs)
{
const struct tcp_sock *tp = tcp_sk(sk);
- u32 needed, window, cwnd_len;
+ u32 needed, window, max_len;
window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
- cwnd_len = mss_now * cwnd;
+ max_len = mss_now * max_segs;
- if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
- return cwnd_len;
+ if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
+ return max_len;
needed = min(skb->len, window);
- if (cwnd_len <= needed)
- return cwnd_len;
+ if (max_len <= needed)
+ return max_len;
return needed - needed % mss_now;
}
@@ -1765,7 +1769,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
limit = min(send_win, cong_win);
/* If a full-sized TSO skb can be sent, do it. */
- if (limit >= sk->sk_gso_max_size)
+ if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
+ sk->sk_gso_max_segs * tp->mss_cache))
goto send_now;
/* Middle in queue won't get any more data, full sendable already? */
@@ -1999,7 +2004,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
limit = mss_now;
if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
- cwnd_quota);
+ min_t(unsigned int,
+ cwnd_quota,
+ sk->sk_gso_max_segs));
if (skb->len > limit &&
unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6df36ad55a38..b774a03bd1dc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -252,7 +252,8 @@ static void tcp_delack_timer(unsigned long data)
inet_csk(sk)->icsk_ack.blocked = 1;
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
/* deleguate our work to tcp_release_cb() */
- set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
+ if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+ sock_hold(sk);
}
bh_unlock_sock(sk);
sock_put(sk);
@@ -481,7 +482,8 @@ static void tcp_write_timer(unsigned long data)
tcp_write_timer_handler(sk);
} else {
/* deleguate our work to tcp_release_cb() */
- set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
+ if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+ sock_hold(sk);
}
bh_unlock_sock(sk);
sock_put(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 53b89817c008..79c8dbe59b54 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -758,7 +758,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
uh->check = CSUM_MANGLED_0;
send:
- err = ip_send_skb(skb);
+ err = ip_send_skb(sock_net(sk), skb);
if (err) {
if (err == -ENOBUFS && !inet->recverr) {
UDP_INC_STATS_USER(sock_net(sk),
@@ -1226,6 +1226,11 @@ try_again:
if (unlikely(err)) {
trace_kfree_skb(skb, udp_recvmsg);
+ if (!peeked) {
+ atomic_inc(&sk->sk_drops);
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INERRORS, is_udplite);
+ }
goto out_free;
}