summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c21
-rw-r--r--net/core/dev.c255
-rw-r--r--net/core/fib_rules.c2
-rw-r--r--net/core/filter.c7
-rw-r--r--net/core/net_namespace.c95
-rw-r--r--net/core/netpoll.c26
-rw-r--r--net/core/rtnetlink.c19
-rw-r--r--net/core/skbuff.c29
-rw-r--r--net/core/sock.c63
-rw-r--r--net/core/stream.c22
10 files changed, 312 insertions, 227 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 2dccd4ee591b..e0097531417a 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -86,7 +86,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
int error;
DEFINE_WAIT_FUNC(wait, receiver_wake_function);
- prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
/* Socket errors? */
error = sock_error(sk);
@@ -115,7 +115,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
error = 0;
*timeo_p = schedule_timeout(*timeo_p);
out:
- finish_wait(sk->sk_sleep, &wait);
+ finish_wait(sk_sleep(sk), &wait);
return error;
interrupted:
error = sock_intr_errno(*timeo_p);
@@ -229,9 +229,18 @@ EXPORT_SYMBOL(skb_free_datagram);
void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
{
- lock_sock(sk);
- skb_free_datagram(sk, skb);
- release_sock(sk);
+ if (likely(atomic_read(&skb->users) == 1))
+ smp_rmb();
+ else if (likely(!atomic_dec_and_test(&skb->users)))
+ return;
+
+ lock_sock_bh(sk);
+ skb_orphan(skb);
+ sk_mem_reclaim_partial(sk);
+ unlock_sock_bh(sk);
+
+ /* skb is now orphaned, can be freed outside of locked section */
+ __kfree_skb(skb);
}
EXPORT_SYMBOL(skb_free_datagram_locked);
@@ -726,7 +735,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
struct sock *sk = sock->sk;
unsigned int mask;
- sock_poll_wait(file, sk->sk_sleep, wait);
+ sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
/* exceptional events? */
diff --git a/net/core/dev.c b/net/core/dev.c
index b31d5d69a467..32611c8f1219 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1557,8 +1557,9 @@ static inline void __netif_reschedule(struct Qdisc *q)
local_irq_save(flags);
sd = &__get_cpu_var(softnet_data);
- q->next_sched = sd->output_queue;
- sd->output_queue = q;
+ q->next_sched = NULL;
+ *sd->output_queue_tailp = q;
+ sd->output_queue_tailp = &q->next_sched;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
}
@@ -1902,13 +1903,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
if (!list_empty(&ptype_all))
dev_queue_xmit_nit(skb, dev);
- if (netif_needs_gso(dev, skb)) {
- if (unlikely(dev_gso_segment(skb)))
- goto out_kfree_skb;
- if (skb->next)
- goto gso;
- }
-
/*
* If device doesnt need skb->dst, release it right now while
* its hot in this cpu cache
@@ -1917,6 +1911,14 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
skb_dst_drop(skb);
skb_orphan_try(skb);
+
+ if (netif_needs_gso(dev, skb)) {
+ if (unlikely(dev_gso_segment(skb)))
+ goto out_kfree_skb;
+ if (skb->next)
+ goto gso;
+ }
+
rc = ops->ndo_start_xmit(skb, dev);
if (rc == NETDEV_TX_OK)
txq_trans_update(txq);
@@ -1937,7 +1939,6 @@ gso:
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(nskb);
- skb_orphan_try(nskb);
rc = ops->ndo_start_xmit(nskb, dev);
if (unlikely(rc != NETDEV_TX_OK)) {
if (rc & ~NETDEV_TX_MASK)
@@ -2015,8 +2016,12 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
if (dev->real_num_tx_queues > 1)
queue_index = skb_tx_hash(dev, skb);
- if (sk && rcu_dereference_check(sk->sk_dst_cache, 1))
- sk_tx_queue_set(sk, queue_index);
+ if (sk) {
+ struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
+
+ if (dst && skb_dst(skb) == dst)
+ sk_tx_queue_set(sk, queue_index);
+ }
}
}
@@ -2200,7 +2205,13 @@ int netdev_max_backlog __read_mostly = 1000;
int netdev_budget __read_mostly = 300;
int weight_p __read_mostly = 64; /* old backlog weight */
-DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
+/* Called with irq disabled */
+static inline void ____napi_schedule(struct softnet_data *sd,
+ struct napi_struct *napi)
+{
+ list_add_tail(&napi->poll_list, &sd->poll_list);
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+}
#ifdef CONFIG_RPS
@@ -2225,7 +2236,11 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
int cpu = -1;
u8 ip_proto;
u16 tcpu;
- u32 addr1, addr2, ports, ihl;
+ u32 addr1, addr2, ihl;
+ union {
+ u32 v32;
+ u16 v16[2];
+ } ports;
if (skb_rx_queue_recorded(skb)) {
u16 index = skb_get_rx_queue(skb);
@@ -2271,7 +2286,6 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
default:
goto done;
}
- ports = 0;
switch (ip_proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
@@ -2281,25 +2295,20 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
case IPPROTO_SCTP:
case IPPROTO_UDPLITE:
if (pskb_may_pull(skb, (ihl * 4) + 4)) {
- __be16 *hports = (__be16 *) (skb->data + (ihl * 4));
- u32 sport, dport;
-
- sport = (__force u16) hports[0];
- dport = (__force u16) hports[1];
- if (dport < sport)
- swap(sport, dport);
- ports = (sport << 16) + dport;
+ ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
+ if (ports.v16[1] < ports.v16[0])
+ swap(ports.v16[0], ports.v16[1]);
+ break;
}
- break;
-
default:
+ ports.v32 = 0;
break;
}
/* get a consistent hash (same value on both flow directions) */
if (addr2 < addr1)
swap(addr1, addr2);
- skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd);
+ skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
if (!skb->rxhash)
skb->rxhash = 1;
@@ -2362,8 +2371,8 @@ static void rps_trigger_softirq(void *data)
{
struct softnet_data *sd = data;
- __napi_schedule(&sd->backlog);
- __get_cpu_var(netdev_rx_stat).received_rps++;
+ ____napi_schedule(sd, &sd->backlog);
+ sd->received_rps++;
}
#endif /* CONFIG_RPS */
@@ -2402,15 +2411,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
sd = &per_cpu(softnet_data, cpu);
local_irq_save(flags);
- __get_cpu_var(netdev_rx_stat).total++;
rps_lock(sd);
- if (sd->input_pkt_queue.qlen <= netdev_max_backlog) {
- if (sd->input_pkt_queue.qlen) {
+ if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
+ if (skb_queue_len(&sd->input_pkt_queue)) {
enqueue:
__skb_queue_tail(&sd->input_pkt_queue, skb);
#ifdef CONFIG_RPS
- *qtail = sd->input_queue_head + sd->input_pkt_queue.qlen;
+ *qtail = sd->input_queue_head +
+ skb_queue_len(&sd->input_pkt_queue);
#endif
rps_unlock(sd);
local_irq_restore(flags);
@@ -2420,14 +2429,14 @@ enqueue:
/* Schedule NAPI for backlog device */
if (napi_schedule_prep(&sd->backlog)) {
if (!rps_ipi_queued(sd))
- __napi_schedule(&sd->backlog);
+ ____napi_schedule(sd, &sd->backlog);
}
goto enqueue;
}
+ sd->dropped++;
rps_unlock(sd);
- __get_cpu_var(netdev_rx_stat).dropped++;
local_irq_restore(flags);
kfree_skb(skb);
@@ -2527,6 +2536,7 @@ static void net_tx_action(struct softirq_action *h)
local_irq_disable();
head = sd->output_queue;
sd->output_queue = NULL;
+ sd->output_queue_tailp = &sd->output_queue;
local_irq_enable();
while (head) {
@@ -2801,7 +2811,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
skb->dev = master;
}
- __get_cpu_var(netdev_rx_stat).total++;
+ __get_cpu_var(softnet_data).processed++;
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
@@ -2930,13 +2940,21 @@ static void flush_backlog(void *arg)
struct sk_buff *skb, *tmp;
rps_lock(sd);
- skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp)
+ skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
if (skb->dev == dev) {
__skb_unlink(skb, &sd->input_pkt_queue);
kfree_skb(skb);
- input_queue_head_incr(sd);
+ input_queue_head_add(sd, 1);
}
+ }
rps_unlock(sd);
+
+ skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
+ if (skb->dev == dev) {
+ __skb_unlink(skb, &sd->process_queue);
+ kfree_skb(skb);
+ }
+ }
}
static int napi_gro_complete(struct sk_buff *skb)
@@ -3239,30 +3257,85 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
}
EXPORT_SYMBOL(napi_gro_frags);
+/*
+ * net_rps_action sends any pending IPI's for rps.
+ * Note: called with local irq disabled, but exits with local irq enabled.
+ */
+static void net_rps_action_and_irq_enable(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+ struct softnet_data *remsd = sd->rps_ipi_list;
+
+ if (remsd) {
+ sd->rps_ipi_list = NULL;
+
+ local_irq_enable();
+
+ /* Send pending IPI's to kick RPS processing on remote cpus. */
+ while (remsd) {
+ struct softnet_data *next = remsd->rps_ipi_next;
+
+ if (cpu_online(remsd->cpu))
+ __smp_call_function_single(remsd->cpu,
+ &remsd->csd, 0);
+ remsd = next;
+ }
+ } else
+#endif
+ local_irq_enable();
+}
+
static int process_backlog(struct napi_struct *napi, int quota)
{
int work = 0;
- struct softnet_data *sd = &__get_cpu_var(softnet_data);
+ struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
+#ifdef CONFIG_RPS
+ /* Check if we have pending ipi, its better to send them now,
+ * not waiting net_rx_action() end.
+ */
+ if (sd->rps_ipi_list) {
+ local_irq_disable();
+ net_rps_action_and_irq_enable(sd);
+ }
+#endif
napi->weight = weight_p;
- do {
+ local_irq_disable();
+ while (work < quota) {
struct sk_buff *skb;
+ unsigned int qlen;
- local_irq_disable();
- rps_lock(sd);
- skb = __skb_dequeue(&sd->input_pkt_queue);
- if (!skb) {
- __napi_complete(napi);
- rps_unlock(sd);
+ while ((skb = __skb_dequeue(&sd->process_queue))) {
local_irq_enable();
- break;
+ __netif_receive_skb(skb);
+ if (++work >= quota)
+ return work;
+ local_irq_disable();
}
- input_queue_head_incr(sd);
- rps_unlock(sd);
- local_irq_enable();
- __netif_receive_skb(skb);
- } while (++work < quota);
+ rps_lock(sd);
+ qlen = skb_queue_len(&sd->input_pkt_queue);
+ if (qlen) {
+ input_queue_head_add(sd, qlen);
+ skb_queue_splice_tail_init(&sd->input_pkt_queue,
+ &sd->process_queue);
+ }
+ if (qlen < quota - work) {
+ /*
+ * Inline a custom version of __napi_complete().
+ * only current cpu owns and manipulates this napi,
+ * and NAPI_STATE_SCHED is the only possible flag set on backlog.
+ * we can use a plain write instead of clear_bit(),
+ * and we dont need an smp_mb() memory barrier.
+ */
+ list_del(&napi->poll_list);
+ napi->state = 0;
+
+ quota = work + qlen;
+ }
+ rps_unlock(sd);
+ }
+ local_irq_enable();
return work;
}
@@ -3278,8 +3351,7 @@ void __napi_schedule(struct napi_struct *n)
unsigned long flags;
local_irq_save(flags);
- list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ ____napi_schedule(&__get_cpu_var(softnet_data), n);
local_irq_restore(flags);
}
EXPORT_SYMBOL(__napi_schedule);
@@ -3350,45 +3422,16 @@ void netif_napi_del(struct napi_struct *napi)
}
EXPORT_SYMBOL(netif_napi_del);
-/*
- * net_rps_action sends any pending IPI's for rps.
- * Note: called with local irq disabled, but exits with local irq enabled.
- */
-static void net_rps_action_and_irq_disable(void)
-{
-#ifdef CONFIG_RPS
- struct softnet_data *sd = &__get_cpu_var(softnet_data);
- struct softnet_data *remsd = sd->rps_ipi_list;
-
- if (remsd) {
- sd->rps_ipi_list = NULL;
-
- local_irq_enable();
-
- /* Send pending IPI's to kick RPS processing on remote cpus. */
- while (remsd) {
- struct softnet_data *next = remsd->rps_ipi_next;
-
- if (cpu_online(remsd->cpu))
- __smp_call_function_single(remsd->cpu,
- &remsd->csd, 0);
- remsd = next;
- }
- } else
-#endif
- local_irq_enable();
-}
-
static void net_rx_action(struct softirq_action *h)
{
- struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
+ struct softnet_data *sd = &__get_cpu_var(softnet_data);
unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
void *have;
local_irq_disable();
- while (!list_empty(list)) {
+ while (!list_empty(&sd->poll_list)) {
struct napi_struct *n;
int work, weight;
@@ -3406,7 +3449,7 @@ static void net_rx_action(struct softirq_action *h)
* entries to the tail of this list, and only ->poll()
* calls can remove this head entry from the list.
*/
- n = list_first_entry(list, struct napi_struct, poll_list);
+ n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
have = netpoll_poll_lock(n);
@@ -3441,13 +3484,13 @@ static void net_rx_action(struct softirq_action *h)
napi_complete(n);
local_irq_disable();
} else
- list_move_tail(&n->poll_list, list);
+ list_move_tail(&n->poll_list, &sd->poll_list);
}
netpoll_poll_unlock(have);
}
out:
- net_rps_action_and_irq_disable();
+ net_rps_action_and_irq_enable(sd);
#ifdef CONFIG_NET_DMA
/*
@@ -3460,7 +3503,7 @@ out:
return;
softnet_break:
- __get_cpu_var(netdev_rx_stat).time_squeeze++;
+ sd->time_squeeze++;
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
goto out;
}
@@ -3661,17 +3704,17 @@ static int dev_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct netif_rx_stats *softnet_get_online(loff_t *pos)
+static struct softnet_data *softnet_get_online(loff_t *pos)
{
- struct netif_rx_stats *rc = NULL;
+ struct softnet_data *sd = NULL;
while (*pos < nr_cpu_ids)
if (cpu_online(*pos)) {
- rc = &per_cpu(netdev_rx_stat, *pos);
+ sd = &per_cpu(softnet_data, *pos);
break;
} else
++*pos;
- return rc;
+ return sd;
}
static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
@@ -3691,12 +3734,12 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
static int softnet_seq_show(struct seq_file *seq, void *v)
{
- struct netif_rx_stats *s = v;
+ struct softnet_data *sd = v;
seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
- s->total, s->dropped, s->time_squeeze, 0,
+ sd->processed, sd->dropped, sd->time_squeeze, 0,
0, 0, 0, 0, /* was fastroute */
- s->cpu_collision, s->received_rps);
+ sd->cpu_collision, sd->received_rps);
return 0;
}
@@ -5584,7 +5627,6 @@ static int dev_cpu_callback(struct notifier_block *nfb,
void *ocpu)
{
struct sk_buff **list_skb;
- struct Qdisc **list_net;
struct sk_buff *skb;
unsigned int cpu, oldcpu = (unsigned long)ocpu;
struct softnet_data *sd, *oldsd;
@@ -5605,13 +5647,13 @@ static int dev_cpu_callback(struct notifier_block *nfb,
*list_skb = oldsd->completion_queue;
oldsd->completion_queue = NULL;
- /* Find end of our output_queue. */
- list_net = &sd->output_queue;
- while (*list_net)
- list_net = &(*list_net)->next_sched;
/* Append output queue from offline CPU. */
- *list_net = oldsd->output_queue;
- oldsd->output_queue = NULL;
+ if (oldsd->output_queue) {
+ *sd->output_queue_tailp = oldsd->output_queue;
+ sd->output_queue_tailp = oldsd->output_queue_tailp;
+ oldsd->output_queue = NULL;
+ oldsd->output_queue_tailp = &oldsd->output_queue;
+ }
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
@@ -5619,8 +5661,10 @@ static int dev_cpu_callback(struct notifier_block *nfb,
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
netif_rx(skb);
- input_queue_head_incr(oldsd);
+ input_queue_head_add(oldsd, 1);
}
+ while ((skb = __skb_dequeue(&oldsd->process_queue)))
+ netif_rx(skb);
return NOTIFY_OK;
}
@@ -5838,10 +5882,13 @@ static int __init net_dev_init(void)
for_each_possible_cpu(i) {
struct softnet_data *sd = &per_cpu(softnet_data, i);
+ memset(sd, 0, sizeof(*sd));
skb_queue_head_init(&sd->input_pkt_queue);
+ skb_queue_head_init(&sd->process_queue);
sd->completion_queue = NULL;
INIT_LIST_HEAD(&sd->poll_list);
-
+ sd->output_queue = NULL;
+ sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS
sd->csd.func = rps_trigger_softirq;
sd->csd.info = sd;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 1bc66592453c..42e84e08a1be 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -122,7 +122,7 @@ errout:
}
struct fib_rules_ops *
-fib_rules_register(struct fib_rules_ops *tmpl, struct net *net)
+fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
{
struct fib_rules_ops *ops;
int err;
diff --git a/net/core/filter.c b/net/core/filter.c
index ff943bed21af..da69fb728d32 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -302,6 +302,8 @@ load_b:
A = skb->pkt_type;
continue;
case SKF_AD_IFINDEX:
+ if (!skb->dev)
+ return 0;
A = skb->dev->ifindex;
continue;
case SKF_AD_MARK:
@@ -310,6 +312,11 @@ load_b:
case SKF_AD_QUEUE:
A = skb->queue_mapping;
continue;
+ case SKF_AD_HATYPE:
+ if (!skb->dev)
+ return 0;
+ A = skb->dev->type;
+ continue;
case SKF_AD_NLATTR: {
struct nlattr *nla;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index bd8c4712ea24..c988e685433a 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -27,6 +27,51 @@ EXPORT_SYMBOL(init_net);
#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
+static void net_generic_release(struct rcu_head *rcu)
+{
+ struct net_generic *ng;
+
+ ng = container_of(rcu, struct net_generic, rcu);
+ kfree(ng);
+}
+
+static int net_assign_generic(struct net *net, int id, void *data)
+{
+ struct net_generic *ng, *old_ng;
+
+ BUG_ON(!mutex_is_locked(&net_mutex));
+ BUG_ON(id == 0);
+
+ ng = old_ng = net->gen;
+ if (old_ng->len >= id)
+ goto assign;
+
+ ng = kzalloc(sizeof(struct net_generic) +
+ id * sizeof(void *), GFP_KERNEL);
+ if (ng == NULL)
+ return -ENOMEM;
+
+ /*
+ * Some synchronisation notes:
+ *
+ * The net_generic explores the net->gen array inside rcu
+ * read section. Besides once set the net->gen->ptr[x]
+ * pointer never changes (see rules in netns/generic.h).
+ *
+ * That said, we simply duplicate this array and schedule
+ * the old copy for kfree after a grace period.
+ */
+
+ ng->len = id;
+ memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
+
+ rcu_assign_pointer(net->gen, ng);
+ call_rcu(&old_ng->rcu, net_generic_release);
+assign:
+ ng->ptr[id - 1] = data;
+ return 0;
+}
+
static int ops_init(const struct pernet_operations *ops, struct net *net)
{
int err;
@@ -469,10 +514,10 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
* addition run the exit method for all existing network
* namespaces.
*/
-void unregister_pernet_subsys(struct pernet_operations *module)
+void unregister_pernet_subsys(struct pernet_operations *ops)
{
mutex_lock(&net_mutex);
- unregister_pernet_operations(module);
+ unregister_pernet_operations(ops);
mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
@@ -526,49 +571,3 @@ void unregister_pernet_device(struct pernet_operations *ops)
mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
-
-static void net_generic_release(struct rcu_head *rcu)
-{
- struct net_generic *ng;
-
- ng = container_of(rcu, struct net_generic, rcu);
- kfree(ng);
-}
-
-int net_assign_generic(struct net *net, int id, void *data)
-{
- struct net_generic *ng, *old_ng;
-
- BUG_ON(!mutex_is_locked(&net_mutex));
- BUG_ON(id == 0);
-
- ng = old_ng = net->gen;
- if (old_ng->len >= id)
- goto assign;
-
- ng = kzalloc(sizeof(struct net_generic) +
- id * sizeof(void *), GFP_KERNEL);
- if (ng == NULL)
- return -ENOMEM;
-
- /*
- * Some synchronisation notes:
- *
- * The net_generic explores the net->gen array inside rcu
- * read section. Besides once set the net->gen->ptr[x]
- * pointer never changes (see rules in netns/generic.h).
- *
- * That said, we simply duplicate this array and schedule
- * the old copy for kfree after a grace period.
- */
-
- ng->len = id;
- memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
-
- rcu_assign_pointer(net->gen, ng);
- call_rcu(&old_ng->rcu, net_generic_release);
-assign:
- ng->ptr[id - 1] = data;
- return 0;
-}
-EXPORT_SYMBOL_GPL(net_assign_generic);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a58f59b97597..94825b109551 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -179,9 +179,8 @@ static void service_arp_queue(struct netpoll_info *npi)
}
}
-void netpoll_poll(struct netpoll *np)
+void netpoll_poll_dev(struct net_device *dev)
{
- struct net_device *dev = np->dev;
const struct net_device_ops *ops;
if (!dev || !netif_running(dev))
@@ -201,6 +200,11 @@ void netpoll_poll(struct netpoll *np)
zap_completion_queue();
}
+void netpoll_poll(struct netpoll *np)
+{
+ netpoll_poll_dev(np->dev);
+}
+
static void refill_skbs(void)
{
struct sk_buff *skb;
@@ -282,7 +286,7 @@ static int netpoll_owner_active(struct net_device *dev)
return 0;
}
-static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
int status = NETDEV_TX_BUSY;
unsigned long tries;
@@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
tries > 0; --tries) {
if (__netif_tx_trylock(txq)) {
if (!netif_tx_queue_stopped(txq)) {
+ dev->priv_flags |= IFF_IN_NETPOLL;
status = ops->ndo_start_xmit(skb, dev);
+ dev->priv_flags &= ~IFF_IN_NETPOLL;
if (status == NETDEV_TX_OK)
txq_trans_update(txq);
}
@@ -756,7 +762,10 @@ int netpoll_setup(struct netpoll *np)
atomic_inc(&npinfo->refcnt);
}
- if (!ndev->netdev_ops->ndo_poll_controller) {
+ npinfo->netpoll = np;
+
+ if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
+ !ndev->netdev_ops->ndo_poll_controller) {
printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
np->name, np->dev_name);
err = -ENOTSUPP;
@@ -878,6 +887,7 @@ void netpoll_cleanup(struct netpoll *np)
}
if (atomic_dec_and_test(&npinfo->refcnt)) {
+ const struct net_device_ops *ops;
skb_queue_purge(&npinfo->arp_tx);
skb_queue_purge(&npinfo->txq);
cancel_rearming_delayed_work(&npinfo->tx_work);
@@ -885,7 +895,11 @@ void netpoll_cleanup(struct netpoll *np)
/* clean after last, unfinished work */
__skb_queue_purge(&npinfo->txq);
kfree(npinfo);
- np->dev->npinfo = NULL;
+ ops = np->dev->netdev_ops;
+ if (ops->ndo_netpoll_cleanup)
+ ops->ndo_netpoll_cleanup(np->dev);
+ else
+ np->dev->npinfo = NULL;
}
}
@@ -908,6 +922,7 @@ void netpoll_set_trap(int trap)
atomic_dec(&trapped);
}
+EXPORT_SYMBOL(netpoll_send_skb);
EXPORT_SYMBOL(netpoll_set_trap);
EXPORT_SYMBOL(netpoll_trap);
EXPORT_SYMBOL(netpoll_print_options);
@@ -915,4 +930,5 @@ EXPORT_SYMBOL(netpoll_parse_options);
EXPORT_SYMBOL(netpoll_setup);
EXPORT_SYMBOL(netpoll_cleanup);
EXPORT_SYMBOL(netpoll_send_udp);
+EXPORT_SYMBOL(netpoll_poll_dev);
EXPORT_SYMBOL(netpoll_poll);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 78c85985cb30..23a71cb21273 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -98,7 +98,7 @@ int lockdep_rtnl_is_held(void)
EXPORT_SYMBOL(lockdep_rtnl_is_held);
#endif /* #ifdef CONFIG_PROVE_LOCKING */
-static struct rtnl_link *rtnl_msg_handlers[NPROTO];
+static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
static inline int rtm_msgindex(int msgtype)
{
@@ -118,7 +118,7 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
{
struct rtnl_link *tab;
- if (protocol < NPROTO)
+ if (protocol <= RTNL_FAMILY_MAX)
tab = rtnl_msg_handlers[protocol];
else
tab = NULL;
@@ -133,7 +133,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
{
struct rtnl_link *tab;
- if (protocol < NPROTO)
+ if (protocol <= RTNL_FAMILY_MAX)
tab = rtnl_msg_handlers[protocol];
else
tab = NULL;
@@ -167,7 +167,7 @@ int __rtnl_register(int protocol, int msgtype,
struct rtnl_link *tab;
int msgindex;
- BUG_ON(protocol < 0 || protocol >= NPROTO);
+ BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
tab = rtnl_msg_handlers[protocol];
@@ -219,7 +219,7 @@ int rtnl_unregister(int protocol, int msgtype)
{
int msgindex;
- BUG_ON(protocol < 0 || protocol >= NPROTO);
+ BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
if (rtnl_msg_handlers[protocol] == NULL)
@@ -241,7 +241,7 @@ EXPORT_SYMBOL_GPL(rtnl_unregister);
*/
void rtnl_unregister_all(int protocol)
{
- BUG_ON(protocol < 0 || protocol >= NPROTO);
+ BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
kfree(rtnl_msg_handlers[protocol]);
rtnl_msg_handlers[protocol] = NULL;
@@ -1319,10 +1319,11 @@ replay:
err = ops->newlink(net, dev, tb, data);
else
err = register_netdevice(dev);
- if (err < 0 && !IS_ERR(dev)) {
+
+ if (err < 0 && !IS_ERR(dev))
free_netdev(dev);
+ if (err < 0)
goto out;
- }
err = rtnl_configure_link(dev, ifm);
if (err < 0)
@@ -1384,7 +1385,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
if (s_idx == 0)
s_idx = 1;
- for (idx = 1; idx < NPROTO; idx++) {
+ for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
int type = cb->nlh->nlmsg_type-RTM_BASE;
if (idx < s_idx || idx == PF_PACKET)
continue;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bdea0efdf8cb..a9b0e1f77806 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -117,7 +117,7 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = {
*
* Out of line support code for skb_put(). Not user callable.
*/
-void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+static void skb_over_panic(struct sk_buff *skb, int sz, void *here)
{
printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
"data:%p tail:%#lx end:%#lx dev:%s\n",
@@ -126,7 +126,6 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
skb->dev ? skb->dev->name : "<NULL>");
BUG();
}
-EXPORT_SYMBOL(skb_over_panic);
/**
* skb_under_panic - private function
@@ -137,7 +136,7 @@ EXPORT_SYMBOL(skb_over_panic);
* Out of line support code for skb_push(). Not user callable.
*/
-void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
{
printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
"data:%p tail:%#lx end:%#lx dev:%s\n",
@@ -146,7 +145,6 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
skb->dev ? skb->dev->name : "<NULL>");
BUG();
}
-EXPORT_SYMBOL(skb_under_panic);
/* Allocate a new skbuff. We do this ourselves so we can fill in a few
* 'private' fields and also do memory statistics to find all the
@@ -183,12 +181,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
if (!skb)
goto out;
+ prefetchw(skb);
size = SKB_DATA_ALIGN(size);
data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
gfp_mask, node);
if (!data)
goto nodata;
+ prefetchw(data + size);
/*
* Only clear those fields we need to clear, not those that we will
@@ -210,15 +210,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
/* make sure we initialize shinfo sequentially */
shinfo = skb_shinfo(skb);
+ memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);
- shinfo->nr_frags = 0;
- shinfo->gso_size = 0;
- shinfo->gso_segs = 0;
- shinfo->gso_type = 0;
- shinfo->ip6_frag_id = 0;
- shinfo->tx_flags.flags = 0;
- skb_frag_list_init(skb);
- memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
if (fclone) {
struct sk_buff *child = skb + 1;
@@ -507,16 +500,10 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
return 0;
skb_release_head_state(skb);
+
shinfo = skb_shinfo(skb);
+ memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);
- shinfo->nr_frags = 0;
- shinfo->gso_size = 0;
- shinfo->gso_segs = 0;
- shinfo->gso_type = 0;
- shinfo->ip6_frag_id = 0;
- shinfo->tx_flags.flags = 0;
- skb_frag_list_init(skb);
- memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
memset(skb, 0, offsetof(struct sk_buff, tail));
skb->data = skb->head + NET_SKB_PAD;
@@ -1053,7 +1040,7 @@ EXPORT_SYMBOL(skb_push);
*/
unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
{
- return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
+ return skb_pull_inline(skb, len);
}
EXPORT_SYMBOL(skb_pull);
diff --git a/net/core/sock.c b/net/core/sock.c
index 7effa1e689df..94c4affdda9b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -327,6 +327,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
skb->dev = NULL;
+ if (sk_rcvqueues_full(sk, skb)) {
+ atomic_inc(&sk->sk_drops);
+ goto discard_and_relse;
+ }
if (nested)
bh_lock_sock_nested(sk);
else
@@ -1207,7 +1211,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
*/
sk_refcnt_debug_inc(newsk);
sk_set_socket(newsk, NULL);
- newsk->sk_sleep = NULL;
+ newsk->sk_wq = NULL;
if (newsk->sk_prot->sockets_allocated)
percpu_counter_inc(newsk->sk_prot->sockets_allocated);
@@ -1395,7 +1399,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
if (signal_pending(current))
break;
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
break;
if (sk->sk_shutdown & SEND_SHUTDOWN)
@@ -1404,7 +1408,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
break;
timeo = schedule_timeout(timeo);
}
- finish_wait(sk->sk_sleep, &wait);
+ finish_wait(sk_sleep(sk), &wait);
return timeo;
}
@@ -1570,11 +1574,11 @@ int sk_wait_data(struct sock *sk, long *timeo)
int rc;
DEFINE_WAIT(wait);
- prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
- finish_wait(sk->sk_sleep, &wait);
+ finish_wait(sk_sleep(sk), &wait);
return rc;
}
EXPORT_SYMBOL(sk_wait_data);
@@ -1796,41 +1800,53 @@ EXPORT_SYMBOL(sock_no_sendpage);
static void sock_def_wakeup(struct sock *sk)
{
- read_lock(&sk->sk_callback_lock);
- if (sk_has_sleeper(sk))
- wake_up_interruptible_all(sk->sk_sleep);
- read_unlock(&sk->sk_callback_lock);
+ struct socket_wq *wq;
+
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (wq_has_sleeper(wq))
+ wake_up_interruptible_all(&wq->wait);
+ rcu_read_unlock();
}
static void sock_def_error_report(struct sock *sk)
{
- read_lock(&sk->sk_callback_lock);
- if (sk_has_sleeper(sk))
- wake_up_interruptible_poll(sk->sk_sleep, POLLERR);
+ struct socket_wq *wq;
+
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (wq_has_sleeper(wq))
+ wake_up_interruptible_poll(&wq->wait, POLLERR);
sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
- read_unlock(&sk->sk_callback_lock);
+ rcu_read_unlock();
}
static void sock_def_readable(struct sock *sk, int len)
{
- read_lock(&sk->sk_callback_lock);
- if (sk_has_sleeper(sk))
- wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
+ struct socket_wq *wq;
+
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (wq_has_sleeper(wq))
+ wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
POLLRDNORM | POLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
- read_unlock(&sk->sk_callback_lock);
+ rcu_read_unlock();
}
static void sock_def_write_space(struct sock *sk)
{
- read_lock(&sk->sk_callback_lock);
+ struct socket_wq *wq;
+
+ rcu_read_lock();
/* Do not wake up a writer until he can make "significant"
* progress. --DaveM
*/
if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
- if (sk_has_sleeper(sk))
- wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
+ wq = rcu_dereference(sk->sk_wq);
+ if (wq_has_sleeper(wq))
+ wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
POLLWRNORM | POLLWRBAND);
/* Should agree with poll, otherwise some programs break */
@@ -1838,7 +1854,7 @@ static void sock_def_write_space(struct sock *sk)
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
- read_unlock(&sk->sk_callback_lock);
+ rcu_read_unlock();
}
static void sock_def_destruct(struct sock *sk)
@@ -1885,7 +1901,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_allocation = GFP_KERNEL;
sk->sk_rcvbuf = sysctl_rmem_default;
sk->sk_sndbuf = sysctl_wmem_default;
- sk->sk_backlog.limit = sk->sk_rcvbuf << 1;
sk->sk_state = TCP_CLOSE;
sk_set_socket(sk, sock);
@@ -1893,10 +1908,10 @@ void sock_init_data(struct socket *sock, struct sock *sk)
if (sock) {
sk->sk_type = sock->type;
- sk->sk_sleep = &sock->wait;
+ sk->sk_wq = sock->wq;
sock->sk = sk;
} else
- sk->sk_sleep = NULL;
+ sk->sk_wq = NULL;
spin_lock_init(&sk->sk_dst_lock);
rwlock_init(&sk->sk_callback_lock);
diff --git a/net/core/stream.c b/net/core/stream.c
index a37debfeb1b2..cc196f42b8d8 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -28,15 +28,19 @@
void sk_stream_write_space(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
+ struct socket_wq *wq;
if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
clear_bit(SOCK_NOSPACE, &sock->flags);
- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible_poll(sk->sk_sleep, POLLOUT |
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (wq_has_sleeper(wq))
+ wake_up_interruptible_poll(&wq->wait, POLLOUT |
POLLWRNORM | POLLWRBAND);
- if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
+ if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
+ rcu_read_unlock();
}
}
@@ -66,13 +70,13 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
if (signal_pending(tsk))
return sock_intr_errno(*timeo_p);
- prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
sk->sk_write_pending++;
done = sk_wait_event(sk, timeo_p,
!sk->sk_err &&
!((1 << sk->sk_state) &
~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
- finish_wait(sk->sk_sleep, &wait);
+ finish_wait(sk_sleep(sk), &wait);
sk->sk_write_pending--;
} while (!done);
return 0;
@@ -96,13 +100,13 @@ void sk_stream_wait_close(struct sock *sk, long timeout)
DEFINE_WAIT(wait);
do {
- prepare_to_wait(sk->sk_sleep, &wait,
+ prepare_to_wait(sk_sleep(sk), &wait,
TASK_INTERRUPTIBLE);
if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
break;
} while (!signal_pending(current) && timeout);
- finish_wait(sk->sk_sleep, &wait);
+ finish_wait(sk_sleep(sk), &wait);
}
}
@@ -126,7 +130,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
while (1) {
set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
- prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
@@ -157,7 +161,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
*timeo_p = current_timeo;
}
out:
- finish_wait(sk->sk_sleep, &wait);
+ finish_wait(sk_sleep(sk), &wait);
return err;
do_error: