diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-11 21:19:00 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-11 21:19:00 -0400 |
commit | ca321885b0511a85e2d1cd40caafedbeb18f4af6 (patch) | |
tree | 0042e8674aff7ae5785db467836d8d4101906f70 /net | |
parent | 052db7ec86dff26f734031c3ef5c2c03a94af0af (diff) | |
parent | 01d2d484e49e9bc0ed9b5fdaf345a0e2bf35ffed (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:
"This set fixes a bunch of fallout from the changes that went in during
this merge window, particularly:
- Fix fsl_pq_mdio (Claudiu Manoil) and fm10k (Pranith Kumar) build
failures.
- Several networking drivers do atomic_set() on page counts where
that's not exactly legal. From Eric Dumazet.
- Make __skb_flow_get_ports() work cleanly with unaligned data, from
Alexander Duyck.
- Fix some kernel-doc buglets in rfkill and netlabel, from Fabian
Frederick.
- Unbalanced enable_irq_wake usage in bcmgenet and systemport
drivers, from Florian Fainelli.
- pxa168_eth needs to depend on HAS_DMA, from Geert Uytterhoeven.
- Multi-dequeue in the qdisc layer severely bypasses the fairness
limits the previous code used to enforce, reintroduce in a way that
at the same time doesn't compromise bulk dequeue opportunities.
From Jesper Dangaard Brouer.
- macvlan receive path unnecessarily hops through a softirq by using
netif_rx() instead of netif_receive_skb(). From Jason Baron"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (51 commits)
net: systemport: avoid unbalanced enable_irq_wake calls
net: bcmgenet: avoid unbalanced enable_irq_wake calls
net: bcmgenet: fix off-by-one in incrementing read pointer
net: fix races in page->_count manipulation
mlx4: fix race accessing page->_count
ixgbe: fix race accessing page->_count
igb: fix race accessing page->_count
fm10k: fix race accessing page->_count
net/phy: micrel: Add clock support for KSZ8021/KSZ8031
flow-dissector: Fix alignment issue in __skb_flow_get_ports
net: filter: fix the comments
Documentation: replace __sk_run_filter with __bpf_prog_run
macvlan: optimize the receive path
macvlan: pass 'bool' type to macvlan_count_rx()
drivers: net: xgene: Add 10GbE ethtool support
drivers: net: xgene: Add 10GbE support
drivers: net: xgene: Preparing for adding 10GbE support
dtb: Add 10GbE node to APM X-Gene SoC device tree
Documentation: dts: Update section header for APM X-Gene
MAINTAINERS: Update APM X-Gene section
...
Diffstat (limited to 'net')
-rw-r--r-- | net/Kconfig | 1 | ||||
-rw-r--r-- | net/core/filter.c | 9 | ||||
-rw-r--r-- | net/core/flow_dissector.c | 36 | ||||
-rw-r--r-- | net/core/skbuff.c | 35 | ||||
-rw-r--r-- | net/netfilter/nft_reject.c | 10 | ||||
-rw-r--r-- | net/netlabel/netlabel_kapi.c | 1 | ||||
-rw-r--r-- | net/rfkill/core.c | 4 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 20 |
8 files changed, 69 insertions, 47 deletions
diff --git a/net/Kconfig b/net/Kconfig index d6b138e2c263..6272420a721b 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -6,6 +6,7 @@ menuconfig NET bool "Networking support" select NLATTR select GENERIC_NET_UTILS + select ANON_INODES ---help--- Unless you really know what you are doing, you should say Y here. The reason is that some programs need kernel networking support even diff --git a/net/core/filter.c b/net/core/filter.c index fcd3f6742a6a..647b12265e18 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -51,9 +51,9 @@ * @skb: buffer to filter * * Run the filter code and then cut skb->data to correct size returned by - * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller + * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller * than pkt_len we keep whole skb->data. This is the socket level - * wrapper to sk_run_filter. It returns 0 if the packet should + * wrapper to SK_RUN_FILTER. It returns 0 if the packet should * be accepted or -EPERM if the packet should be tossed. * */ @@ -566,11 +566,8 @@ err: /* Security: * - * A BPF program is able to use 16 cells of memory to store intermediate - * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()). - * * As we dont want to clear mem[] array for each packet going through - * sk_run_filter(), we check that filter loaded by user never try to read + * __bpf_prog_run(), we check that filter loaded by user never try to read * a cell if not previously written, and we check all branches to be sure * a malicious user doesn't try to abuse us. */ diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 8560dea58803..45084938c403 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -100,6 +100,13 @@ ip: if (ip_is_fragment(iph)) ip_proto = 0; + /* skip the address processing if skb is NULL. The assumption + * here is that if there is no skb we are not looking for flow + * info but lengths and protocols. + */ + if (!skb) + break; + iph_to_flow_copy_addrs(flow, iph); break; } @@ -114,17 +121,15 @@ ipv6: return false; ip_proto = iph->nexthdr; - flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); - flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); nhoff += sizeof(struct ipv6hdr); - /* skip the flow label processing if skb is NULL. The - * assumption here is that if there is no skb we are not - * looking for flow info as much as we are length. - */ + /* see comment above in IPv4 section */ if (!skb) break; + flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); + flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); + flow_label = ip6_flowlabel(iph); if (flow_label) { /* Awesome, IPv6 packet has a flow label so we can @@ -231,9 +236,13 @@ ipv6: flow->n_proto = proto; flow->ip_proto = ip_proto; - flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen); flow->thoff = (u16) nhoff; + /* unless skb is set we don't need to record port info */ + if (skb) + flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, + data, hlen); + return true; } EXPORT_SYMBOL(__skb_flow_dissect); @@ -334,15 +343,16 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data, switch (keys->ip_proto) { case IPPROTO_TCP: { - const struct tcphdr *tcph; - struct tcphdr _tcph; + /* access doff as u8 to avoid unaligned access */ + const u8 *doff; + u8 _doff; - tcph = __skb_header_pointer(skb, poff, sizeof(_tcph), - data, hlen, &_tcph); - if (!tcph) + doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff), + data, hlen, &_doff); + if (!doff) return poff; - poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4); + poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2); break; } case IPPROTO_UDP: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7b3df0d518ab..829d013745ab 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -360,18 +360,29 @@ refill: goto end; } nc->frag.size = PAGE_SIZE << order; -recycle: - atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS); + /* Even if we own the page, we do not use atomic_set(). + * This would break get_page_unless_zero() users. + */ + atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1, + &nc->frag.page->_count); nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS; nc->frag.offset = 0; } if (nc->frag.offset + fragsz > nc->frag.size) { - /* avoid unnecessary locked operations if possible */ - if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) || - atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count)) - goto recycle; - goto refill; + if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) { + if (!atomic_sub_and_test(nc->pagecnt_bias, + &nc->frag.page->_count)) + goto refill; + /* OK, page count is 0, we can safely set it */ + atomic_set(&nc->frag.page->_count, + NETDEV_PAGECNT_MAX_BIAS); + } else { + atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias, + &nc->frag.page->_count); + } + nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS; + nc->frag.offset = 0; } data = page_address(nc->frag.page) + nc->frag.offset; @@ -4126,11 +4137,11 @@ EXPORT_SYMBOL(skb_vlan_untag); /** * alloc_skb_with_frags - allocate skb with page frags * - * header_len: size of linear part - * data_len: needed length in frags - * max_page_order: max page order desired. - * errcode: pointer to error code if any - * gfp_mask: allocation mask + * @header_len: size of linear part + * @data_len: needed length in frags + * @max_page_order: max page order desired. + * @errcode: pointer to error code if any + * @gfp_mask: allocation mask * * This can be used to allocate a paged skb, given a maximal order for frags. */ diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index ec8a456092a7..57d3e1af5630 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -72,7 +72,7 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_reject_dump); -static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = { +static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX + 1] = { [NFT_REJECT_ICMPX_NO_ROUTE] = ICMP_NET_UNREACH, [NFT_REJECT_ICMPX_PORT_UNREACH] = ICMP_PORT_UNREACH, [NFT_REJECT_ICMPX_HOST_UNREACH] = ICMP_HOST_UNREACH, @@ -81,8 +81,7 @@ static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = { int nft_reject_icmp_code(u8 code) { - if (code > NFT_REJECT_ICMPX_MAX) - return -EINVAL; + BUG_ON(code > NFT_REJECT_ICMPX_MAX); return icmp_code_v4[code]; } @@ -90,7 +89,7 @@ int nft_reject_icmp_code(u8 code) EXPORT_SYMBOL_GPL(nft_reject_icmp_code); -static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = { +static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX + 1] = { [NFT_REJECT_ICMPX_NO_ROUTE] = ICMPV6_NOROUTE, [NFT_REJECT_ICMPX_PORT_UNREACH] = ICMPV6_PORT_UNREACH, [NFT_REJECT_ICMPX_HOST_UNREACH] = ICMPV6_ADDR_UNREACH, @@ -99,8 +98,7 @@ static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = { int nft_reject_icmpv6_code(u8 code) { - if (code > NFT_REJECT_ICMPX_MAX) - return -EINVAL; + BUG_ON(code > NFT_REJECT_ICMPX_MAX); return icmp_code_v6[code]; } diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 0b4692dd1c5e..a845cd4cf21e 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -246,7 +246,6 @@ int netlbl_cfg_unlbl_static_add(struct net *net, * @addr: IP address in network byte order (struct in[6]_addr) * @mask: address mask in network byte order (struct in[6]_addr) * @family: address family - * @secid: LSM secid value for the entry * @audit_info: NetLabel audit information * * Description: diff --git a/net/rfkill/core.c b/net/rfkill/core.c index b3b16c070a7f..fa7cd792791c 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -329,7 +329,7 @@ static atomic_t rfkill_input_disabled = ATOMIC_INIT(0); /** * __rfkill_switch_all - Toggle state of all switches of given type * @type: type of interfaces to be affected - * @state: the new state + * @blocked: the new state * * This function sets the state of all switches of given type, * unless a specific switch is claimed by userspace (in which case, @@ -353,7 +353,7 @@ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked) /** * rfkill_switch_all - Toggle state of all switches of given type * @type: type of interfaces to be affected - * @state: the new state + * @blocked: the new state * * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state). * Please refer to __rfkill_switch_all() for details. diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 38d58e6cef07..6efca30894aa 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -57,7 +57,8 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) static void try_bulk_dequeue_skb(struct Qdisc *q, struct sk_buff *skb, - const struct netdev_queue *txq) + const struct netdev_queue *txq, + int *packets) { int bytelimit = qdisc_avail_bulklimit(txq) - skb->len; @@ -70,6 +71,7 @@ static void try_bulk_dequeue_skb(struct Qdisc *q, bytelimit -= nskb->len; /* covers GSO len */ skb->next = nskb; skb = nskb; + (*packets)++; /* GSO counts as one pkt */ } skb->next = NULL; } @@ -77,11 +79,13 @@ static void try_bulk_dequeue_skb(struct Qdisc *q, /* Note that dequeue_skb can possibly return a SKB list (via skb->next). * A requeued skb (via q->gso_skb) can also be a SKB list. */ -static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate) +static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, + int *packets) { struct sk_buff *skb = q->gso_skb; const struct netdev_queue *txq = q->dev_queue; + *packets = 1; *validate = true; if (unlikely(skb)) { /* check the reason of requeuing without tx lock first */ @@ -98,7 +102,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate) !netif_xmit_frozen_or_stopped(txq)) { skb = q->dequeue(q); if (skb && qdisc_may_bulk(q)) - try_bulk_dequeue_skb(q, skb, txq); + try_bulk_dequeue_skb(q, skb, txq, packets); } } return skb; @@ -204,7 +208,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, * >0 - queue is not empty. * */ -static inline int qdisc_restart(struct Qdisc *q) +static inline int qdisc_restart(struct Qdisc *q, int *packets) { struct netdev_queue *txq; struct net_device *dev; @@ -213,7 +217,7 @@ static inline int qdisc_restart(struct Qdisc *q) bool validate; /* Dequeue packet */ - skb = dequeue_skb(q, &validate); + skb = dequeue_skb(q, &validate, packets); if (unlikely(!skb)) return 0; @@ -227,14 +231,16 @@ static inline int qdisc_restart(struct Qdisc *q) void __qdisc_run(struct Qdisc *q) { int quota = weight_p; + int packets; - while (qdisc_restart(q)) { + while (qdisc_restart(q, &packets)) { /* * Ordered by possible occurrence: Postpone processing if * 1. we've exceeded packet quota * 2. another process needs the CPU; */ - if (--quota <= 0 || need_resched()) { + quota -= packets; + if (quota <= 0 || need_resched()) { __netif_schedule(q); break; } |