From f3d229c68bb47170f04f81e51c9ed5d4286cebdb Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 7 Mar 2012 23:45:44 +0000 Subject: netfilter: xt_LOG: don't use xchg() for simple assignment At least on ia64 the (bogus) use of xchg() here results in the compiler warning about an unused expression result. As only an assignment is intended here, convert it to such. Signed-off-by: Jan Beulich Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/xt_log.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/netfilter/xt_log.h b/include/net/netfilter/xt_log.h index 7e1544e8f70d..9d9756cca013 100644 --- a/include/net/netfilter/xt_log.h +++ b/include/net/netfilter/xt_log.h @@ -47,7 +47,7 @@ static void sb_close(struct sbuff *m) if (likely(m != &emergency)) kfree(m); else { - xchg(&emergency_ptr, m); + emergency_ptr = m; local_bh_enable(); } } -- cgit v1.2.3 From 5276e16bb6f35412583518d6f04651dd9dc114be Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 14 Jan 2012 17:38:55 +0100 Subject: netfilter: ipset: avoid use of kernel-only types When using the xt_set.h header in userspace, one will get these gcc reports: ipset/ip_set.h:184:1: error: unknown type name "u16" In file included from libxt_SET.c:21:0: netfilter/xt_set.h:61:2: error: unknown type name "u32" netfilter/xt_set.h:62:2: error: unknown type name "u32" Signed-off-by: Jan Engelhardt Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/xt_set.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/netfilter/xt_set.h b/include/linux/netfilter/xt_set.h index c0405ac92870..e3a9978f259f 100644 --- a/include/linux/netfilter/xt_set.h +++ b/include/linux/netfilter/xt_set.h @@ -58,8 +58,8 @@ struct xt_set_info_target_v1 { struct xt_set_info_target_v2 { struct xt_set_info add_set; struct xt_set_info del_set; - u32 flags; - u32 timeout; + __u32 flags; + __u32 timeout; }; #endif /*_XT_SET_H*/ -- cgit v1.2.3 From 44b52bccf855b0706de624c29fc3d82ca954bb4e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Apr 2012 10:08:48 +0200 Subject: netfilter: xt_CT: remove a compile warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_NF_CONNTRACK_TIMEOUT=n we have following warning : CC [M] net/netfilter/xt_CT.o net/netfilter/xt_CT.c: In function ‘xt_ct_tg_check_v1’: net/netfilter/xt_CT.c:284: warning: label ‘err4’ defined but not used Reported-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_CT.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 0c8e43810ce3..138b75e41fdd 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -281,8 +281,10 @@ out: info->ct = ct; return 0; +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT err4: rcu_read_unlock(); +#endif err3: nf_conntrack_free(ct); err2: -- cgit v1.2.3 From 2def16ae6b0c77571200f18ba4be049b03d75579 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Apr 2012 22:33:02 +0000 Subject: net: fix /proc/net/dev regression Commit f04565ddf52 (dev: use name hash for dev_seq_ops) added a second regression, as some devices are missing from /proc/net/dev if many devices are defined. When seq_file buffer is filled, the last ->next/show() method is canceled (pos value is reverted to value prior ->next() call) Problem is after above commit, we dont restart the lookup at right position in ->start() method. 
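As background for the fix described next, a single 32-bit position can encode both a hash bucket and an offset inside that bucket with a few shifts and masks. The standalone sketch below (the hash-bit count is illustrative, not NETDEV_HASHBITS) shows that encoding; because the seq_file core saves and restores the loff_t itself, deriving bucket and offset from it on every ->start() keeps the iteration position correct even when a filled buffer forces an entry to be revisited.

#include <stdio.h>

#define HASHBITS	8	/* illustrative; the kernel uses NETDEV_HASHBITS */
#define BUCKET_SPACE	(32 - HASHBITS - 1)

#define get_bucket(x)		((x) >> BUCKET_SPACE)
#define get_offset(x)		((x) & ((1 << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o)	((b) << BUCKET_SPACE | (o))

int main(void)
{
	unsigned int pos = set_bucket_offset(3, 7);	/* bucket 3, 7th entry */

	printf("bucket=%u offset=%u\n", get_bucket(pos), get_offset(pos));
	return 0;
}
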
Fix this by removing the internal 'pos' pointer added in commit, since we need to use the 'loff_t *pos' provided by seq_file layer. This also reverts commit 5cac98dd0 (net: Fix corruption in /proc/*/net/dev_mcast), since its not needed anymore. Reported-by: Ben Greear Signed-off-by: Eric Dumazet Cc: Mihai Maruseac Tested-by: Ben Greear Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 -- net/core/dev.c | 58 +++++++++++------------------------------------ net/core/dev_addr_lists.c | 3 ++- 3 files changed, 15 insertions(+), 48 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1f77540bdc95..5cbaa20f1659 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2604,8 +2604,6 @@ extern void net_disable_timestamp(void); extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); extern void dev_seq_stop(struct seq_file *seq, void *v); -extern int dev_seq_open_ops(struct inode *inode, struct file *file, - const struct seq_operations *ops); #endif extern int netdev_class_create_file(struct class_attribute *class_attr); diff --git a/net/core/dev.c b/net/core/dev.c index 6c7dc9d78e10..c25d453b2803 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4028,54 +4028,41 @@ static int dev_ifconf(struct net *net, char __user *arg) #ifdef CONFIG_PROC_FS -#define BUCKET_SPACE (32 - NETDEV_HASHBITS) - -struct dev_iter_state { - struct seq_net_private p; - unsigned int pos; /* bucket << BUCKET_SPACE + offset */ -}; +#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) #define get_bucket(x) ((x) >> BUCKET_SPACE) #define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) -static inline struct net_device *dev_from_same_bucket(struct seq_file *seq) +static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) { - struct dev_iter_state *state = seq->private; struct net *net = seq_file_net(seq); struct net_device *dev; struct hlist_node *p; struct hlist_head *h; - unsigned int count, bucket, offset; + unsigned int count = 0, offset = get_offset(*pos); - bucket = get_bucket(state->pos); - offset = get_offset(state->pos); - h = &net->dev_name_head[bucket]; - count = 0; + h = &net->dev_name_head[get_bucket(*pos)]; hlist_for_each_entry_rcu(dev, p, h, name_hlist) { - if (count++ == offset) { - state->pos = set_bucket_offset(bucket, count); + if (++count == offset) return dev; - } } return NULL; } -static inline struct net_device *dev_from_new_bucket(struct seq_file *seq) +static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) { - struct dev_iter_state *state = seq->private; struct net_device *dev; unsigned int bucket; - bucket = get_bucket(state->pos); do { - dev = dev_from_same_bucket(seq); + dev = dev_from_same_bucket(seq, pos); if (dev) return dev; - bucket++; - state->pos = set_bucket_offset(bucket, 0); + bucket = get_bucket(*pos) + 1; + *pos = set_bucket_offset(bucket, 1); } while (bucket < NETDEV_HASHENTRIES); return NULL; @@ -4088,33 +4075,20 @@ static inline struct net_device *dev_from_new_bucket(struct seq_file *seq) void *dev_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { - struct dev_iter_state *state = seq->private; - rcu_read_lock(); if (!*pos) return SEQ_START_TOKEN; - /* check for end of the hash */ - if (state->pos == 0 && *pos > 1) + if (get_bucket(*pos) >= NETDEV_HASHENTRIES) return NULL; - return dev_from_new_bucket(seq); + return 
dev_from_bucket(seq, pos); } void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net_device *dev; - ++*pos; - - if (v == SEQ_START_TOKEN) - return dev_from_new_bucket(seq); - - dev = dev_from_same_bucket(seq); - if (dev) - return dev; - - return dev_from_new_bucket(seq); + return dev_from_bucket(seq, pos); } void dev_seq_stop(struct seq_file *seq, void *v) @@ -4213,13 +4187,7 @@ static const struct seq_operations dev_seq_ops = { static int dev_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &dev_seq_ops, - sizeof(struct dev_iter_state)); -} - -int dev_seq_open_ops(struct inode *inode, struct file *file, - const struct seq_operations *ops) -{ - return seq_open_net(inode, file, ops, sizeof(struct dev_iter_state)); + sizeof(struct seq_net_private)); } static const struct file_operations dev_seq_fops = { diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 29c07fef9228..626698f0db8b 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -696,7 +696,8 @@ static const struct seq_operations dev_mc_seq_ops = { static int dev_mc_seq_open(struct inode *inode, struct file *file) { - return dev_seq_open_ops(inode, file, &dev_mc_seq_ops); + return seq_open_net(inode, file, &dev_mc_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations dev_mc_seq_fops = { -- cgit v1.2.3 From e675f0cc9a872fd152edc0c77acfed19bf28b81e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 26 Mar 2012 00:03:42 +0000 Subject: ppp: Don't stop and restart queue on every TX packet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For every transmitted packet, ppp_start_xmit() will stop the netdev queue and then, if appropriate, restart it. This causes the TX softirq to run, entirely gratuitously. This is "only" a waste of CPU time in the normal case, but it's actively harmful when the PPP device is a TEQL slave — the wakeup will cause the offending device to receive the next TX packet from the TEQL queue, when it *should* have gone to the next slave in the list. We end up seeing large bursts of packets on just *one* slave device, rather than using the full available bandwidth over all slaves. This patch fixes the problem by *not* unconditionally stopping the queue in ppp_start_xmit(). It adds a return value from ppp_xmit_process() which indicates whether the queue should be stopped or not. It *doesn't* remove the call to netif_wake_queue() from ppp_xmit_process(), because other code paths (especially from ppp_output_wakeup()) need it there and it's messy to push it out to the other callers to do it based on the return value. So we leave it in place — it's a no-op in the case where the queue wasn't stopped, so it's harmless in the TX path. Signed-off-by: David Woodhouse Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 159da2905fe9..33f8c51968b6 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -235,7 +235,7 @@ struct ppp_net { /* Prototypes. 
*/ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf, struct file *file, unsigned int cmd, unsigned long arg); -static void ppp_xmit_process(struct ppp *ppp); +static int ppp_xmit_process(struct ppp *ppp); static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb); static void ppp_push(struct ppp *ppp); static void ppp_channel_push(struct channel *pch); @@ -968,9 +968,9 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev) proto = npindex_to_proto[npi]; put_unaligned_be16(proto, pp); - netif_stop_queue(dev); skb_queue_tail(&ppp->file.xq, skb); - ppp_xmit_process(ppp); + if (!ppp_xmit_process(ppp)) + netif_stop_queue(dev); return NETDEV_TX_OK; outf: @@ -1048,10 +1048,11 @@ static void ppp_setup(struct net_device *dev) * Called to do any work queued up on the transmit side * that can now be done. */ -static void +static int ppp_xmit_process(struct ppp *ppp) { struct sk_buff *skb; + int ret = 0; ppp_xmit_lock(ppp); if (!ppp->closing) { @@ -1061,10 +1062,13 @@ ppp_xmit_process(struct ppp *ppp) ppp_send_frame(ppp, skb); /* If there's no work left to do, tell the core net code that we can accept some more. */ - if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) + if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) { netif_wake_queue(ppp->dev); + ret = 1; + } } ppp_xmit_unlock(ppp); + return ret; } static inline struct sk_buff * -- cgit v1.2.3 From 2f53384424251c06038ae612e56231b96ab610ee Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Apr 2012 09:37:01 +0000 Subject: tcp: allow splice() to build full TSO packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vmsplice()/splice(pipe, socket) call do_tcp_sendpages() one page at a time, adding at most 4096 bytes to an skb. (assuming PAGE_SIZE=4096) The call to tcp_push() at the end of do_tcp_sendpages() forces an immediate xmit when pipe is not already filled, and tso_fragment() try to split these skb to MSS multiples. 4096 bytes are usually split in a skb with 2 MSS, and a remaining sub-mss skb (assuming MTU=1500) This makes slow start suboptimal because many small frames are sent to qdisc/driver layers instead of big ones (constrained by cwnd and packets in flight of course) In fact, applications using sendmsg() (adding an additional memory copy) instead of vmsplice()/splice()/sendfile() are a bit faster because of this anomaly, especially if serving small files in environments with large initial [c]wnd. Call tcp_push() only if MSG_MORE is not set in the flags parameter. This bit is automatically provided by splice() internals but for the last page, or on all pages if user specified SPLICE_F_MORE splice() flag. In some workloads, this can reduce number of sent logical packets by an order of magnitude, making zero-copy TCP actually faster than one-copy :) Reported-by: Tom Herbert Cc: Nandita Dukkipati Cc: Neal Cardwell Cc: Tom Herbert Cc: Yuchung Cheng Cc: H.K. Jerry Chu Cc: Maciej Żenczykowski Cc: Mahesh Bandewar Cc: Ilpo Järvinen Signed-off-by: Eric Dumazet com> Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cfd7edda0a8e..2ff6f45a76f4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -860,7 +860,7 @@ wait_for_memory: } out: - if (copied) + if (copied && !(flags & MSG_MORE)) tcp_push(sk, flags, mss_now, tp->nonagle); return copied; -- cgit v1.2.3 From 5d6bd8619db5a30668093c1b2967674645cf0736 Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Tue, 3 Apr 2012 08:41:40 +0000 Subject: TCP: update ip_local_port_range documentation The explanation of ip_local_port_range in Documentation/networking/ip-sysctl.txt contains several factual errors: - The default value of ip_local_port_range does not depend on the amount of memory available in the system. - tcp_tw_recycle is not enabled by default. - 1024-4999 is not the default value. - Etc. Clean up the mess. Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ad3e80e17b4f..bd80ba5847d2 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -604,15 +604,8 @@ IP Variables: ip_local_port_range - 2 INTEGERS Defines the local port range that is used by TCP and UDP to choose the local port. The first number is the first, the - second the last local port number. Default value depends on - amount of memory available on the system: - > 128Mb 32768-61000 - < 128Mb 1024-4999 or even less. - This number defines number of active connections, which this - system can issue simultaneously to systems not supporting - TCP extensions (timestamps). With tcp_tw_recycle enabled - (i.e. by default) range 1024-4999 is enough to issue up to - 2000 connections per second to systems supporting timestamps. + second the last local port number. The default values are + 32768 and 61000 respectively. ip_local_reserved_ports - list of comma separated ranges Specify the ports which are reserved for known third-party -- cgit v1.2.3 From f03fb3f455c6c3a3dfcef6c7f2dcab104c813f4b Mon Sep 17 00:00:00 2001 From: Jan Seiffert Date: Fri, 30 Mar 2012 05:08:19 +0000 Subject: bpf jit: Make the filter.c::__load_pointer helper non-static for the jits The function is renamed to make it a little more clear what it does. It is not added to any .h because it is not for general consumption, only for bpf internal use (and so by the jits). Signed-of-by: Jan Seiffert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/filter.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index cf4989ac503b..6f755cca4520 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -39,8 +39,11 @@ #include #include -/* No hurry in this branch */ -static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size) +/* No hurry in this branch + * + * Exported for the bpf jit load helper. 
+ */ +void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size) { u8 *ptr = NULL; @@ -59,7 +62,7 @@ static inline void *load_pointer(const struct sk_buff *skb, int k, { if (k >= 0) return skb_header_pointer(skb, k, size, buffer); - return __load_pointer(skb, k, size); + return bpf_internal_load_pointer_neg_helper(skb, k, size); } /** -- cgit v1.2.3 From a998d4342337c82dacdc0897d30a9364de1576a1 Mon Sep 17 00:00:00 2001 From: Jan Seiffert Date: Fri, 30 Mar 2012 05:24:05 +0000 Subject: bpf jit: Let the x86 jit handle negative offsets Now the helper function from filter.c for negative offsets is exported, it can be used it in the jit to handle negative offsets. First modify the asm load helper functions to handle: - know positive offsets - know negative offsets - any offset then the compiler can be modified to explicitly use these helper when appropriate. This fixes the case of a negative X register and allows to lift the restriction that bpf programs with negative offsets can't be jited. Signed-of-by: Jan Seiffert Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit.S | 122 +++++++++++++++++++++++++++++++++----------- arch/x86/net/bpf_jit_comp.c | 41 +++++++++------ 2 files changed, 115 insertions(+), 48 deletions(-) diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S index 66870223f8c5..877b9a1b2152 100644 --- a/arch/x86/net/bpf_jit.S +++ b/arch/x86/net/bpf_jit.S @@ -18,17 +18,17 @@ * r9d : hlen = skb->len - skb->data_len */ #define SKBDATA %r8 - -sk_load_word_ind: - .globl sk_load_word_ind - - add %ebx,%esi /* offset += X */ -# test %esi,%esi /* if (offset < 0) goto bpf_error; */ - js bpf_error +#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */ sk_load_word: .globl sk_load_word + test %esi,%esi + js bpf_slow_path_word_neg + +sk_load_word_positive_offset: + .globl sk_load_word_positive_offset + mov %r9d,%eax # hlen sub %esi,%eax # hlen - offset cmp $3,%eax @@ -37,16 +37,15 @@ sk_load_word: bswap %eax /* ntohl() */ ret - -sk_load_half_ind: - .globl sk_load_half_ind - - add %ebx,%esi /* offset += X */ - js bpf_error - sk_load_half: .globl sk_load_half + test %esi,%esi + js bpf_slow_path_half_neg + +sk_load_half_positive_offset: + .globl sk_load_half_positive_offset + mov %r9d,%eax sub %esi,%eax # hlen - offset cmp $1,%eax @@ -55,14 +54,15 @@ sk_load_half: rol $8,%ax # ntohs() ret -sk_load_byte_ind: - .globl sk_load_byte_ind - add %ebx,%esi /* offset += X */ - js bpf_error - sk_load_byte: .globl sk_load_byte + test %esi,%esi + js bpf_slow_path_byte_neg + +sk_load_byte_positive_offset: + .globl sk_load_byte_positive_offset + cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */ jle bpf_slow_path_byte movzbl (SKBDATA,%rsi),%eax @@ -73,25 +73,21 @@ sk_load_byte: * * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf) * Must preserve A accumulator (%eax) - * Inputs : %esi is the offset value, already known positive + * Inputs : %esi is the offset value */ -ENTRY(sk_load_byte_msh) - CFI_STARTPROC +sk_load_byte_msh: + .globl sk_load_byte_msh + test %esi,%esi + js bpf_slow_path_byte_msh_neg + +sk_load_byte_msh_positive_offset: + .globl sk_load_byte_msh_positive_offset cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */ jle bpf_slow_path_byte_msh movzbl (SKBDATA,%rsi),%ebx and $15,%bl shl $2,%bl ret - CFI_ENDPROC -ENDPROC(sk_load_byte_msh) - -bpf_error: -# force a return 0 from jit handler - xor %eax,%eax - mov -8(%rbp),%rbx - leaveq - ret /* rsi contains offset and can be scratched 
*/ #define bpf_slow_path_common(LEN) \ @@ -138,3 +134,67 @@ bpf_slow_path_byte_msh: shl $2,%al xchg %eax,%ebx ret + +#define sk_negative_common(SIZE) \ + push %rdi; /* save skb */ \ + push %r9; \ + push SKBDATA; \ +/* rsi already has offset */ \ + mov $SIZE,%ecx; /* size */ \ + call bpf_internal_load_pointer_neg_helper; \ + test %rax,%rax; \ + pop SKBDATA; \ + pop %r9; \ + pop %rdi; \ + jz bpf_error + + +bpf_slow_path_word_neg: + cmp SKF_MAX_NEG_OFF, %esi /* test range */ + jl bpf_error /* offset lower -> error */ +sk_load_word_negative_offset: + .globl sk_load_word_negative_offset + sk_negative_common(4) + mov (%rax), %eax + bswap %eax + ret + +bpf_slow_path_half_neg: + cmp SKF_MAX_NEG_OFF, %esi + jl bpf_error +sk_load_half_negative_offset: + .globl sk_load_half_negative_offset + sk_negative_common(2) + mov (%rax),%ax + rol $8,%ax + movzwl %ax,%eax + ret + +bpf_slow_path_byte_neg: + cmp SKF_MAX_NEG_OFF, %esi + jl bpf_error +sk_load_byte_negative_offset: + .globl sk_load_byte_negative_offset + sk_negative_common(1) + movzbl (%rax), %eax + ret + +bpf_slow_path_byte_msh_neg: + cmp SKF_MAX_NEG_OFF, %esi + jl bpf_error +sk_load_byte_msh_negative_offset: + .globl sk_load_byte_msh_negative_offset + xchg %eax,%ebx /* dont lose A , X is about to be scratched */ + sk_negative_common(1) + movzbl (%rax),%eax + and $15,%al + shl $2,%al + xchg %eax,%ebx + ret + +bpf_error: +# force a return 0 from jit handler + xor %eax,%eax + mov -8(%rbp),%rbx + leaveq + ret diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 5a5b6e4dd738..0597f95b6da6 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -30,7 +30,10 @@ int bpf_jit_enable __read_mostly; * assembly code in arch/x86/net/bpf_jit.S */ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; -extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; +extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[]; +extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[]; +extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[]; +extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[]; static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { @@ -117,6 +120,8 @@ static inline void bpf_flush_icache(void *start, void *end) set_fs(old_fs); } +#define CHOOSE_LOAD_FUNC(K, func) \ + ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) void bpf_jit_compile(struct sk_filter *fp) { @@ -473,44 +478,46 @@ void bpf_jit_compile(struct sk_filter *fp) #endif break; case BPF_S_LD_W_ABS: - func = sk_load_word; + func = CHOOSE_LOAD_FUNC(K, sk_load_word); common_load: seen |= SEEN_DATAREF; - if ((int)K < 0) { - /* Abort the JIT because __load_pointer() is needed. */ - goto out; - } t_offset = func - (image + addrs[i]); EMIT1_off32(0xbe, K); /* mov imm32,%esi */ EMIT1_off32(0xe8, t_offset); /* call */ break; case BPF_S_LD_H_ABS: - func = sk_load_half; + func = CHOOSE_LOAD_FUNC(K, sk_load_half); goto common_load; case BPF_S_LD_B_ABS: - func = sk_load_byte; + func = CHOOSE_LOAD_FUNC(K, sk_load_byte); goto common_load; case BPF_S_LDX_B_MSH: - if ((int)K < 0) { - /* Abort the JIT because __load_pointer() is needed. 
*/ - goto out; - } + func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh); seen |= SEEN_DATAREF | SEEN_XREG; - t_offset = sk_load_byte_msh - (image + addrs[i]); + t_offset = func - (image + addrs[i]); EMIT1_off32(0xbe, K); /* mov imm32,%esi */ EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */ break; case BPF_S_LD_W_IND: - func = sk_load_word_ind; + func = sk_load_word; common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; t_offset = func - (image + addrs[i]); - EMIT1_off32(0xbe, K); /* mov imm32,%esi */ + if (K) { + if (is_imm8(K)) { + EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */ + } else { + EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */ + EMIT(K, 4); + } + } else { + EMIT2(0x89,0xde); /* mov %ebx,%esi */ + } EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */ break; case BPF_S_LD_H_IND: - func = sk_load_half_ind; + func = sk_load_half; goto common_load_ind; case BPF_S_LD_B_IND: - func = sk_load_byte_ind; + func = sk_load_byte; goto common_load_ind; case BPF_S_JMP_JA: t_offset = addrs[i + K] - addrs[i]; -- cgit v1.2.3 From aacc1bea190d731755a65cb8ec31dd756f4e263e Mon Sep 17 00:00:00 2001 From: "Multanen, Eric W" Date: Wed, 28 Mar 2012 07:49:09 +0000 Subject: ixgbe: driver fix for link flap Fix up code so that changes in DCB settings are detected only when ixgbe_dcbnl_set_all is called. Previously, a series of 'change' commands followed by a call to ixgbe_dcbnl_set_all() would always be handled as a HW change - even if the net change was zero. This patch checks for this case of no actual change and skips going through the HW set process. Without this fix, the link could reset and result in a link flap. The core change in this patch is to check for changes in the ixgbe_copy_dcb_cfg() routine - and return a bitmask of detected changes. The other places where changes were detected previously can be removed. 
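A minimal userspace sketch of the change-detection pattern this patch adopts (the config structure and bit values are illustrative, not the driver's actual ones): copy a field only when it differs and accumulate a bitmask of what changed, so the caller can skip the hardware reconfiguration entirely when the mask comes back zero.

#include <stdio.h>

#define BIT_PG_TX	0x01
#define BIT_PG_RX	0x02
#define BIT_PFC		0x04

struct dcb_cfg {		/* hypothetical, trimmed-down configuration */
	int tx_bwg_percent;
	int rx_bwg_percent;
	int pfc_enable;
};

/* Copy src into dst and return a bitmask of the fields that changed. */
static int copy_dcb_cfg(const struct dcb_cfg *src, struct dcb_cfg *dst)
{
	int changes = 0;

	if (dst->tx_bwg_percent != src->tx_bwg_percent) {
		dst->tx_bwg_percent = src->tx_bwg_percent;
		changes |= BIT_PG_TX;
	}
	if (dst->rx_bwg_percent != src->rx_bwg_percent) {
		dst->rx_bwg_percent = src->rx_bwg_percent;
		changes |= BIT_PG_RX;
	}
	if (dst->pfc_enable != src->pfc_enable) {
		dst->pfc_enable = src->pfc_enable;
		changes |= BIT_PFC;
	}
	return changes;
}

int main(void)
{
	struct dcb_cfg hw  = { 50, 50, 0 };
	struct dcb_cfg tmp = { 50, 50, 0 };

	/* No net change: mask is 0, so the HW set path can be skipped. */
	printf("changes=%#x\n", copy_dcb_cfg(&tmp, &hw));

	tmp.pfc_enable = 1;	/* a real change sets only the PFC bit */
	printf("changes=%#x\n", copy_dcb_cfg(&tmp, &hw));
	return 0;
}
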
Signed-off-by: Eric Multanen Tested-by: Ross Brattain Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c | 164 ++++++++++++------------ 1 file changed, 80 insertions(+), 84 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c index dde65f951400..652e4b09546d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c @@ -44,62 +44,94 @@ #define DCB_NO_HW_CHG 1 /* DCB configuration did not change */ #define DCB_HW_CHG 2 /* DCB configuration changed, no reset */ -int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg, - struct ixgbe_dcb_config *dst_dcb_cfg, int tc_max) +int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *scfg, + struct ixgbe_dcb_config *dcfg, int tc_max) { - struct tc_configuration *src_tc_cfg = NULL; - struct tc_configuration *dst_tc_cfg = NULL; - int i; + struct tc_configuration *src = NULL; + struct tc_configuration *dst = NULL; + int i, j; + int tx = DCB_TX_CONFIG; + int rx = DCB_RX_CONFIG; + int changes = 0; - if (!src_dcb_cfg || !dst_dcb_cfg) - return -EINVAL; + if (!scfg || !dcfg) + return changes; for (i = DCB_PG_ATTR_TC_0; i < tc_max + DCB_PG_ATTR_TC_0; i++) { - src_tc_cfg = &src_dcb_cfg->tc_config[i - DCB_PG_ATTR_TC_0]; - dst_tc_cfg = &dst_dcb_cfg->tc_config[i - DCB_PG_ATTR_TC_0]; + src = &scfg->tc_config[i - DCB_PG_ATTR_TC_0]; + dst = &dcfg->tc_config[i - DCB_PG_ATTR_TC_0]; - dst_tc_cfg->path[DCB_TX_CONFIG].prio_type = - src_tc_cfg->path[DCB_TX_CONFIG].prio_type; + if (dst->path[tx].prio_type != src->path[tx].prio_type) { + dst->path[tx].prio_type = src->path[tx].prio_type; + changes |= BIT_PG_TX; + } - dst_tc_cfg->path[DCB_TX_CONFIG].bwg_id = - src_tc_cfg->path[DCB_TX_CONFIG].bwg_id; + if (dst->path[tx].bwg_id != src->path[tx].bwg_id) { + dst->path[tx].bwg_id = src->path[tx].bwg_id; + changes |= BIT_PG_TX; + } - dst_tc_cfg->path[DCB_TX_CONFIG].bwg_percent = - src_tc_cfg->path[DCB_TX_CONFIG].bwg_percent; + if (dst->path[tx].bwg_percent != src->path[tx].bwg_percent) { + dst->path[tx].bwg_percent = src->path[tx].bwg_percent; + changes |= BIT_PG_TX; + } - dst_tc_cfg->path[DCB_TX_CONFIG].up_to_tc_bitmap = - src_tc_cfg->path[DCB_TX_CONFIG].up_to_tc_bitmap; + if (dst->path[tx].up_to_tc_bitmap != + src->path[tx].up_to_tc_bitmap) { + dst->path[tx].up_to_tc_bitmap = + src->path[tx].up_to_tc_bitmap; + changes |= (BIT_PG_TX | BIT_PFC | BIT_APP_UPCHG); + } - dst_tc_cfg->path[DCB_RX_CONFIG].prio_type = - src_tc_cfg->path[DCB_RX_CONFIG].prio_type; + if (dst->path[rx].prio_type != src->path[rx].prio_type) { + dst->path[rx].prio_type = src->path[rx].prio_type; + changes |= BIT_PG_RX; + } - dst_tc_cfg->path[DCB_RX_CONFIG].bwg_id = - src_tc_cfg->path[DCB_RX_CONFIG].bwg_id; + if (dst->path[rx].bwg_id != src->path[rx].bwg_id) { + dst->path[rx].bwg_id = src->path[rx].bwg_id; + changes |= BIT_PG_RX; + } - dst_tc_cfg->path[DCB_RX_CONFIG].bwg_percent = - src_tc_cfg->path[DCB_RX_CONFIG].bwg_percent; + if (dst->path[rx].bwg_percent != src->path[rx].bwg_percent) { + dst->path[rx].bwg_percent = src->path[rx].bwg_percent; + changes |= BIT_PG_RX; + } - dst_tc_cfg->path[DCB_RX_CONFIG].up_to_tc_bitmap = - src_tc_cfg->path[DCB_RX_CONFIG].up_to_tc_bitmap; + if (dst->path[rx].up_to_tc_bitmap != + src->path[rx].up_to_tc_bitmap) { + dst->path[rx].up_to_tc_bitmap = + src->path[rx].up_to_tc_bitmap; + changes |= (BIT_PG_RX | BIT_PFC | BIT_APP_UPCHG); + } } for (i = DCB_PG_ATTR_BW_ID_0; i < DCB_PG_ATTR_BW_ID_MAX; i++) { - 
dst_dcb_cfg->bw_percentage[DCB_TX_CONFIG] - [i-DCB_PG_ATTR_BW_ID_0] = src_dcb_cfg->bw_percentage - [DCB_TX_CONFIG][i-DCB_PG_ATTR_BW_ID_0]; - dst_dcb_cfg->bw_percentage[DCB_RX_CONFIG] - [i-DCB_PG_ATTR_BW_ID_0] = src_dcb_cfg->bw_percentage - [DCB_RX_CONFIG][i-DCB_PG_ATTR_BW_ID_0]; + j = i - DCB_PG_ATTR_BW_ID_0; + if (dcfg->bw_percentage[tx][j] != scfg->bw_percentage[tx][j]) { + dcfg->bw_percentage[tx][j] = scfg->bw_percentage[tx][j]; + changes |= BIT_PG_TX; + } + if (dcfg->bw_percentage[rx][j] != scfg->bw_percentage[rx][j]) { + dcfg->bw_percentage[rx][j] = scfg->bw_percentage[rx][j]; + changes |= BIT_PG_RX; + } } for (i = DCB_PFC_UP_ATTR_0; i < DCB_PFC_UP_ATTR_MAX; i++) { - dst_dcb_cfg->tc_config[i - DCB_PFC_UP_ATTR_0].dcb_pfc = - src_dcb_cfg->tc_config[i - DCB_PFC_UP_ATTR_0].dcb_pfc; + j = i - DCB_PFC_UP_ATTR_0; + if (dcfg->tc_config[j].dcb_pfc != scfg->tc_config[j].dcb_pfc) { + dcfg->tc_config[j].dcb_pfc = scfg->tc_config[j].dcb_pfc; + changes |= BIT_PFC; + } } - dst_dcb_cfg->pfc_mode_enable = src_dcb_cfg->pfc_mode_enable; + if (dcfg->pfc_mode_enable != scfg->pfc_mode_enable) { + dcfg->pfc_mode_enable = scfg->pfc_mode_enable; + changes |= BIT_PFC; + } - return 0; + return changes; } static u8 ixgbe_dcbnl_get_state(struct net_device *netdev) @@ -179,20 +211,6 @@ static void ixgbe_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc, if (up_map != DCB_ATTR_VALUE_UNDEFINED) adapter->temp_dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap = up_map; - - if ((adapter->temp_dcb_cfg.tc_config[tc].path[0].prio_type != - adapter->dcb_cfg.tc_config[tc].path[0].prio_type) || - (adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_id != - adapter->dcb_cfg.tc_config[tc].path[0].bwg_id) || - (adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_percent != - adapter->dcb_cfg.tc_config[tc].path[0].bwg_percent) || - (adapter->temp_dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap != - adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap)) - adapter->dcb_set_bitmap |= BIT_PG_TX; - - if (adapter->temp_dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap != - adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap) - adapter->dcb_set_bitmap |= BIT_PFC | BIT_APP_UPCHG; } static void ixgbe_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id, @@ -201,10 +219,6 @@ static void ixgbe_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id, struct ixgbe_adapter *adapter = netdev_priv(netdev); adapter->temp_dcb_cfg.bw_percentage[0][bwg_id] = bw_pct; - - if (adapter->temp_dcb_cfg.bw_percentage[0][bwg_id] != - adapter->dcb_cfg.bw_percentage[0][bwg_id]) - adapter->dcb_set_bitmap |= BIT_PG_TX; } static void ixgbe_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev, int tc, @@ -223,20 +237,6 @@ static void ixgbe_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev, int tc, if (up_map != DCB_ATTR_VALUE_UNDEFINED) adapter->temp_dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap = up_map; - - if ((adapter->temp_dcb_cfg.tc_config[tc].path[1].prio_type != - adapter->dcb_cfg.tc_config[tc].path[1].prio_type) || - (adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_id != - adapter->dcb_cfg.tc_config[tc].path[1].bwg_id) || - (adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_percent != - adapter->dcb_cfg.tc_config[tc].path[1].bwg_percent) || - (adapter->temp_dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap != - adapter->dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap)) - adapter->dcb_set_bitmap |= BIT_PG_RX; - - if (adapter->temp_dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap != - adapter->dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap) - 
adapter->dcb_set_bitmap |= BIT_PFC; } static void ixgbe_dcbnl_set_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id, @@ -245,10 +245,6 @@ static void ixgbe_dcbnl_set_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id, struct ixgbe_adapter *adapter = netdev_priv(netdev); adapter->temp_dcb_cfg.bw_percentage[1][bwg_id] = bw_pct; - - if (adapter->temp_dcb_cfg.bw_percentage[1][bwg_id] != - adapter->dcb_cfg.bw_percentage[1][bwg_id]) - adapter->dcb_set_bitmap |= BIT_PG_RX; } static void ixgbe_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int tc, @@ -298,10 +294,8 @@ static void ixgbe_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority, adapter->temp_dcb_cfg.tc_config[priority].dcb_pfc = setting; if (adapter->temp_dcb_cfg.tc_config[priority].dcb_pfc != - adapter->dcb_cfg.tc_config[priority].dcb_pfc) { - adapter->dcb_set_bitmap |= BIT_PFC; + adapter->dcb_cfg.tc_config[priority].dcb_pfc) adapter->temp_dcb_cfg.pfc_mode_enable = true; - } } static void ixgbe_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority, @@ -336,7 +330,8 @@ static void ixgbe_dcbnl_devreset(struct net_device *dev) static u8 ixgbe_dcbnl_set_all(struct net_device *netdev) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - int ret, i; + int ret = DCB_NO_HW_CHG; + int i; #ifdef IXGBE_FCOE struct dcb_app app = { .selector = DCB_APP_IDTYPE_ETHTYPE, @@ -355,12 +350,13 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev) /* Fail command if not in CEE mode */ if (!(adapter->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) - return 1; + return ret; - ret = ixgbe_copy_dcb_cfg(&adapter->temp_dcb_cfg, &adapter->dcb_cfg, - MAX_TRAFFIC_CLASS); - if (ret) - return DCB_NO_HW_CHG; + adapter->dcb_set_bitmap |= ixgbe_copy_dcb_cfg(&adapter->temp_dcb_cfg, + &adapter->dcb_cfg, + MAX_TRAFFIC_CLASS); + if (!adapter->dcb_set_bitmap) + return ret; if (adapter->dcb_cfg.pfc_mode_enable) { switch (adapter->hw.mac.type) { @@ -420,6 +416,8 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev) for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) netdev_set_prio_tc_map(netdev, i, prio_tc[i]); + + ret = DCB_HW_CHG_RST; } if (adapter->dcb_set_bitmap & BIT_PFC) { @@ -430,7 +428,8 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev) DCB_TX_CONFIG, prio_tc); ixgbe_dcb_unpack_pfc(&adapter->dcb_cfg, &pfc_en); ixgbe_dcb_hw_pfc_config(&adapter->hw, pfc_en, prio_tc); - ret = DCB_HW_CHG; + if (ret != DCB_HW_CHG_RST) + ret = DCB_HW_CHG; } if (adapter->dcb_cfg.pfc_mode_enable) @@ -531,9 +530,6 @@ static void ixgbe_dcbnl_setpfcstate(struct net_device *netdev, u8 state) struct ixgbe_adapter *adapter = netdev_priv(netdev); adapter->temp_dcb_cfg.pfc_mode_enable = state; - if (adapter->temp_dcb_cfg.pfc_mode_enable != - adapter->dcb_cfg.pfc_mode_enable) - adapter->dcb_set_bitmap |= BIT_PFC; } /** -- cgit v1.2.3 From bb9e44d0d0f45da356c39e485edacff6e14ba961 Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Wed, 21 Mar 2012 00:39:12 +0000 Subject: e1000e: prevent oops when adapter is being closed and reset simultaneously When the adapter is closed while it is simultaneously going through a reset, it can cause a null-pointer dereference when the two different code paths simultaneously cleanup up the Tx/Rx resources. 
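The fix below follows a common pattern: before tearing down Tx/Rx resources, poll an "adapter is resetting" flag for a bounded number of iterations so a concurrent reset can finish first. A rough userspace sketch of that pattern (the flag, polling budget and sleep interval are illustrative, not the driver's actual values):

#define _POSIX_C_SOURCE 199309L
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define CHECK_RESET_COUNT 25	/* illustrative polling budget */

static volatile bool resetting;	/* set by the reset path, cleared when done */

static void sleep_ms(long ms)
{
	struct timespec ts = { .tv_sec = 0, .tv_nsec = ms * 1000000L };

	nanosleep(&ts, NULL);
}

static void close_device(void)
{
	int count = CHECK_RESET_COUNT;

	/* Give a concurrent reset a bounded amount of time to complete
	 * before freeing the rings underneath it. */
	while (resetting && count--)
		sleep_ms(15);

	if (resetting)
		fprintf(stderr, "warning: reset still in progress\n");

	/* ... safe to free Tx/Rx resources here ... */
}

int main(void)
{
	close_device();
	return 0;
}
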
Signed-off-by: Bruce Allan Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/e1000.h | 6 ++++++ drivers/net/ethernet/intel/e1000e/netdev.c | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index 86cdd4793992..b83897f76ee3 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -161,6 +161,12 @@ struct e1000_info; /* Time to wait before putting the device into D3 if there's no link (in ms). */ #define LINK_TIMEOUT 100 +/* + * Count for polling __E1000_RESET condition every 10-20msec. + * Experimentation has shown the reset can take approximately 210msec. + */ +#define E1000_CHECK_RESET_COUNT 25 + #define DEFAULT_RDTR 0 #define DEFAULT_RADV 8 #define BURST_RDTR 0x20 diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 2c38a65ade87..6878601f20c5 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3968,6 +3968,10 @@ static int e1000_close(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = adapter->pdev; + int count = E1000_CHECK_RESET_COUNT; + + while (test_bit(__E1000_RESETTING, &adapter->state) && count--) + usleep_range(10000, 20000); WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); @@ -5472,6 +5476,11 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake, netif_device_detach(netdev); if (netif_running(netdev)) { + int count = E1000_CHECK_RESET_COUNT; + + while (test_bit(__E1000_RESETTING, &adapter->state) && count--) + usleep_range(10000, 20000); + WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); e1000e_down(adapter); e1000_free_irq(adapter); -- cgit v1.2.3 From bf03085f85112eac2d19036ea3003071220285bb Mon Sep 17 00:00:00 2001 From: Matthew Vick Date: Fri, 16 Mar 2012 09:03:00 +0000 Subject: e1000e: Guarantee descriptor writeback flush success. In rare circumstances, a descriptor writeback flush may not work if it arrives on a specific clock cycle as a writeback request is going out. 
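The workaround below simply repeats the doorbell write so that at least one of the two writes cannot collide with an in-flight writeback request. A compressed sketch of that idea (the register offsets and I/O accessors are stand-ins, not e1000e's real ew32()/e1e_flush()):

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_regs[256];		/* stand-in for the MMIO window */

static void reg_write(unsigned int reg, uint32_t val)
{
	fake_regs[reg] = val;
}

static uint32_t reg_read(unsigned int reg)
{
	return fake_regs[reg];
}

static void flush_tx_descriptors(unsigned int tidv, uint32_t val)
{
	reg_write(tidv, val);
	(void)reg_read(0);	/* posted-write flush, like e1e_flush() */

	/* Repeat once: if the first write raced with an in-flight
	 * writeback, the second one still takes effect. */
	reg_write(tidv, val);
	(void)reg_read(0);
}

int main(void)
{
	flush_tx_descriptors(0x10, 0x1);
	printf("TIDV=%#x\n", (unsigned int)fake_regs[0x10]);
	return 0;
}
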
Signed-off-by: Matthew Vick Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 6878601f20c5..19ab2154802c 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1059,6 +1059,13 @@ static void e1000_print_hw_hang(struct work_struct *work) ew32(TIDV, adapter->tx_int_delay | E1000_TIDV_FPD); /* execute the writes immediately */ e1e_flush(); + /* + * Due to rare timing issues, write to TIDV again to ensure + * the write is successful + */ + ew32(TIDV, adapter->tx_int_delay | E1000_TIDV_FPD); + /* execute the writes immediately */ + e1e_flush(); adapter->tx_hang_recheck = true; return; } @@ -3616,6 +3623,16 @@ static void e1000e_flush_descriptors(struct e1000_adapter *adapter) /* execute the writes immediately */ e1e_flush(); + + /* + * due to rare timing issues, write to TIDV/RDTR again to ensure the + * write is successful + */ + ew32(TIDV, adapter->tx_int_delay | E1000_TIDV_FPD); + ew32(RDTR, adapter->rx_int_delay | E1000_RDTR_FPD); + + /* execute the writes immediately */ + e1e_flush(); } static void e1000e_update_stats(struct e1000_adapter *adapter); -- cgit v1.2.3 From b3300146aa8efc5d3937fd33f3cfdc580a3843bc Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 2 Apr 2012 00:02:09 +0000 Subject: phy:icplus:fix Auto Power Saving in ip101a_config_init. This patch fixes Auto Power Saving configuration in ip101a_config_init which was broken as there is no phy register write followed after setting IP101A_APS_ON flag. This patch also fixes the return value of ip101a_config_init. Without this patch ip101a_config_init returns 2 which is not an error accroding to IS_ERR and the mac driver will continue accessing 2 as valid pointer to phy_dev resulting in memory fault. Signed-off-by: Srinivas Kandagatla Signed-off-by: David S. Miller --- drivers/net/phy/icplus.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c index 0856e1b7a849..f08c85acf761 100644 --- a/drivers/net/phy/icplus.c +++ b/drivers/net/phy/icplus.c @@ -162,7 +162,8 @@ static int ip101a_g_config_init(struct phy_device *phydev) /* Enable Auto Power Saving mode */ c = phy_read(phydev, IP10XX_SPEC_CTRL_STATUS); c |= IP101A_G_APS_ON; - return c; + + return phy_write(phydev, IP10XX_SPEC_CTRL_STATUS, c); } static int ip175c_read_status(struct phy_device *phydev) -- cgit v1.2.3 From f2ed5ee1b050495be49e5a0d5df152663558ef08 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Tue, 3 Apr 2012 00:07:11 +0000 Subject: bnx2x: correction to firmware interface Commit 621b4d6 updated the bnx2x driver to a new FW version, but lacked a commit to a header file with changes to the firmware's interface. The missing interface change causes iscsi and fcoe to misbehave with the updated firmware. Signed-off-by: Yuval Mintz Signed-off-by: Dmitry Kravkov Signed-off-by: Eilon Greenstein CC: Michael Chan CC: Bhanu Prakash Gollapudi Signed-off-by: David S. 
Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h | 110 ++++++++++----------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h index cd6dfa9eaa3a..b9b263323436 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h @@ -25,31 +25,31 @@ (IRO[149].base + ((funcId) * IRO[149].m1)) #define CSTORM_IGU_MODE_OFFSET (IRO[157].base) #define CSTORM_ISCSI_CQ_SIZE_OFFSET(pfId) \ - (IRO[315].base + ((pfId) * IRO[315].m1)) -#define CSTORM_ISCSI_CQ_SQN_SIZE_OFFSET(pfId) \ (IRO[316].base + ((pfId) * IRO[316].m1)) +#define CSTORM_ISCSI_CQ_SQN_SIZE_OFFSET(pfId) \ + (IRO[317].base + ((pfId) * IRO[317].m1)) #define CSTORM_ISCSI_EQ_CONS_OFFSET(pfId, iscsiEqId) \ - (IRO[308].base + ((pfId) * IRO[308].m1) + ((iscsiEqId) * IRO[308].m2)) + (IRO[309].base + ((pfId) * IRO[309].m1) + ((iscsiEqId) * IRO[309].m2)) #define CSTORM_ISCSI_EQ_NEXT_EQE_ADDR_OFFSET(pfId, iscsiEqId) \ - (IRO[310].base + ((pfId) * IRO[310].m1) + ((iscsiEqId) * IRO[310].m2)) + (IRO[311].base + ((pfId) * IRO[311].m1) + ((iscsiEqId) * IRO[311].m2)) #define CSTORM_ISCSI_EQ_NEXT_PAGE_ADDR_OFFSET(pfId, iscsiEqId) \ - (IRO[309].base + ((pfId) * IRO[309].m1) + ((iscsiEqId) * IRO[309].m2)) + (IRO[310].base + ((pfId) * IRO[310].m1) + ((iscsiEqId) * IRO[310].m2)) #define CSTORM_ISCSI_EQ_NEXT_PAGE_ADDR_VALID_OFFSET(pfId, iscsiEqId) \ - (IRO[311].base + ((pfId) * IRO[311].m1) + ((iscsiEqId) * IRO[311].m2)) + (IRO[312].base + ((pfId) * IRO[312].m1) + ((iscsiEqId) * IRO[312].m2)) #define CSTORM_ISCSI_EQ_PROD_OFFSET(pfId, iscsiEqId) \ - (IRO[307].base + ((pfId) * IRO[307].m1) + ((iscsiEqId) * IRO[307].m2)) + (IRO[308].base + ((pfId) * IRO[308].m1) + ((iscsiEqId) * IRO[308].m2)) #define CSTORM_ISCSI_EQ_SB_INDEX_OFFSET(pfId, iscsiEqId) \ - (IRO[313].base + ((pfId) * IRO[313].m1) + ((iscsiEqId) * IRO[313].m2)) + (IRO[314].base + ((pfId) * IRO[314].m1) + ((iscsiEqId) * IRO[314].m2)) #define CSTORM_ISCSI_EQ_SB_NUM_OFFSET(pfId, iscsiEqId) \ - (IRO[312].base + ((pfId) * IRO[312].m1) + ((iscsiEqId) * IRO[312].m2)) + (IRO[313].base + ((pfId) * IRO[313].m1) + ((iscsiEqId) * IRO[313].m2)) #define CSTORM_ISCSI_HQ_SIZE_OFFSET(pfId) \ - (IRO[314].base + ((pfId) * IRO[314].m1)) + (IRO[315].base + ((pfId) * IRO[315].m1)) #define CSTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfId) \ - (IRO[306].base + ((pfId) * IRO[306].m1)) + (IRO[307].base + ((pfId) * IRO[307].m1)) #define CSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfId) \ - (IRO[305].base + ((pfId) * IRO[305].m1)) + (IRO[306].base + ((pfId) * IRO[306].m1)) #define CSTORM_ISCSI_PAGE_SIZE_OFFSET(pfId) \ - (IRO[304].base + ((pfId) * IRO[304].m1)) + (IRO[305].base + ((pfId) * IRO[305].m1)) #define CSTORM_RECORD_SLOW_PATH_OFFSET(funcId) \ (IRO[151].base + ((funcId) * IRO[151].m1)) #define CSTORM_SP_STATUS_BLOCK_DATA_OFFSET(pfId) \ @@ -96,37 +96,37 @@ #define TSTORM_FUNC_EN_OFFSET(funcId) \ (IRO[103].base + ((funcId) * IRO[103].m1)) #define TSTORM_ISCSI_ERROR_BITMAP_OFFSET(pfId) \ - (IRO[271].base + ((pfId) * IRO[271].m1)) -#define TSTORM_ISCSI_L2_ISCSI_OOO_CID_TABLE_OFFSET(pfId) \ (IRO[272].base + ((pfId) * IRO[272].m1)) -#define TSTORM_ISCSI_L2_ISCSI_OOO_CLIENT_ID_TABLE_OFFSET(pfId) \ +#define TSTORM_ISCSI_L2_ISCSI_OOO_CID_TABLE_OFFSET(pfId) \ (IRO[273].base + ((pfId) * IRO[273].m1)) -#define TSTORM_ISCSI_L2_ISCSI_OOO_PROD_OFFSET(pfId) \ +#define TSTORM_ISCSI_L2_ISCSI_OOO_CLIENT_ID_TABLE_OFFSET(pfId) \ (IRO[274].base + ((pfId) * IRO[274].m1)) +#define 
TSTORM_ISCSI_L2_ISCSI_OOO_PROD_OFFSET(pfId) \ + (IRO[275].base + ((pfId) * IRO[275].m1)) #define TSTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfId) \ - (IRO[270].base + ((pfId) * IRO[270].m1)) + (IRO[271].base + ((pfId) * IRO[271].m1)) #define TSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfId) \ - (IRO[269].base + ((pfId) * IRO[269].m1)) + (IRO[270].base + ((pfId) * IRO[270].m1)) #define TSTORM_ISCSI_PAGE_SIZE_OFFSET(pfId) \ - (IRO[268].base + ((pfId) * IRO[268].m1)) + (IRO[269].base + ((pfId) * IRO[269].m1)) #define TSTORM_ISCSI_RQ_SIZE_OFFSET(pfId) \ - (IRO[267].base + ((pfId) * IRO[267].m1)) + (IRO[268].base + ((pfId) * IRO[268].m1)) #define TSTORM_ISCSI_TCP_LOCAL_ADV_WND_OFFSET(pfId) \ - (IRO[276].base + ((pfId) * IRO[276].m1)) + (IRO[277].base + ((pfId) * IRO[277].m1)) #define TSTORM_ISCSI_TCP_VARS_FLAGS_OFFSET(pfId) \ - (IRO[263].base + ((pfId) * IRO[263].m1)) -#define TSTORM_ISCSI_TCP_VARS_LSB_LOCAL_MAC_ADDR_OFFSET(pfId) \ (IRO[264].base + ((pfId) * IRO[264].m1)) -#define TSTORM_ISCSI_TCP_VARS_MID_LOCAL_MAC_ADDR_OFFSET(pfId) \ +#define TSTORM_ISCSI_TCP_VARS_LSB_LOCAL_MAC_ADDR_OFFSET(pfId) \ (IRO[265].base + ((pfId) * IRO[265].m1)) -#define TSTORM_ISCSI_TCP_VARS_MSB_LOCAL_MAC_ADDR_OFFSET(pfId) \ +#define TSTORM_ISCSI_TCP_VARS_MID_LOCAL_MAC_ADDR_OFFSET(pfId) \ (IRO[266].base + ((pfId) * IRO[266].m1)) +#define TSTORM_ISCSI_TCP_VARS_MSB_LOCAL_MAC_ADDR_OFFSET(pfId) \ + (IRO[267].base + ((pfId) * IRO[267].m1)) #define TSTORM_MAC_FILTER_CONFIG_OFFSET(pfId) \ (IRO[202].base + ((pfId) * IRO[202].m1)) #define TSTORM_RECORD_SLOW_PATH_OFFSET(funcId) \ (IRO[105].base + ((funcId) * IRO[105].m1)) #define TSTORM_TCP_MAX_CWND_OFFSET(pfId) \ - (IRO[216].base + ((pfId) * IRO[216].m1)) + (IRO[217].base + ((pfId) * IRO[217].m1)) #define TSTORM_VF_TO_PF_OFFSET(funcId) \ (IRO[104].base + ((funcId) * IRO[104].m1)) #define USTORM_AGG_DATA_OFFSET (IRO[206].base) @@ -140,29 +140,29 @@ #define USTORM_ETH_PAUSE_ENABLED_OFFSET(portId) \ (IRO[183].base + ((portId) * IRO[183].m1)) #define USTORM_FCOE_EQ_PROD_OFFSET(pfId) \ - (IRO[317].base + ((pfId) * IRO[317].m1)) + (IRO[318].base + ((pfId) * IRO[318].m1)) #define USTORM_FUNC_EN_OFFSET(funcId) \ (IRO[178].base + ((funcId) * IRO[178].m1)) #define USTORM_ISCSI_CQ_SIZE_OFFSET(pfId) \ - (IRO[281].base + ((pfId) * IRO[281].m1)) -#define USTORM_ISCSI_CQ_SQN_SIZE_OFFSET(pfId) \ (IRO[282].base + ((pfId) * IRO[282].m1)) +#define USTORM_ISCSI_CQ_SQN_SIZE_OFFSET(pfId) \ + (IRO[283].base + ((pfId) * IRO[283].m1)) #define USTORM_ISCSI_ERROR_BITMAP_OFFSET(pfId) \ - (IRO[286].base + ((pfId) * IRO[286].m1)) + (IRO[287].base + ((pfId) * IRO[287].m1)) #define USTORM_ISCSI_GLOBAL_BUF_PHYS_ADDR_OFFSET(pfId) \ - (IRO[283].base + ((pfId) * IRO[283].m1)) + (IRO[284].base + ((pfId) * IRO[284].m1)) #define USTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfId) \ - (IRO[279].base + ((pfId) * IRO[279].m1)) + (IRO[280].base + ((pfId) * IRO[280].m1)) #define USTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfId) \ - (IRO[278].base + ((pfId) * IRO[278].m1)) + (IRO[279].base + ((pfId) * IRO[279].m1)) #define USTORM_ISCSI_PAGE_SIZE_OFFSET(pfId) \ - (IRO[277].base + ((pfId) * IRO[277].m1)) + (IRO[278].base + ((pfId) * IRO[278].m1)) #define USTORM_ISCSI_R2TQ_SIZE_OFFSET(pfId) \ - (IRO[280].base + ((pfId) * IRO[280].m1)) + (IRO[281].base + ((pfId) * IRO[281].m1)) #define USTORM_ISCSI_RQ_BUFFER_SIZE_OFFSET(pfId) \ - (IRO[284].base + ((pfId) * IRO[284].m1)) -#define USTORM_ISCSI_RQ_SIZE_OFFSET(pfId) \ (IRO[285].base + ((pfId) * IRO[285].m1)) +#define USTORM_ISCSI_RQ_SIZE_OFFSET(pfId) \ + (IRO[286].base + ((pfId) * IRO[286].m1)) #define 
USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(pfId) \ (IRO[182].base + ((pfId) * IRO[182].m1)) #define USTORM_RECORD_SLOW_PATH_OFFSET(funcId) \ @@ -188,39 +188,39 @@ #define XSTORM_FUNC_EN_OFFSET(funcId) \ (IRO[47].base + ((funcId) * IRO[47].m1)) #define XSTORM_ISCSI_HQ_SIZE_OFFSET(pfId) \ - (IRO[294].base + ((pfId) * IRO[294].m1)) + (IRO[295].base + ((pfId) * IRO[295].m1)) #define XSTORM_ISCSI_LOCAL_MAC_ADDR0_OFFSET(pfId) \ - (IRO[297].base + ((pfId) * IRO[297].m1)) -#define XSTORM_ISCSI_LOCAL_MAC_ADDR1_OFFSET(pfId) \ (IRO[298].base + ((pfId) * IRO[298].m1)) -#define XSTORM_ISCSI_LOCAL_MAC_ADDR2_OFFSET(pfId) \ +#define XSTORM_ISCSI_LOCAL_MAC_ADDR1_OFFSET(pfId) \ (IRO[299].base + ((pfId) * IRO[299].m1)) -#define XSTORM_ISCSI_LOCAL_MAC_ADDR3_OFFSET(pfId) \ +#define XSTORM_ISCSI_LOCAL_MAC_ADDR2_OFFSET(pfId) \ (IRO[300].base + ((pfId) * IRO[300].m1)) -#define XSTORM_ISCSI_LOCAL_MAC_ADDR4_OFFSET(pfId) \ +#define XSTORM_ISCSI_LOCAL_MAC_ADDR3_OFFSET(pfId) \ (IRO[301].base + ((pfId) * IRO[301].m1)) -#define XSTORM_ISCSI_LOCAL_MAC_ADDR5_OFFSET(pfId) \ +#define XSTORM_ISCSI_LOCAL_MAC_ADDR4_OFFSET(pfId) \ (IRO[302].base + ((pfId) * IRO[302].m1)) -#define XSTORM_ISCSI_LOCAL_VLAN_OFFSET(pfId) \ +#define XSTORM_ISCSI_LOCAL_MAC_ADDR5_OFFSET(pfId) \ (IRO[303].base + ((pfId) * IRO[303].m1)) +#define XSTORM_ISCSI_LOCAL_VLAN_OFFSET(pfId) \ + (IRO[304].base + ((pfId) * IRO[304].m1)) #define XSTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfId) \ - (IRO[293].base + ((pfId) * IRO[293].m1)) + (IRO[294].base + ((pfId) * IRO[294].m1)) #define XSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfId) \ - (IRO[292].base + ((pfId) * IRO[292].m1)) + (IRO[293].base + ((pfId) * IRO[293].m1)) #define XSTORM_ISCSI_PAGE_SIZE_OFFSET(pfId) \ - (IRO[291].base + ((pfId) * IRO[291].m1)) + (IRO[292].base + ((pfId) * IRO[292].m1)) #define XSTORM_ISCSI_R2TQ_SIZE_OFFSET(pfId) \ - (IRO[296].base + ((pfId) * IRO[296].m1)) + (IRO[297].base + ((pfId) * IRO[297].m1)) #define XSTORM_ISCSI_SQ_SIZE_OFFSET(pfId) \ - (IRO[295].base + ((pfId) * IRO[295].m1)) + (IRO[296].base + ((pfId) * IRO[296].m1)) #define XSTORM_ISCSI_TCP_VARS_ADV_WND_SCL_OFFSET(pfId) \ - (IRO[290].base + ((pfId) * IRO[290].m1)) + (IRO[291].base + ((pfId) * IRO[291].m1)) #define XSTORM_ISCSI_TCP_VARS_FLAGS_OFFSET(pfId) \ - (IRO[289].base + ((pfId) * IRO[289].m1)) + (IRO[290].base + ((pfId) * IRO[290].m1)) #define XSTORM_ISCSI_TCP_VARS_TOS_OFFSET(pfId) \ - (IRO[288].base + ((pfId) * IRO[288].m1)) + (IRO[289].base + ((pfId) * IRO[289].m1)) #define XSTORM_ISCSI_TCP_VARS_TTL_OFFSET(pfId) \ - (IRO[287].base + ((pfId) * IRO[287].m1)) + (IRO[288].base + ((pfId) * IRO[288].m1)) #define XSTORM_RATE_SHAPING_PER_VN_VARS_OFFSET(pfId) \ (IRO[44].base + ((pfId) * IRO[44].m1)) #define XSTORM_RECORD_SLOW_PATH_OFFSET(funcId) \ -- cgit v1.2.3 From cff4c16296754888b6fd8c886bc860a888e20257 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Tue, 3 Apr 2012 10:29:11 +0000 Subject: r8169: enable napi on resume. NAPI is disabled during suspend and needs to be enabled on resume. Without this the driver locks up during resume in rtl_reset_work() trying to disable NAPI again. Signed-off-by: Artem Savkov Acked-by: Francois Romieu Signed-off-by: David S. 
Miller --- drivers/net/ethernet/realtek/r8169.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 7b23554f80b6..f54509377efa 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -5810,7 +5810,10 @@ static void __rtl8169_resume(struct net_device *dev) rtl_pll_power_up(tp); + rtl_lock_work(tp); + napi_enable(&tp->napi); set_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags); + rtl_unlock_work(tp); rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); } -- cgit v1.2.3 From ca53e4405347a1e19eaf59c757ceaaaa1a784758 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Apr 2012 12:32:15 +0200 Subject: netfilter: xt_CT: allocation has to be GFP_ATOMIC under rcu_read_lock section Reported-by: Tetsuo Handa Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netfilter/xt_CT.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 138b75e41fdd..4babb278e41e 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -261,7 +261,7 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) goto err4; } timeout_ext = nf_ct_timeout_ext_add(ct, timeout, - GFP_KERNEL); + GFP_ATOMIC); if (timeout_ext == NULL) { ret = -ENOMEM; goto err4; -- cgit v1.2.3 From ee14186f8d2338227888f3c00a06caf31f94de38 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Apr 2012 14:50:07 +0200 Subject: netfilter: xt_CT: fix missing put timeout object in error path The error path misses putting the timeout object. This patch adds new function xt_ct_tg_timeout_put() to put the timeout object. Reported-by: Tetsuo Handa Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netfilter/xt_CT.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 4babb278e41e..59530e93fa58 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -150,6 +150,17 @@ err1: return ret; } +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT +static void __xt_ct_tg_timeout_put(struct ctnl_timeout *timeout) +{ + typeof(nf_ct_timeout_put_hook) timeout_put; + + timeout_put = rcu_dereference(nf_ct_timeout_put_hook); + if (timeout_put) + timeout_put(timeout); +} +#endif + static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) { struct xt_ct_target_info_v1 *info = par->targinfo; @@ -158,7 +169,9 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) struct nf_conn *ct; int ret = 0; u8 proto; - +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + struct ctnl_timeout *timeout; +#endif if (info->flags & ~XT_CT_NOTRACK) return -EINVAL; @@ -216,7 +229,6 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) #ifdef CONFIG_NF_CONNTRACK_TIMEOUT if (info->timeout) { typeof(nf_ct_timeout_find_get_hook) timeout_find_get; - struct ctnl_timeout *timeout; struct nf_conn_timeout *timeout_ext; rcu_read_lock(); @@ -245,7 +257,7 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) pr_info("Timeout policy `%s' can only be " "used by L3 protocol number %d\n", info->timeout, timeout->l3num); - goto err4; + goto err5; } /* Make sure the timeout policy matches any existing * protocol tracker, otherwise default to generic. 
@@ -258,13 +270,13 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) "used by L4 protocol number %d\n", info->timeout, timeout->l4proto->l4proto); - goto err4; + goto err5; } timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC); if (timeout_ext == NULL) { ret = -ENOMEM; - goto err4; + goto err5; } } else { ret = -ENOENT; @@ -282,6 +294,8 @@ out: return 0; #ifdef CONFIG_NF_CONNTRACK_TIMEOUT +err5: + __xt_ct_tg_timeout_put(timeout); err4: rcu_read_unlock(); #endif -- cgit v1.2.3 From d96fc659aeb27686cef42d305cfd0c9702f8841c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Apr 2012 16:45:54 +0200 Subject: netfilter: nf_conntrack: fix count leak in error path of __nf_conntrack_alloc We have to decrement the conntrack counter if we fail to access the zone extension. Reported-by: Tetsuo Handa Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index cbdb754dbb10..3cc4487ac349 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -735,6 +735,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone, #ifdef CONFIG_NF_CONNTRACK_ZONES out_free: + atomic_dec(&net->ct.count); kmem_cache_free(net->ct.nf_conntrack_cachep, ct); return ERR_PTR(-ENOMEM); #endif -- cgit v1.2.3 From acdd5985364f8dc511a0762fab2e683f29d9d692 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 Apr 2012 22:17:53 +0000 Subject: sctp: Allow struct sctp_event_subscribe to grow without breaking binaries getsockopt(..., SCTP_EVENTS, ...) performs a length check and returns an error if the user provides less bytes than the size of struct sctp_event_subscribe. Struct sctp_event_subscribe needs to be extended by an u8 for every new event or notification type that is added. This obviously makes getsockopt fail for binaries that are compiled against an older versions of which do not contain all event types. This patch changes getsockopt behaviour to no longer return an error if not enough bytes are being provided by the user. Instead, it returns as much of sctp_event_subscribe as fits into the provided buffer. This leads to the new behavior that users see what they have been aware of at compile time. The setsockopt(..., SCTP_EVENTS, ...) API is already behaving like this. Signed-off-by: Thomas Graf Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/socket.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 06b42b7f5a02..92ba71dfe080 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4133,9 +4133,10 @@ static int sctp_getsockopt_disable_fragments(struct sock *sk, int len, static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval, int __user *optlen) { - if (len < sizeof(struct sctp_event_subscribe)) + if (len <= 0) return -EINVAL; - len = sizeof(struct sctp_event_subscribe); + if (len > sizeof(struct sctp_event_subscribe)) + len = sizeof(struct sctp_event_subscribe); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &sctp_sk(sk)->subscribe, len)) -- cgit v1.2.3 From 2af73d4b2afe826d23e83f3747f850eefbd867ff Mon Sep 17 00:00:00 2001 From: Shlomo Pongratz Date: Tue, 3 Apr 2012 22:56:19 +0000 Subject: net/bonding: emit address change event also in bond_release commit 7d26bb103c4 "bonding: emit event when bonding changes MAC" didn't take care to emit the NETDEV_CHANGEADDR event in bond_release, where bonding actually changes the mac address (to all zeroes). As a result the neighbours aren't deleted by the core networking code (which does so upon getting that event). Signed-off-by: Shlomo Pongratz Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 941b4e189adf..5cb85cb0be4f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2034,6 +2034,9 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) write_unlock_bh(&bond->lock); unblock_netpoll_tx(); + if (bond->slave_cnt == 0) + call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev); + bond_compute_features(bond); if (!(bond_dev->features & NETIF_F_VLAN_CHALLENGED) && (old_features & NETIF_F_VLAN_CHALLENGED)) -- cgit v1.2.3 From 234bcf8a499ee206145c7007d12d9706a254f790 Mon Sep 17 00:00:00 2001 From: Shlomo Pongratz Date: Tue, 3 Apr 2012 22:56:20 +0000 Subject: net/bonding: correctly proxy slave neigh param setup ndo function The current implemenation was buggy for slaves who use ndo_neigh_setup, since the networking stack invokes the bonding device ndo entry (from neigh_params_alloc) before any devices are enslaved, and the bonding driver can't further delegate the call at that point in time. As a result when bonding IPoIB devices, the neigh_cleanup hasn't been called. Fix that by deferring the actual call into the slave ndo_neigh_setup from the time the bonding neigh_setup is called. Signed-off-by: Shlomo Pongratz Signed-off-by: Jay Vosburgh Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 51 ++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5cb85cb0be4f..fc8a8d5c4dbd 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3704,17 +3704,52 @@ static void bond_set_multicast_list(struct net_device *bond_dev) read_unlock(&bond->lock); } -static int bond_neigh_setup(struct net_device *dev, struct neigh_parms *parms) +static int bond_neigh_init(struct neighbour *n) { - struct bonding *bond = netdev_priv(dev); + struct bonding *bond = netdev_priv(n->dev); struct slave *slave = bond->first_slave; + const struct net_device_ops *slave_ops; + struct neigh_parms parms; + int ret; + + if (!slave) + return 0; + + slave_ops = slave->dev->netdev_ops; + + if (!slave_ops->ndo_neigh_setup) + return 0; + + parms.neigh_setup = NULL; + parms.neigh_cleanup = NULL; + ret = slave_ops->ndo_neigh_setup(slave->dev, &parms); + if (ret) + return ret; + + /* + * Assign slave's neigh_cleanup to neighbour in case cleanup is called + * after the last slave has been detached. Assumes that all slaves + * utilize the same neigh_cleanup (true at this writing as only user + * is ipoib). + */ + n->parms->neigh_cleanup = parms.neigh_cleanup; + + if (!parms.neigh_setup) + return 0; + + return parms.neigh_setup(n); +} + +/* + * The bonding ndo_neigh_setup is called at init time beofre any + * slave exists. So we must declare proxy setup function which will + * be used at run time to resolve the actual slave neigh param setup. + */ +static int bond_neigh_setup(struct net_device *dev, + struct neigh_parms *parms) +{ + parms->neigh_setup = bond_neigh_init; - if (slave) { - const struct net_device_ops *slave_ops - = slave->dev->netdev_ops; - if (slave_ops->ndo_neigh_setup) - return slave_ops->ndo_neigh_setup(slave->dev, parms); - } return 0; } -- cgit v1.2.3 From 857504d06d565ad5b401d505937932775ddf1c47 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 4 Apr 2012 12:10:27 +0000 Subject: sky2: copy received packets on inefficient unaligned architecture Modified from original patch from Chris. The sky2 driver has to have 8 byte alignment of receive buffer on some chip versions. On architectures which don't support efficient unaligned access this doesn't work very well. The solution is to just copy all received packets which is what the driver already does for small packets. This allows the driver to be used on the Tilera TILEmpower-Gx, since the tile architecture doesn't currently handle kernel unaligned accesses, just userspace. Signed-off-by: Stephen Hemminger Acked-by: Chris Metcalf Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/sky2.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index b806d9b4defb..c9b504e2dfc3 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -2469,6 +2469,17 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu) return err; } +static inline bool needs_copy(const struct rx_ring_info *re, + unsigned length) +{ +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + /* Some architectures need the IP header to be aligned */ + if (!IS_ALIGNED(re->data_addr + ETH_HLEN, sizeof(u32))) + return true; +#endif + return length < copybreak; +} + /* For small just reuse existing skb for next receive */ static struct sk_buff *receive_copy(struct sky2_port *sky2, const struct rx_ring_info *re, @@ -2599,7 +2610,7 @@ static struct sk_buff *sky2_receive(struct net_device *dev, goto error; okay: - if (length < copybreak) + if (needs_copy(re, length)) skb = receive_copy(sky2, re, length); else skb = receive_new(sky2, re, length); -- cgit v1.2.3 From 27d9129f5ae830cc031a898e0c220e1cdda69b34 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 4 Apr 2012 01:28:54 +0000 Subject: bnx2x: PFC fix Fix a problem in which PFC frames are not honored, due to incorrect link attributes synchronization following PMF migration, and verify PFC XON is not stuck from previous link change. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 18 +++++++++++++++++- drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h | 1 + 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index efa557b76ac7..7e2ebbad7587 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -1371,7 +1371,14 @@ static void bnx2x_update_pfc_xmac(struct link_params *params, pfc1_val |= XMAC_PFC_CTRL_HI_REG_PFC_REFRESH_EN | XMAC_PFC_CTRL_HI_REG_PFC_STATS_EN | XMAC_PFC_CTRL_HI_REG_RX_PFC_EN | - XMAC_PFC_CTRL_HI_REG_TX_PFC_EN; + XMAC_PFC_CTRL_HI_REG_TX_PFC_EN | + XMAC_PFC_CTRL_HI_REG_FORCE_PFC_XON; + /* Write pause and PFC registers */ + REG_WR(bp, xmac_base + XMAC_REG_PAUSE_CTRL, pause_val); + REG_WR(bp, xmac_base + XMAC_REG_PFC_CTRL, pfc0_val); + REG_WR(bp, xmac_base + XMAC_REG_PFC_CTRL_HI, pfc1_val); + pfc1_val &= ~XMAC_PFC_CTRL_HI_REG_FORCE_PFC_XON; + } /* Write pause and PFC registers */ @@ -6843,6 +6850,12 @@ int bnx2x_link_update(struct link_params *params, struct link_vars *vars) SINGLE_MEDIA_DIRECT(params)) && (phy_vars[active_external_phy].fault_detected == 0)); + /* Update the PFC configuration in case it was changed */ + if (params->feature_config_flags & FEATURE_CONFIG_PFC_ENABLED) + vars->link_status |= LINK_STATUS_PFC_ENABLED; + else + vars->link_status &= ~LINK_STATUS_PFC_ENABLED; + if (vars->link_up) rc = bnx2x_update_link_up(params, vars, link_10g_plus); else @@ -12049,6 +12062,9 @@ int bnx2x_phy_init(struct link_params *params, struct link_vars *vars) bnx2x_emac_init(params, vars); + if (params->feature_config_flags & FEATURE_CONFIG_PFC_ENABLED) + vars->link_status |= LINK_STATUS_PFC_ENABLED; + if (params->num_phys == 0) { DP(NETIF_MSG_LINK, "No phy found for initialization !!\n"); return -EINVAL; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h index ab0a250f95fa..ecc7fa6a6ca5 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h @@ -5354,6 +5354,7 @@ #define XMAC_CTRL_REG_TX_EN (0x1<<0) #define XMAC_PAUSE_CTRL_REG_RX_PAUSE_EN (0x1<<18) #define XMAC_PAUSE_CTRL_REG_TX_PAUSE_EN (0x1<<17) +#define XMAC_PFC_CTRL_HI_REG_FORCE_PFC_XON (0x1<<1) #define XMAC_PFC_CTRL_HI_REG_PFC_REFRESH_EN (0x1<<0) #define XMAC_PFC_CTRL_HI_REG_PFC_STATS_EN (0x1<<3) #define XMAC_PFC_CTRL_HI_REG_RX_PFC_EN (0x1<<4) -- cgit v1.2.3 From ca05f29cf515ac4a8e162c8e0eee886727f5dcc7 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 4 Apr 2012 01:28:55 +0000 Subject: bnx2x: Fix BCM57810-KR FC Fix 57810-KR flow-control handling link is achieved via CL37 AN. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 27 ++++++++++++++++++++++++ drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h | 4 ++++ 2 files changed, 31 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 7e2ebbad7587..8c00bbc94038 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -3655,6 +3655,33 @@ static void bnx2x_ext_phy_update_adv_fc(struct bnx2x_phy *phy, if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54618SE) { bnx2x_cl22_read(bp, phy, 0x4, &ld_pause); bnx2x_cl22_read(bp, phy, 0x5, &lp_pause); + } else if (CHIP_IS_E3(bp) && + SINGLE_MEDIA_DIRECT(params)) { + u8 lane = bnx2x_get_warpcore_lane(phy, params); + u16 gp_status, gp_mask; + bnx2x_cl45_read(bp, phy, + MDIO_AN_DEVAD, MDIO_WC_REG_GP2_STATUS_GP_2_4, + &gp_status); + gp_mask = (MDIO_WC_REG_GP2_STATUS_GP_2_4_CL73_AN_CMPL | + MDIO_WC_REG_GP2_STATUS_GP_2_4_CL37_LP_AN_CAP) << + lane; + if ((gp_status & gp_mask) == gp_mask) { + bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD, + MDIO_AN_REG_ADV_PAUSE, &ld_pause); + bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD, + MDIO_AN_REG_LP_AUTO_NEG, &lp_pause); + } else { + bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD, + MDIO_AN_REG_CL37_FC_LD, &ld_pause); + bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD, + MDIO_AN_REG_CL37_FC_LP, &lp_pause); + ld_pause = ((ld_pause & + MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH) + << 3); + lp_pause = ((lp_pause & + MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH) + << 3); + } } else { bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h index ecc7fa6a6ca5..1ea2b956e194 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h @@ -6944,6 +6944,10 @@ Theotherbitsarereservedandshouldbezero*/ #define MDIO_WC_REG_GP2_STATUS_GP_2_2 0x81d2 #define MDIO_WC_REG_GP2_STATUS_GP_2_3 0x81d3 #define MDIO_WC_REG_GP2_STATUS_GP_2_4 0x81d4 +#define MDIO_WC_REG_GP2_STATUS_GP_2_4_CL73_AN_CMPL 0x1000 +#define MDIO_WC_REG_GP2_STATUS_GP_2_4_CL37_AN_CMPL 0x0100 +#define MDIO_WC_REG_GP2_STATUS_GP_2_4_CL37_LP_AN_CAP 0x0010 +#define MDIO_WC_REG_GP2_STATUS_GP_2_4_CL37_AN_CAP 0x1 #define MDIO_WC_REG_UC_INFO_B0_DEAD_TRAP 0x81EE #define MDIO_WC_REG_UC_INFO_B1_VERSION 0x81F0 #define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE 0x81F2 -- cgit v1.2.3 From 6a51c0d17b8fb6ae300ba5bc42a020160944e1b2 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 4 Apr 2012 01:28:56 +0000 Subject: bnx2x: Fix BCM57810-KR AN speed transition BCM57810-KR link may not 
come up in 1G after running loopback test, so set the relevant registers to their default values before starting KR autoneg. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 8c00bbc94038..ce0b0c220e64 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -3732,7 +3732,23 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy, u16 val16 = 0, lane, bam37 = 0; struct bnx2x *bp = params->bp; DP(NETIF_MSG_LINK, "Enable Auto Negotiation for KR\n"); - + /* Set to default registers that may be overriden by 10G force */ + bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, + MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2, 0x7); + bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, + MDIO_WC_REG_PAR_DET_10G_CTRL, 0); + bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, + MDIO_WC_REG_CL72_USERB0_CL72_MISC1_CONTROL, 0); + bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, + MDIO_WC_REG_XGXSBLK1_LANECTRL0, 0xff); + bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, + MDIO_WC_REG_XGXSBLK1_LANECTRL1, 0x5555); + bnx2x_cl45_write(bp, phy, MDIO_PMA_DEVAD, + MDIO_WC_REG_IEEE0BLK_AUTONEGNP, 0x0); + bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, + MDIO_WC_REG_RX66_CONTROL, 0x7415); + bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, + MDIO_WC_REG_SERDESDIGITAL_MISC2, 0x6190); /* Disable Autoneg: re-enable it after adv is done. */ bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_WC_REG_IEEE0BLK_MIICNTL, 0); @@ -4402,7 +4418,7 @@ static void bnx2x_warpcore_config_init(struct bnx2x_phy *phy, switch (serdes_net_if) { case PORT_HW_CFG_NET_SERDES_IF_KR: /* Enable KR Auto Neg */ - if (params->loopback_mode == LOOPBACK_NONE) + if (params->loopback_mode != LOOPBACK_EXT) bnx2x_warpcore_enable_AN_KR(phy, params, vars); else { DP(NETIF_MSG_LINK, "Setting KR 10G-Force\n"); -- cgit v1.2.3 From 25182fc22237f0fb1789c7ac9a79e871a1898ae5 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 4 Apr 2012 01:28:57 +0000 Subject: bnx2x: Fix BCM578x0-SFI pre-emphasis settings Fix 578x0-SFI pre-emphasis settings per HW recommendations to achieve better link strength. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index ce0b0c220e64..1e2fea37eec9 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -3994,13 +3994,13 @@ static void bnx2x_warpcore_set_10G_XFI(struct bnx2x_phy *phy, } else { misc1_val |= 0x9; - tap_val = ((0x12 << MDIO_WC_REG_TX_FIR_TAP_POST_TAP_OFFSET) | - (0x2d << MDIO_WC_REG_TX_FIR_TAP_MAIN_TAP_OFFSET) | - (0x00 << MDIO_WC_REG_TX_FIR_TAP_PRE_TAP_OFFSET)); + tap_val = ((0x0f << MDIO_WC_REG_TX_FIR_TAP_POST_TAP_OFFSET) | + (0x2b << MDIO_WC_REG_TX_FIR_TAP_MAIN_TAP_OFFSET) | + (0x02 << MDIO_WC_REG_TX_FIR_TAP_PRE_TAP_OFFSET)); tx_driver_val = - ((0x02 << MDIO_WC_REG_TX0_TX_DRIVER_POST2_COEFF_OFFSET) | + ((0x03 << MDIO_WC_REG_TX0_TX_DRIVER_POST2_COEFF_OFFSET) | (0x02 << MDIO_WC_REG_TX0_TX_DRIVER_IDRIVER_OFFSET) | - (0x02 << MDIO_WC_REG_TX0_TX_DRIVER_IPRE_DRIVER_OFFSET)); + (0x06 << MDIO_WC_REG_TX0_TX_DRIVER_IPRE_DRIVER_OFFSET)); } bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_MISC1, misc1_val); -- cgit v1.2.3 From 9379c9be4b20d5cb7bde577f402b749cd7d3caa2 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 4 Apr 2012 01:28:58 +0000 Subject: bnx2x: Restore 1G LED on BCM57712+BCM8727 designs. Fix no-LED problem when link speed is 1G on BCM57712 + BCM8727 designs, by removing a logic error checking for a different PHY. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 33 +++++++++++++----------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 1e2fea37eec9..1438da858678 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -6216,12 +6216,14 @@ int bnx2x_set_led(struct link_params *params, tmp = EMAC_RD(bp, EMAC_REG_EMAC_LED); if (params->phy[EXT_PHY1].type == - PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54618SE) - EMAC_WR(bp, EMAC_REG_EMAC_LED, tmp & 0xfff1); - else { - EMAC_WR(bp, EMAC_REG_EMAC_LED, - (tmp | EMAC_LED_OVERRIDE)); - } + PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54618SE) + tmp &= ~(EMAC_LED_1000MB_OVERRIDE | + EMAC_LED_100MB_OVERRIDE | + EMAC_LED_10MB_OVERRIDE); + else + tmp |= EMAC_LED_OVERRIDE; + + EMAC_WR(bp, EMAC_REG_EMAC_LED, tmp); break; case LED_MODE_OPER: @@ -6276,10 +6278,15 @@ int bnx2x_set_led(struct link_params *params, hw_led_mode); } else if ((params->phy[EXT_PHY1].type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54618SE) && - (mode != LED_MODE_OPER)) { + (mode == LED_MODE_ON)) { REG_WR(bp, NIG_REG_LED_MODE_P0 + port*4, 0); tmp = EMAC_RD(bp, EMAC_REG_EMAC_LED); - EMAC_WR(bp, EMAC_REG_EMAC_LED, tmp | 0x3); + EMAC_WR(bp, EMAC_REG_EMAC_LED, tmp | + EMAC_LED_OVERRIDE | EMAC_LED_1000MB_OVERRIDE); + /* Break here; otherwise, it'll disable the + * intended override. 
+ */ + break; } else REG_WR(bp, NIG_REG_LED_MODE_P0 + port*4, hw_led_mode); @@ -6294,13 +6301,9 @@ int bnx2x_set_led(struct link_params *params, LED_BLINK_RATE_VAL_E1X_E2); REG_WR(bp, NIG_REG_LED_CONTROL_BLINK_RATE_ENA_P0 + port*4, 1); - if ((params->phy[EXT_PHY1].type != - PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54618SE) && - (mode != LED_MODE_OPER)) { - tmp = EMAC_RD(bp, EMAC_REG_EMAC_LED); - EMAC_WR(bp, EMAC_REG_EMAC_LED, - (tmp & (~EMAC_LED_OVERRIDE))); - } + tmp = EMAC_RD(bp, EMAC_REG_EMAC_LED); + EMAC_WR(bp, EMAC_REG_EMAC_LED, + (tmp & (~EMAC_LED_OVERRIDE))); if (CHIP_IS_E1(bp) && ((speed == SPEED_2500) || -- cgit v1.2.3 From 59a2e53b826103be8c22d9820355320b749b38ef Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 4 Apr 2012 01:28:59 +0000 Subject: bnx2x: Fix link issue for BCM8727 boards. This patch fixes a link problem on BCM57712 + BCM8727 designs in which the TX laser is controller by GPIO, after 1.60.xx drivers were previously loaded. On these designs the TX_LASER is enabled by logic AND between the PHY (through MDIO), and the GPIO. When an old driver is used, it disables the MDIO part, hence the GPIO control had no affect de facto. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 1438da858678..732b4c80f116 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -8089,7 +8089,9 @@ static int bnx2x_verify_sfp_module(struct bnx2x_phy *phy, netdev_err(bp->dev, "Warning: Unqualified SFP+ module detected," " Port %d from %s part number %s\n", params->port, vendor_name, vendor_pn); - phy->flags |= FLAGS_SFP_NOT_APPROVED; + if ((val & PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_MASK) != + PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_WARNING_MSG) + phy->flags |= FLAGS_SFP_NOT_APPROVED; return -EINVAL; } @@ -9149,6 +9151,12 @@ static int bnx2x_8727_config_init(struct bnx2x_phy *phy, tmp2 &= 0xFFEF; bnx2x_cl45_write(bp, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_OPT_CFG_REG, tmp2); + bnx2x_cl45_read(bp, phy, + MDIO_PMA_DEVAD, MDIO_PMA_REG_PHY_IDENTIFIER, + &tmp2); + bnx2x_cl45_write(bp, phy, + MDIO_PMA_DEVAD, MDIO_PMA_REG_PHY_IDENTIFIER, + (tmp2 & 0x7fff)); } return 0; @@ -9329,12 +9337,11 @@ static u8 bnx2x_8727_read_status(struct bnx2x_phy *phy, MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL, ((1<<5) | (1<<2))); } - DP(NETIF_MSG_LINK, "Enabling 8727 TX laser if SFP is approved\n"); - bnx2x_8727_specific_func(phy, params, ENABLE_TX); - /* If transmitter is disabled, ignore false link up indication */ - bnx2x_cl45_read(bp, phy, - MDIO_PMA_DEVAD, MDIO_PMA_REG_PHY_IDENTIFIER, &val1); - if (val1 & (1<<15)) { + + if (!(phy->flags & FLAGS_SFP_NOT_APPROVED)) { + DP(NETIF_MSG_LINK, "Enabling 8727 TX laser\n"); + bnx2x_sfp_set_transmitter(params, phy, 1); + } else { DP(NETIF_MSG_LINK, "Tx is disabled\n"); return 0; } -- cgit v1.2.3 From 8267bbb01f005fa8d0c8d046ecc24ae0b52e4d70 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 4 Apr 2012 01:29:00 +0000 Subject: bnx2x: Fix BCM84833 PHY FW version presentation Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 732b4c80f116..b576a3ae8e91 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -9435,8 +9435,7 @@ static void bnx2x_save_848xx_spirom_version(struct bnx2x_phy *phy, if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) { bnx2x_cl45_read(bp, phy, MDIO_CTL_DEVAD, 0x400f, &fw_ver1); - bnx2x_save_spirom_version(bp, port, - ((fw_ver1 & 0xf000)>>5) | (fw_ver1 & 0x7f), + bnx2x_save_spirom_version(bp, port, fw_ver1 & 0xfff, phy->ver_addr); } else { /* For 32-bit registers in 848xx, access via MDIO2ARM i/f. */ -- cgit v1.2.3 From 99bf7f34368aac9b54dfa8801ae490a2326704f9 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 4 Apr 2012 01:29:01 +0000 Subject: bnx2x: Clear BCM84833 LED after fan failure Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 9 +++++++++ drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h | 3 +++ 2 files changed, 12 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index b576a3ae8e91..ce95ef71f96f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -9858,6 +9858,15 @@ static int bnx2x_84833_hw_reset_phy(struct bnx2x_phy *phy, other_shmem_base_addr)); u32 shmem_base_path[2]; + + /* Work around for 84833 LED failure inside RESET status */ + bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, + MDIO_AN_REG_8481_LEGACY_MII_CTRL, + MDIO_AN_REG_8481_MII_CTRL_FORCE_1G); + bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, + MDIO_AN_REG_8481_1G_100T_EXT_CTRL, + MIDO_AN_REG_8481_EXT_CTRL_FORCE_LEDS_OFF); + shmem_base_path[0] = params->shmem_base; shmem_base_path[1] = other_shmem_base_addr; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h index 1ea2b956e194..c25803b9c0ca 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h @@ -6821,10 +6821,13 @@ Theotherbitsarereservedandshouldbezero*/ #define MDIO_AN_REG_8481_10GBASE_T_AN_CTRL 0x0020 #define MDIO_AN_REG_8481_LEGACY_MII_CTRL 0xffe0 +#define MDIO_AN_REG_8481_MII_CTRL_FORCE_1G 0x40 #define MDIO_AN_REG_8481_LEGACY_MII_STATUS 0xffe1 #define MDIO_AN_REG_8481_LEGACY_AN_ADV 0xffe4 #define MDIO_AN_REG_8481_LEGACY_AN_EXPANSION 0xffe6 #define MDIO_AN_REG_8481_1000T_CTRL 0xffe9 +#define MDIO_AN_REG_8481_1G_100T_EXT_CTRL 0xfff0 +#define MIDO_AN_REG_8481_EXT_CTRL_FORCE_LEDS_OFF 0x0008 #define MDIO_AN_REG_8481_EXPANSION_REG_RD_RW 0xfff5 #define MDIO_AN_REG_8481_EXPANSION_REG_ACCESS 0xfff7 #define MDIO_AN_REG_8481_AUX_CTRL 0xfff8 -- cgit v1.2.3 From f93fb01628c00d1f26e8b45d2f10b8feb650dd4b Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 4 Apr 2012 01:29:02 +0000 Subject: bnx2x: Fix BCM57711+BCM84823 link issue Fix a link problem on the second port of BCM57711 + BCM84823 boards due to incorrect macro usage. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index ce95ef71f96f..7e41682bc2dd 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -10177,7 +10177,7 @@ static void bnx2x_848x3_link_reset(struct bnx2x_phy *phy, u8 port; u16 val16; - if (!(CHIP_IS_E1(bp))) + if (!(CHIP_IS_E1x(bp))) port = BP_PATH(bp); else port = params->port; @@ -10204,7 +10204,7 @@ static void bnx2x_848xx_set_link_led(struct bnx2x_phy *phy, u16 val; u8 port; - if (!(CHIP_IS_E1(bp))) + if (!(CHIP_IS_E1x(bp))) port = BP_PATH(bp); else port = params->port; -- cgit v1.2.3 From ca7b91bbd1f150216e6354cc20818aa993f331f2 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 4 Apr 2012 01:40:02 +0000 Subject: bnx2x: Clear MDC/MDIO warning message This patch clears a warning message of "MDC/MDIO access timeout" which may appear when interface is loaded due to missing clock setting before resetting the LED, and starting periodic function too early. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 1 - drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 44556b719e81..4b054812713a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -1874,7 +1874,6 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) * bnx2x_periodic_task(). */ smp_mb(); - queue_delayed_work(bnx2x_wq, &bp->period_task, 0); } else bp->port.pmf = 0; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 7e41682bc2dd..ad95324dc042 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -12205,10 +12205,10 @@ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars, * Hold it as vars low */ /* clear link led */ + bnx2x_set_mdio_clk(bp, params->chip_id, port); bnx2x_set_led(params, vars, LED_MODE_OFF, 0); if (reset_ext_phy) { - bnx2x_set_mdio_clk(bp, params->chip_id, port); for (phy_index = EXT_PHY1; phy_index < params->num_phys; phy_index++) { if (params->phy[phy_index].link_reset) { -- cgit v1.2.3 From 03f2eecdfc8aadae395ac593c099006390c8085c Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 3 Apr 2012 22:13:01 +0000 Subject: stmmac: re-add IFF_UNICAST_FLT for dwmac1000 In commit (bfab27a stmmac: add the experimental PCI support) the IFF_UNICAST_FLT flag has been removed from the stmmac_mac_device_setup() function. This patch re-adds the flag. Signed-off-by: Marc Kleine-Budde Acked-by: Giuseppe Cavallaro Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e85ffbd54830..48d56da62f08 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1737,10 +1737,12 @@ static int stmmac_hw_init(struct stmmac_priv *priv) struct mac_device_info *mac; /* Identify the MAC HW device */ - if (priv->plat->has_gmac) + if (priv->plat->has_gmac) { + priv->dev->priv_flags |= IFF_UNICAST_FLT; mac = dwmac1000_setup(priv->ioaddr); - else + } else { mac = dwmac100_setup(priv->ioaddr); + } if (!mac) return -ENOMEM; -- cgit v1.2.3 From 117980c4c994b6fe58e873fe803c9bcdcb4337a3 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 4 Apr 2012 09:40:40 +0000 Subject: mlx4: allocate just enough pages instead of always 4 pages The driver uses a 2-order allocation, which is too much on architectures like ppc64, which has a 64KiB page. This particular allocation is used for large packet fragments that may have a size of 512, 1024, 4096 or fill the whole allocation. So, a minimum size of 16384 is good enough and will be the same size that is used in architectures of 4KiB sized pages. This will avoid allocation failures that we see when the system is under stress, but still has plenty of memory, like the one below. This will also allow us to set the interface MTU to higher values like 9000, which was not possible on ppc64 without this patch. Node 1 DMA: 737*64kB 37*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB 0*8192kB 0*16384kB = 51904kB 83137 total pagecache pages 0 pages in swap cache Swap cache stats: add 0, delete 0, find 0/0 Free swap = 10420096kB Total swap = 10420096kB 107776 pages RAM 1184 pages reserved 147343 pages shared 28152 pages non-shared netstat: page allocation failure. order:2, mode:0x4020 Call Trace: [c0000001a4fa3770] [c000000000012f04] .show_stack+0x74/0x1c0 (unreliable) [c0000001a4fa3820] [c00000000016af38] .__alloc_pages_nodemask+0x618/0x930 [c0000001a4fa39a0] [c0000000001a71a0] .alloc_pages_current+0xb0/0x170 [c0000001a4fa3a40] [d00000000dcc3e00] .mlx4_en_alloc_frag+0x200/0x240 [mlx4_en] [c0000001a4fa3b10] [d00000000dcc3f8c] .mlx4_en_complete_rx_desc+0x14c/0x250 [mlx4_en] [c0000001a4fa3be0] [d00000000dcc4eec] .mlx4_en_process_rx_cq+0x62c/0x850 [mlx4_en] [c0000001a4fa3d20] [d00000000dcc5150] .mlx4_en_poll_rx_cq+0x40/0x90 [mlx4_en] [c0000001a4fa3dc0] [c0000000004e2bb8] .net_rx_action+0x178/0x450 [c0000001a4fa3eb0] [c00000000009c9b8] .__do_softirq+0x118/0x290 [c0000001a4fa3f90] [c000000000031df8] .call_do_softirq+0x14/0x24 [c000000184c3b520] [c00000000000e700] .do_softirq+0xf0/0x110 [c000000184c3b5c0] [c00000000009c6d4] .irq_exit+0xb4/0xc0 [c000000184c3b640] [c00000000000e964] .do_IRQ+0x144/0x230 Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Kleber Sacilotto de Souza Tested-by: Kleber Sacilotto de Souza Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 9e2b911a1230..d69fee41f24a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -83,8 +83,9 @@ #define MLX4_EN_WATCHDOG_TIMEOUT (15 * HZ) -#define MLX4_EN_ALLOC_ORDER 2 -#define MLX4_EN_ALLOC_SIZE (PAGE_SIZE << MLX4_EN_ALLOC_ORDER) +/* Use the maximum between 16384 and a single page */ +#define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(16384) +#define MLX4_EN_ALLOC_ORDER get_order(MLX4_EN_ALLOC_SIZE) #define MLX4_EN_MAX_LRO_DESCRIPTORS 32 -- cgit v1.2.3 From 78d50217baf36093ab320f95bae0d6452daec85c Mon Sep 17 00:00:00 2001 From: "RongQing.Li" Date: Wed, 4 Apr 2012 16:47:04 +0000 Subject: ipv6: fix array index in ip6_mc_add_src() Convert the array index from the loop bound to the loop index. Also remove the void cast of the ip6_mc_del1_src() return code; it is unnecessary, since ip6_mc_del1_src() is not annotated with a __must_check-like attribute, so no compiler will warn when the cast is removed. v2: enrich the commit header Signed-off-by: RongQing.Li Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 16c33e308121..b2869cab2092 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2044,7 +2044,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!delta) pmc->mca_sfcount[sfmode]--; for (j=0; j<i; j++) - (void) ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]); + ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]); } else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) { struct ip6_sf_list *psf; -- cgit v1.2.3 From 35f9c09fe9c72eb8ca2b8e89a593e1c151f28fc2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Apr 2012 03:05:35 +0000 Subject: tcp: tcp_sendpages() should call tcp_push() once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2f533844242 (tcp: allow splice() to build full TSO packets) added a regression for splice() calls using SPLICE_F_MORE. We need to call tcp_push() at the end of the last page processed in tcp_sendpages(), or else transmits can be deferred and future sends stall. Add a new internal flag, MSG_SENDPAGE_NOTLAST, acting like MSG_MORE, but with different semantics. For all sendpage() providers, it's a transparent change. Only sock_sendpage() and tcp_sendpages() can differentiate the two different flags provided by pipe_to_sendpage(). Reported-by: Tom Herbert Cc: Nandita Dukkipati Cc: Neal Cardwell Cc: Tom Herbert Cc: Yuchung Cheng Cc: H.K. Jerry Chu Cc: Maciej Żenczykowski Cc: Mahesh Bandewar Cc: Ilpo Järvinen Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- fs/splice.c | 5 ++++- include/linux/socket.h | 2 +- net/ipv4/tcp.c | 2 +- net/socket.c | 6 +++--- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 5f883de7ef3a..f8476841eb04 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -30,6 +30,7 @@ #include #include #include +#include /* * Attempt to steal a page from a pipe buffer. This should perhaps go into @@ -690,7 +691,9 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, if (!likely(file->f_op && file->f_op->sendpage)) return -EINVAL; - more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; + more = (sd->flags & SPLICE_F_MORE) ? 
MSG_MORE : 0; + if (sd->len < sd->total_len) + more |= MSG_SENDPAGE_NOTLAST; return file->f_op->sendpage(file, buf->page, buf->offset, sd->len, &pos, more); } diff --git a/include/linux/socket.h b/include/linux/socket.h index da2d3e2543f3..b84bbd48b874 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -265,7 +265,7 @@ struct ucred { #define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */ #define MSG_MORE 0x8000 /* Sender will send more */ #define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */ - +#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */ #define MSG_EOF MSG_FIN #define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exit for file diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2ff6f45a76f4..5d54ed30e821 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -860,7 +860,7 @@ wait_for_memory: } out: - if (copied && !(flags & MSG_MORE)) + if (copied && !(flags & MSG_SENDPAGE_NOTLAST)) tcp_push(sk, flags, mss_now, tp->nonagle); return copied; diff --git a/net/socket.c b/net/socket.c index 484cc6953fc6..851edcd6b098 100644 --- a/net/socket.c +++ b/net/socket.c @@ -811,9 +811,9 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, sock = file->private_data; - flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; - if (more) - flags |= MSG_MORE; + flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; + /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */ + flags |= more; return kernel_sendpage(sock, page, offset, size, flags); } -- cgit v1.2.3 From bcf1b70ac6eb0ed8286c66e6bf37cb747cbaa04c Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 5 Apr 2012 12:07:45 +0000 Subject: phonet: Check input from user before allocating MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A phonet packet is limited to USHRT_MAX bytes, this is never checked during tx which means that the user can specify any size he wishes, and the kernel will attempt to allocate that size. In the good case, it'll lead to the following warning, but it may also cause the kernel to kick in the OOM and kill a random task on the server. [ 8921.744094] WARNING: at mm/page_alloc.c:2255 __alloc_pages_slowpath+0x65/0x730() [ 8921.749770] Pid: 5081, comm: trinity Tainted: G W 3.4.0-rc1-next-20120402-sasha #46 [ 8921.756672] Call Trace: [ 8921.758185] [] warn_slowpath_common+0x87/0xb0 [ 8921.762868] [] warn_slowpath_null+0x15/0x20 [ 8921.765399] [] __alloc_pages_slowpath+0x65/0x730 [ 8921.769226] [] ? zone_watermark_ok+0x1a/0x20 [ 8921.771686] [] ? get_page_from_freelist+0x625/0x660 [ 8921.773919] [] __alloc_pages_nodemask+0x1f8/0x240 [ 8921.776248] [] kmalloc_large_node+0x70/0xc0 [ 8921.778294] [] __kmalloc_node_track_caller+0x34/0x1c0 [ 8921.780847] [] ? sock_alloc_send_pskb+0xbc/0x260 [ 8921.783179] [] __alloc_skb+0x75/0x170 [ 8921.784971] [] sock_alloc_send_pskb+0xbc/0x260 [ 8921.787111] [] ? release_sock+0x7e/0x90 [ 8921.788973] [] sock_alloc_send_skb+0x10/0x20 [ 8921.791052] [] pep_sendmsg+0x60/0x380 [ 8921.792931] [] ? pn_socket_bind+0x156/0x180 [ 8921.794917] [] ? pn_socket_autobind+0x3f/0x90 [ 8921.797053] [] pn_socket_sendmsg+0x4f/0x70 [ 8921.798992] [] sock_aio_write+0x187/0x1b0 [ 8921.801395] [] ? sub_preempt_count+0xae/0xf0 [ 8921.803501] [] ? __lock_acquire+0x42c/0x4b0 [ 8921.805505] [] ? __sock_recv_ts_and_drops+0x140/0x140 [ 8921.807860] [] do_sync_readv_writev+0xbc/0x110 [ 8921.809986] [] ? might_fault+0x97/0xa0 [ 8921.811998] [] ? 
security_file_permission+0x1e/0x90 [ 8921.814595] [] do_readv_writev+0xe2/0x1e0 [ 8921.816702] [] ? do_setitimer+0x1ac/0x200 [ 8921.818819] [] ? get_parent_ip+0x11/0x50 [ 8921.820863] [] ? sub_preempt_count+0xae/0xf0 [ 8921.823318] [] vfs_writev+0x46/0x60 [ 8921.825219] [] sys_writev+0x4f/0xb0 [ 8921.827127] [] system_call_fastpath+0x16/0x1b [ 8921.829384] ---[ end trace dffe390f30db9eb7 ]--- Signed-off-by: Sasha Levin Acked-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pep.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 9f60008740e3..9726fe684ab8 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -1130,6 +1130,9 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, int flags = msg->msg_flags; int err, done; + if (len > USHRT_MAX) + return -EMSGSIZE; + if ((msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL| MSG_CMSG_COMPAT)) || !(msg->msg_flags & MSG_EOR)) -- cgit v1.2.3 From 5a4309746cd74734daa964acb02690c22b3c8911 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Thu, 5 Apr 2012 03:47:43 +0000 Subject: bonding: properly unset current_arp_slave on slave link up When a slave comes up, we're unsetting the current_arp_slave without removing active flags from it, which can lead to situations where we have more than one slave with active flags in active-backup mode. To avoid this situation we must remove the active flags from a slave before removing it as a current_arp_slave. Signed-off-by: Veaceslav Falico Signed-off-by: Jay Vosburgh Signed-off-by: Andy Gospodarek Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index fc8a8d5c4dbd..62d2409bb293 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3010,7 +3010,11 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks) trans_start + delta_in_ticks)) || bond->curr_active_slave != slave) { slave->link = BOND_LINK_UP; - bond->current_arp_slave = NULL; + if (bond->current_arp_slave) { + bond_set_slave_inactive_flags( + bond->current_arp_slave); + bond->current_arp_slave = NULL; + } pr_info("%s: link status definitely up for interface %s.\n", bond->dev->name, slave->dev->name); -- cgit v1.2.3 From 44c14c1d4cf9b6ef4993c4a69f479d1f13cb8b21 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 2 Apr 2012 12:59:47 +0000 Subject: MAINTAINERS: update for Marvell Ethernet drivers Marvell has agreed to do maintenance on the sky2 driver. * Add the developer to the maintainers file * Remove the old reference to the long gone (sk98lin) driver * Rearrange to fit current topic organization Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- MAINTAINERS | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 962232d62781..c411dd16e59a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4309,6 +4309,13 @@ W: http://www.kernel.org/doc/man-pages L: linux-man@vger.kernel.org S: Maintained +MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2) +M: Mirko Lindner +M: Stephen Hemminger +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/marvell/sk* + MARVELL LIBERTAS WIRELESS DRIVER M: Dan Williams L: libertas-dev@lists.infradead.org @@ -4339,12 +4346,6 @@ M: Nicolas Pitre S: Odd Fixes F: drivers/mmc/host/mvsdio.* -MARVELL YUKON / SYSKONNECT DRIVER -M: Mirko Lindner -M: Ralph Roesler -W: http://www.syskonnect.com -S: Supported - MATROX FRAMEBUFFER DRIVER L: linux-fbdev@vger.kernel.org S: Orphan @@ -6116,12 +6117,6 @@ W: http://www.winischhofer.at/linuxsisusbvga.shtml S: Maintained F: drivers/usb/misc/sisusbvga/ -SKGE, SKY2 10/100/1000 GIGABIT ETHERNET DRIVERS -M: Stephen Hemminger -L: netdev@vger.kernel.org -S: Maintained -F: drivers/net/ethernet/marvell/sk* - SLAB ALLOCATOR M: Christoph Lameter M: Pekka Enberg -- cgit v1.2.3 From b4f79e5cb2182f27d151da6e223186f287a615d6 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 5 Apr 2012 14:38:49 +0000 Subject: ethtool: Remove exception to the requirement of holding RTNL lock Commit e52ac3398c3d772d372b9b62ab408fd5eec96840 ('net: Use device model to get driver name in skb_gso_segment()') removed the only in-tree caller of ethtool ops that doesn't hold the RTNL lock. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/ethtool.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e1d9e0ede309..f5647b59a90e 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -896,8 +896,7 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must - * hold the RTNL, except that for @get_drvinfo the caller may or may - * not hold the RTNL. + * hold the RTNL lock. * * See the structures used by these operations for further documentation. * -- cgit v1.2.3 From 93b6a3adbd159174772702744b142d60e3891dfa Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 5 Apr 2012 14:39:10 +0000 Subject: doc, net: Remove obsolete reference to dev->poll Commit bea3348eef27e6044b6161fd04c3152215f96411 ('[NET]: Make NAPI polling independent of struct net_device objects.') removed the automatic disabling of NAPI polling by dev_close(), and drivers must now do this themselves. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- Documentation/networking/netdevices.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt index 89358341682a..336fe8e85b20 100644 --- a/Documentation/networking/netdevices.txt +++ b/Documentation/networking/netdevices.txt @@ -54,8 +54,7 @@ dev->open: dev->stop: Synchronization: rtnl_lock() semaphore. Context: process - Note1: netif_running() is guaranteed false - Note2: dev->poll() is guaranteed to be stopped + Note: netif_running() is guaranteed false dev->do_ioctl: Synchronization: rtnl_lock() semaphore. 
-- cgit v1.2.3 From 04fd3d3515612b71f96b851db7888bfe58ef2142 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 5 Apr 2012 14:39:30 +0000 Subject: doc, net: Update documentation of synchronisation for TX multiqueue Commits e308a5d806c852f56590ffdd3834d0df0cbed8d7 ('netdev: Add netdev->addr_list_lock protection.') and e8a0464cc950972824e2e128028ae3db666ec1ed ('netdev: Allocate multiple queues for TX.') introduced more fine-grained locks. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- Documentation/networking/netdevices.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt index 336fe8e85b20..b107733cfdcd 100644 --- a/Documentation/networking/netdevices.txt +++ b/Documentation/networking/netdevices.txt @@ -65,7 +65,7 @@ dev->get_stats: Context: nominally process, but don't sleep inside an rwlock dev->hard_start_xmit: - Synchronization: netif_tx_lock spinlock. + Synchronization: __netif_tx_lock spinlock. When the driver sets NETIF_F_LLTX in dev->features this will be called without holding netif_tx_lock. In this case the driver @@ -87,12 +87,12 @@ dev->hard_start_xmit: Only valid when NETIF_F_LLTX is set. dev->tx_timeout: - Synchronization: netif_tx_lock spinlock. + Synchronization: netif_tx_lock spinlock; all TX queues frozen. Context: BHs disabled Notes: netif_queue_stopped() is guaranteed true dev->set_rx_mode: - Synchronization: netif_tx_lock spinlock. + Synchronization: netif_addr_lock spinlock. Context: BHs disabled struct napi_struct synchronization rules -- cgit v1.2.3 From b3cf65457fc0c8d183bdb9bc4358e5706aa63cc5 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 5 Apr 2012 14:39:47 +0000 Subject: doc, net: Update netdev operation names Commits d314774cf2cd5dfeb39a00d37deee65d4c627927 ('netdev: network device operations infrastructure') and 008298231abbeb91bc7be9e8b078607b816d1a4a ('netdev: add more functions to netdevice ops') moved and renamed net device operation pointers. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- Documentation/networking/driver.txt | 12 ++++++------ Documentation/networking/netdevices.txt | 16 ++++++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Documentation/networking/driver.txt b/Documentation/networking/driver.txt index 03283daa64fe..83ce06080ceb 100644 --- a/Documentation/networking/driver.txt +++ b/Documentation/networking/driver.txt @@ -2,7 +2,7 @@ Document about softnet driver issues Transmit path guidelines: -1) The hard_start_xmit method must never return '1' under any +1) The ndo_start_xmit method must never return '1' under any normal circumstances. It is considered a hard error unless there is no way your device can tell ahead of time when it's transmit function will become busy. @@ -61,10 +61,10 @@ Transmit path guidelines: 2) Do not forget to update netdev->trans_start to jiffies after each new tx packet is given to the hardware. -3) A hard_start_xmit method must not modify the shared parts of a +3) An ndo_start_xmit method must not modify the shared parts of a cloned SKB. -4) Do not forget that once you return 0 from your hard_start_xmit +4) Do not forget that once you return 0 from your ndo_start_xmit method, it is your driver's responsibility to free up the SKB and in some finite amount of time. @@ -74,7 +74,7 @@ Transmit path guidelines: This error can deadlock sockets waiting for send buffer room to be freed up. 
- If you return 1 from the hard_start_xmit method, you must not keep + If you return 1 from the ndo_start_xmit method, you must not keep any reference to that SKB and you must not attempt to free it up. Probing guidelines: @@ -85,10 +85,10 @@ Probing guidelines: Close/stop guidelines: -1) After the dev->stop routine has been called, the hardware must +1) After the ndo_stop routine has been called, the hardware must not receive or transmit any data. All in flight packets must be aborted. If necessary, poll or wait for completion of any reset commands. -2) The dev->stop routine will be called by unregister_netdevice +2) The ndo_stop routine will be called by unregister_netdevice if device is still UP. diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt index b107733cfdcd..c7ecc7080494 100644 --- a/Documentation/networking/netdevices.txt +++ b/Documentation/networking/netdevices.txt @@ -47,24 +47,24 @@ packets is preferred. struct net_device synchronization rules ======================================= -dev->open: +ndo_open: Synchronization: rtnl_lock() semaphore. Context: process -dev->stop: +ndo_stop: Synchronization: rtnl_lock() semaphore. Context: process Note: netif_running() is guaranteed false -dev->do_ioctl: +ndo_do_ioctl: Synchronization: rtnl_lock() semaphore. Context: process -dev->get_stats: +ndo_get_stats: Synchronization: dev_base_lock rwlock. Context: nominally process, but don't sleep inside an rwlock -dev->hard_start_xmit: +ndo_start_xmit: Synchronization: __netif_tx_lock spinlock. When the driver sets NETIF_F_LLTX in dev->features this will be @@ -86,12 +86,12 @@ dev->hard_start_xmit: o NETDEV_TX_LOCKED Locking failed, please retry quickly. Only valid when NETIF_F_LLTX is set. -dev->tx_timeout: +ndo_tx_timeout: Synchronization: netif_tx_lock spinlock; all TX queues frozen. Context: BHs disabled Notes: netif_queue_stopped() is guaranteed true -dev->set_rx_mode: +ndo_set_rx_mode: Synchronization: netif_addr_lock spinlock. Context: BHs disabled @@ -99,7 +99,7 @@ struct napi_struct synchronization rules ======================================== napi->poll: Synchronization: NAPI_STATE_SCHED bit in napi->state. Device - driver's dev->close method will invoke napi_disable() on + driver's ndo_stop method will invoke napi_disable() on all NAPI instances which will do a sleeping poll on the NAPI_STATE_SCHED napi->state bit, waiting for all pending NAPI activity to cease. -- cgit v1.2.3 From de7aca16fd6c32719b6a7d4480b8f4685f69f7ff Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 5 Apr 2012 14:40:06 +0000 Subject: doc, net: Remove instruction to set net_device::trans_start Commit 08baf561083bc27a953aa087dd8a664bb2b88e8e ('net: txq_trans_update() helper') made it unnecessary for most drivers to set net_device::trans_start (or netdev_queue::trans_start). Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- Documentation/networking/driver.txt | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Documentation/networking/driver.txt b/Documentation/networking/driver.txt index 83ce06080ceb..2128e4169c5b 100644 --- a/Documentation/networking/driver.txt +++ b/Documentation/networking/driver.txt @@ -58,13 +58,10 @@ Transmit path guidelines: TX_BUFFS_AVAIL(dp) > 0) netif_wake_queue(dp->dev); -2) Do not forget to update netdev->trans_start to jiffies after - each new tx packet is given to the hardware. 
- -3) An ndo_start_xmit method must not modify the shared parts of a +2) An ndo_start_xmit method must not modify the shared parts of a cloned SKB. -4) Do not forget that once you return 0 from your ndo_start_xmit +3) Do not forget that once you return 0 from your ndo_start_xmit method, it is your driver's responsibility to free up the SKB and in some finite amount of time. -- cgit v1.2.3 From e34fac1c2e9ec531c2d63a5e3aa9a6d0ef36a1d3 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 5 Apr 2012 14:40:25 +0000 Subject: doc, net: Update ndo_start_xmit return type and values Commit dc1f8bf68b311b1537cb65893430b6796118498a ('netdev: change transmit to limited range type') changed the required return type and 9a1654ba0b50402a6bd03c7b0fe9b0200a5ea7b1 ('net: Optimize hard_start_xmit() return checking') changed the valid numerical return values. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- Documentation/networking/driver.txt | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/Documentation/networking/driver.txt b/Documentation/networking/driver.txt index 2128e4169c5b..da59e2884130 100644 --- a/Documentation/networking/driver.txt +++ b/Documentation/networking/driver.txt @@ -2,16 +2,16 @@ Document about softnet driver issues Transmit path guidelines: -1) The ndo_start_xmit method must never return '1' under any - normal circumstances. It is considered a hard error unless +1) The ndo_start_xmit method must not return NETDEV_TX_BUSY under + any normal circumstances. It is considered a hard error unless there is no way your device can tell ahead of time when it's transmit function will become busy. Instead it must maintain the queue properly. For example, for a driver implementing scatter-gather this means: - static int drv_hard_start_xmit(struct sk_buff *skb, - struct net_device *dev) + static netdev_tx_t drv_hard_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct drv *dp = netdev_priv(dev); @@ -23,7 +23,7 @@ Transmit path guidelines: unlock_tx(dp); printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n", dev->name); - return 1; + return NETDEV_TX_BUSY; } ... queue packet to card ... @@ -35,6 +35,7 @@ Transmit path guidelines: ... unlock_tx(dp); ... + return NETDEV_TX_OK; } And then at the end of your TX reclamation event handling: @@ -61,9 +62,9 @@ Transmit path guidelines: 2) An ndo_start_xmit method must not modify the shared parts of a cloned SKB. -3) Do not forget that once you return 0 from your ndo_start_xmit - method, it is your driver's responsibility to free up the SKB - and in some finite amount of time. +3) Do not forget that once you return NETDEV_TX_OK from your + ndo_start_xmit method, it is your driver's responsibility to free + up the SKB and in some finite amount of time. For example, this means that it is not allowed for your TX mitigation scheme to let TX packets "hang out" in the TX @@ -71,8 +72,9 @@ Transmit path guidelines: This error can deadlock sockets waiting for send buffer room to be freed up. - If you return 1 from the ndo_start_xmit method, you must not keep - any reference to that SKB and you must not attempt to free it up. + If you return NETDEV_TX_BUSY from the ndo_start_xmit method, you + must not keep any reference to that SKB and you must not attempt + to free it up. 
Probing guidelines: -- cgit v1.2.3 From 4a7e7c2ad540e54c75489a70137bf0ec15d3a127 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Apr 2012 22:17:46 +0000 Subject: netlink: fix races after skb queueing As soon as an skb is queued into socket receive_queue, another thread can consume it, so we are not allowed to reference skb anymore, or risk use after free. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 32bb75324e76..faa48f70b7c9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -829,12 +829,19 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, return 0; } -int netlink_sendskb(struct sock *sk, struct sk_buff *skb) +static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) { int len = skb->len; skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, len); + return len; +} + +int netlink_sendskb(struct sock *sk, struct sk_buff *skb) +{ + int len = __netlink_sendskb(sk, skb); + sock_put(sk); return len; } @@ -957,8 +964,7 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !test_bit(0, &nlk->state)) { skb_set_owner_r(skb, sk); - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); + __netlink_sendskb(sk, skb); return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); } return -1; @@ -1698,10 +1704,8 @@ static int netlink_dump(struct sock *sk) if (sk_filter(sk, skb)) kfree_skb(skb); - else { - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); - } + else + __netlink_sendskb(sk, skb); return 0; } @@ -1715,10 +1719,8 @@ static int netlink_dump(struct sock *sk) if (sk_filter(sk, skb)) kfree_skb(skb); - else { - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk, skb->len); - } + else + __netlink_sendskb(sk, skb); if (cb->done) cb->done(cb); -- cgit v1.2.3 From 110c43304db6f06490961529536c362d9ac5732f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Apr 2012 10:49:10 +0200 Subject: net: fix a race in sock_queue_err_skb() As soon as an skb is queued into socket error queue, another thread can consume it, so we are not allowed to reference skb anymore, or risk use after free. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f223cdc75da6..baf8d281152c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3161,6 +3161,8 @@ static void sock_rmem_free(struct sk_buff *skb) */ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) { + int len = skb->len; + if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned)sk->sk_rcvbuf) return -ENOMEM; @@ -3175,7 +3177,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) skb_queue_tail(&sk->sk_error_queue, skb); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skb->len); + sk->sk_data_ready(sk, len); return 0; } EXPORT_SYMBOL(sock_queue_err_skb); -- cgit v1.2.3
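Both of the last two fixes follow the same rule: once an skb has been queued onto a socket queue, another thread may dequeue and free it at any moment, so any field still needed afterwards (here skb->len) must be read before the call to skb_queue_tail(). A minimal sketch of that pattern is shown below; my_queue_and_notify() is a hypothetical helper used only for illustration, while struct sock, struct sk_buff, skb_queue_tail() and sk_data_ready() are the kernel primitives the patches above actually touch (using the two-argument sk_data_ready() signature of this kernel era).

#include <linux/skbuff.h>
#include <net/sock.h>

/* Hypothetical helper, for illustration only. */
static int my_queue_and_notify(struct sock *sk, struct sk_buff *skb)
{
	int len = skb->len;	/* read everything we need before queueing */

	skb_queue_tail(&sk->sk_receive_queue, skb);
	/* From this point on another thread may already have dequeued and
	 * freed skb, so only the cached 'len' may be used.
	 */
	sk->sk_data_ready(sk, len);
	return len;
}

Caching the length up front also lets the caller return it, as __netlink_sendskb() does in the netlink fix, without ever touching the skb again after it has been queued.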