From aabdcb0b553b9c9547b1a506b34d55a764745870 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp
Date: Fri, 23 Sep 2011 08:23:47 +0000
Subject: can bcm: fix tx_setup off-by-one errors

This patch fixes two off-by-one errors that canceled each other out.
Checking for the same condition twice in bcm_tx_timeout_tsklet()
reduced the count of frames to be sent by one. This did not show up the
first time tx_setup is invoked, as an additional frame is sent due to
TX_ANNOUNCE. Invoking a second tx_setup on the same item, however, led
to a number of sent frames that was reduced by one.

Reported-by: Andre Naujoks
Signed-off-by: Oliver Hartkopp
Signed-off-by: David S. Miller
---
 net/can/bcm.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/can/bcm.c b/net/can/bcm.c
index d6c8ae5b2e6a..c9cdb8d78e70 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -365,9 +365,6 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
 
 			bcm_send_to_user(op, &msg_head, NULL, 0);
 		}
-	}
-
-	if (op->kt_ival1.tv64 && (op->count > 0)) {
 
 		/* send (next) frame */
 		bcm_can_tx(op);
@@ -970,8 +967,9 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 		/* spec: send can_frame when starting timer */
 		op->flags |= TX_ANNOUNCE;
 
-		if (op->kt_ival1.tv64 && (op->count > 0)) {
-			/* op->count-- is done in bcm_tx_timeout_handler */
+		/* only start timer when having more frames than sent below */
+		if (op->kt_ival1.tv64 && (op->count > 1)) {
+			/* op->count-- is done in bcm_tx_timeout_tsklet */
 			hrtimer_start(&op->timer, op->kt_ival1,
 				      HRTIMER_MODE_REL);
 		} else
@@ -979,8 +977,11 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 				      HRTIMER_MODE_REL);
 	}
 
-	if (op->flags & TX_ANNOUNCE)
+	if (op->flags & TX_ANNOUNCE) {
 		bcm_can_tx(op);
+		if (op->kt_ival1.tv64 && (op->count > 0))
+			op->count--;
+	}
 
 	return msg_head->nframes * CFSIZ + MHSIZ;
 }
--
cgit v1.2.3

From 676a1184e8afd4fed7948232df1ff91517400859 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng"
Date: Sun, 25 Sep 2011 02:21:30 +0000
Subject: ipv6: nullify ipv6_ac_list and ipv6_fl_list when creating new socket

ipv6_ac_list and ipv6_fl_list from the listening socket are
inadvertently shared with the new socket created for a connection.

Signed-off-by: Zheng Yan
Signed-off-by: David S. Miller
---
 net/ipv6/tcp_ipv6.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3c9fa618b69d..79cc6469508d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1383,6 +1383,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
 #endif
 
+		newnp->ipv6_ac_list = NULL;
+		newnp->ipv6_fl_list = NULL;
 		newnp->pktoptions  = NULL;
 		newnp->opt	   = NULL;
 		newnp->mcast_oif   = inet6_iif(skb);
@@ -1447,6 +1449,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	   First: no IPv4 options.
 	 */
 	newinet->inet_opt = NULL;
+	newnp->ipv6_ac_list = NULL;
 	newnp->ipv6_fl_list = NULL;
 
 	/* Clone RX bits */
--
cgit v1.2.3
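
The pattern behind this fix is worth spelling out: tcp_v6_syn_recv_sock
initializes the child socket as a wholesale copy of the listener's state, so
any embedded list pointer is silently shared until it is reset. A minimal
user-space sketch of the hazard (all names hypothetical, mirroring the NULL
assignments the patch adds):

#include <stdlib.h>
#include <string.h>

struct node {
	int v;
	struct node *next;
};

/* Hypothetical analogue of the listener's per-socket state: it embeds
 * pointers to heap-allocated list entries (like ipv6_ac_list). */
struct conn {
	struct node *ac_list;
	struct node *fl_list;
};

/* Mirrors the tcp_v6_syn_recv_sock pattern: the child starts out as a
 * byte copy of the parent, so every pointer field that must not be
 * shared has to be reset explicitly. */
struct conn *clone_conn(const struct conn *parent)
{
	struct conn *child = malloc(sizeof(*child));

	if (!child)
		return NULL;
	memcpy(child, parent, sizeof(*child));
	child->ac_list = NULL;	/* without these two resets, parent and */
	child->fl_list = NULL;	/* child would free the same nodes twice */
	return child;
}
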
From 85a64889492b45f931ddac87ec09d84aa7347ee1 Mon Sep 17 00:00:00 2001
From: Jonathan Lallinger
Date: Thu, 29 Sep 2011 07:58:41 +0000
Subject: RDSRDMA: Fix cleanup of rds_iw_mr_pool

In the rds_iw_mr_pool struct the free_pinned field keeps track of
memory pinned by free MRs. While this field is incremented properly
upon allocation, it is never decremented upon unmapping. This would
cause the rds_rdma module to crash the kernel upon unloading, by
triggering the BUG_ON in the rds_iw_destroy_mr_pool function. This
change keeps track of the MRs that become unpinned, so that
free_pinned can be decremented appropriately.

Signed-off-by: Jonathan Lallinger
Signed-off-by: Steve Wise
Signed-off-by: David S. Miller
---
 net/rds/iw_rdma.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 8b77edbab272..4e1de171866c 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -84,7 +84,8 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
 static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
 static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
 			struct list_head *unmap_list,
-			struct list_head *kill_list);
+			struct list_head *kill_list,
+			int *unpinned);
 static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
 
 static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id)
@@ -499,7 +500,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
 	LIST_HEAD(unmap_list);
 	LIST_HEAD(kill_list);
 	unsigned long flags;
-	unsigned int nfreed = 0, ncleaned = 0, free_goal;
+	unsigned int nfreed = 0, ncleaned = 0, unpinned = 0, free_goal;
 	int ret = 0;
 
 	rds_iw_stats_inc(s_iw_rdma_mr_pool_flush);
@@ -524,7 +525,8 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
 	 * will be destroyed by the unmap function.
	 */
 	if (!list_empty(&unmap_list)) {
-		ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, &kill_list);
+		ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list,
+						     &kill_list, &unpinned);
 		/* If we've been asked to destroy all MRs, move those
 		 * that were simply cleaned to the kill list */
 		if (free_all)
@@ -548,6 +550,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
 		spin_unlock_irqrestore(&pool->list_lock, flags);
 	}
 
+	atomic_sub(unpinned, &pool->free_pinned);
 	atomic_sub(ncleaned, &pool->dirty_count);
 	atomic_sub(nfreed, &pool->item_count);
 
@@ -828,7 +831,8 @@ static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool,
 
 static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
 				struct list_head *unmap_list,
-				struct list_head *kill_list)
+				struct list_head *kill_list,
+				int *unpinned)
 {
 	struct rds_iw_mapping *mapping, *next;
 	unsigned int ncleaned = 0;
@@ -855,6 +859,7 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
 
 	spin_lock_irqsave(&pool->list_lock, flags);
 	list_for_each_entry_safe(mapping, next, unmap_list, m_list) {
+		*unpinned += mapping->m_sg.len;
 		list_move(&mapping->m_list, &laundered);
 		ncleaned++;
 	}
--
cgit v1.2.3
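
The fix above restores a simple invariant: whatever a mapping adds to
free_pinned when it is pinned must be subtracted again when the mapping is
flushed, otherwise the BUG_ON at pool teardown fires on module unload. A
user-space analogue of that accounting, with C11 atomics and an assert
standing in for the BUG_ON (all names hypothetical):

#include <assert.h>
#include <stdatomic.h>

static atomic_uint free_pinned;		/* pages pinned by free MRs */

static void map_mr(unsigned int npages)
{
	atomic_fetch_add(&free_pinned, npages);
}

/* Like rds_iw_unmap_fastreg_list: report how much each mapping unpins
 * through an out-parameter accumulated by the caller... */
static void unmap_mr(unsigned int npages, unsigned int *unpinned)
{
	*unpinned += npages;
}

static void flush_pool(void)
{
	unsigned int unpinned = 0;

	unmap_mr(4, &unpinned);
	unmap_mr(8, &unpinned);
	/* ...and subtracted once, outside the per-mapping loop */
	atomic_fetch_sub(&free_pinned, unpinned);
}

int main(void)
{
	map_mr(4);
	map_mr(8);
	flush_pool();
	/* analogue of the BUG_ON in rds_iw_destroy_mr_pool */
	assert(atomic_load(&free_pinned) == 0);
	return 0;
}
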
From 12d0d0d3a7349daa95dbfd5d7df8146255bc7c67 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp
Date: Thu, 29 Sep 2011 15:33:47 -0400
Subject: can bcm: fix incomplete tx_setup fix

The commit aabdcb0b553b9c9547b1a506b34d55a764745870 ("can bcm: fix
tx_setup off-by-one errors") fixed only a part of the original problem
reported by Andre Naujoks. It turned out that the original code needed
to be re-ordered to reduce complexity and to finally fix the reported
frame counting issues.

Signed-off-by: Oliver Hartkopp
Signed-off-by: David S. Miller
---
 net/can/bcm.c | 48 +++++++++++++++++++++---------------------------
 1 file changed, 21 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/net/can/bcm.c b/net/can/bcm.c
index c9cdb8d78e70..c84963d2dee6 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -344,6 +344,18 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
 	}
 }
 
+static void bcm_tx_start_timer(struct bcm_op *op)
+{
+	if (op->kt_ival1.tv64 && op->count)
+		hrtimer_start(&op->timer,
+			      ktime_add(ktime_get(), op->kt_ival1),
+			      HRTIMER_MODE_ABS);
+	else if (op->kt_ival2.tv64)
+		hrtimer_start(&op->timer,
+			      ktime_add(ktime_get(), op->kt_ival2),
+			      HRTIMER_MODE_ABS);
+}
+
 static void bcm_tx_timeout_tsklet(unsigned long data)
 {
 	struct bcm_op *op = (struct bcm_op *)data;
@@ -365,23 +377,12 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
 
 			bcm_send_to_user(op, &msg_head, NULL, 0);
 		}
-
-		/* send (next) frame */
 		bcm_can_tx(op);
-		hrtimer_start(&op->timer,
-			      ktime_add(ktime_get(), op->kt_ival1),
-			      HRTIMER_MODE_ABS);
 
-	} else {
-		if (op->kt_ival2.tv64) {
+	} else if (op->kt_ival2.tv64)
+		bcm_can_tx(op);
 
-			/* send (next) frame */
-			bcm_can_tx(op);
-			hrtimer_start(&op->timer,
-				      ktime_add(ktime_get(), op->kt_ival2),
-				      HRTIMER_MODE_ABS);
-		}
-	}
+	bcm_tx_start_timer(op);
 }
 
 /*
@@ -961,28 +962,21 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 
 			hrtimer_cancel(&op->timer);
 	}
 
-	if ((op->flags & STARTTIMER) &&
-	    ((op->kt_ival1.tv64 && op->count) || op->kt_ival2.tv64)) {
-
+	if (op->flags & STARTTIMER) {
+		hrtimer_cancel(&op->timer);
 		/* spec: send can_frame when starting timer */
 		op->flags |= TX_ANNOUNCE;
-
-		/* only start timer when having more frames than sent below */
-		if (op->kt_ival1.tv64 && (op->count > 1)) {
-			/* op->count-- is done in bcm_tx_timeout_tsklet */
-			hrtimer_start(&op->timer, op->kt_ival1,
-				      HRTIMER_MODE_REL);
-		} else
-			hrtimer_start(&op->timer, op->kt_ival2,
-				      HRTIMER_MODE_REL);
 	}
 
 	if (op->flags & TX_ANNOUNCE) {
 		bcm_can_tx(op);
-		if (op->kt_ival1.tv64 && (op->count > 0))
+		if (op->count)
 			op->count--;
 	}
 
+	if (op->flags & STARTTIMER)
+		bcm_tx_start_timer(op);
+
 	return msg_head->nframes * CFSIZ + MHSIZ;
 }
--
cgit v1.2.3
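
For context, the counting these two patches fix is observable from user
space: a TX_SETUP with count = N and both intervals set should transmit the
first frame immediately (TX_ANNOUNCE), the remaining N-1 frames at ival1
spacing, and then cycle forever at ival2. A sketch of such a request,
assuming a vcan0 interface and this era's bcm_msg_head layout with struct
timeval intervals; error handling omitted:

#include <net/if.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/can.h>
#include <linux/can/bcm.h>

int main(void)
{
	struct {
		struct bcm_msg_head head;
		struct can_frame frame;
	} msg;
	struct sockaddr_can addr;
	int s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);

	memset(&addr, 0, sizeof(addr));
	addr.can_family = AF_CAN;
	addr.can_ifindex = if_nametoindex("vcan0");
	connect(s, (struct sockaddr *)&addr, sizeof(addr));

	memset(&msg, 0, sizeof(msg));
	msg.head.opcode = TX_SETUP;
	msg.head.flags = SETTIMER | STARTTIMER;
	msg.head.count = 3;		/* 3 frames at ival1 spacing... */
	msg.head.ival1.tv_usec = 100000;
	msg.head.ival2.tv_sec = 1;	/* ...then repeat at ival2 */
	msg.head.can_id = 0x123;
	msg.head.nframes = 1;
	msg.frame.can_id = 0x123;
	msg.frame.can_dlc = 2;
	msg.frame.data[0] = 0xde;
	msg.frame.data[1] = 0xad;

	write(s, &msg, sizeof(msg));
	pause();	/* frames are sent by the kernel's BCM timer */
	return 0;
}

Before the fixes, repeating this TX_SETUP on the same item sent one frame too
few; afterwards exactly count frames go out at ival1 spacing each time.
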
From 7091fbd82cd5686444ffe9935ed6a8190101fe9d Mon Sep 17 00:00:00 2001
From: Willem de Bruijn
Date: Fri, 30 Sep 2011 10:38:28 +0000
Subject: make PACKET_STATISTICS getsockopt report consistently between ring
 and non-ring

This is a minor change.

Up until kernel 2.6.32, getsockopt(fd, SOL_PACKET, PACKET_STATISTICS,
...) would return total and dropped packets since its last invocation.
The introduction of socket queue overflow reporting [1] changed the
drop rate calculation in the normal packet socket path, but not when
using a packet ring. As a result, the getsockopt now returns different
statistics depending on the reception method used. With a ring, it
still returns the count since the last call, as counts are incremented
in tpacket_rcv and reset in getsockopt. Without a ring, it returns 0 if
no drops occurred since the last getsockopt and the total drops over
the lifespan of the socket otherwise.

The culprit is this line in packet_rcv, executed on a drop:

drop_n_acct:
	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

As it shows, the new drop number is taken from the socket drop counter,
which is not reset at getsockopt.

I put together a small example that demonstrates the issue [2]. It runs
for 10 seconds and overflows the queue/ring on every odd second. The
reported drop rates are:

  ring:     16, 0, 16, 0, 16, ...
  non-ring: 0, 15, 0, 30, 0, 46, 0, 60, 0, 74

Note how the even non-ring counts monotonically increase. Because the
getsockopt adds tp_drops to tp_packets, total counts are similarly
reported cumulatively.

Long story short, reinstating the original code, as the below patch
does, fixes the issue at the cost of additional per-packet cycles.
Another solution that does not introduce per-packet overhead is to keep
the current data path, record the value of sk_drops at getsockopt()
call N in a new field in struct packet_sock, and subtract that when
reporting at call N+1. I'll be happy to code that instead; it's just
messier.

[1] http://patchwork.ozlabs.org/patch/35665/
[2] http://kernel.googlecode.com/files/test-packetsock-getstatistics.c

Signed-off-by: Willem de Bruijn
Signed-off-by: David S. Miller
---
 net/packet/af_packet.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c698cec0a445..fabb4fafa281 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -961,7 +961,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 	return 0;
 
 drop_n_acct:
-	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);
+	spin_lock(&sk->sk_receive_queue.lock);
+	po->stats.tp_drops++;
+	atomic_inc(&sk->sk_drops);
+	spin_unlock(&sk->sk_receive_queue.lock);
 
 drop_n_restore:
 	if (skb_head != skb->data && skb_shared(skb)) {
--
cgit v1.2.3
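
With the original semantics restored by the patch above, a monitoring loop
can treat every PACKET_STATISTICS read as an interval counter whether or not
an RX ring is attached: the kernel zeroes both counters on each call and
folds tp_drops into the reported tp_packets. A minimal sketch, assuming fd is
an already-bound packet socket:

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_packet.h>

/* Print per-second receive/drop counts for a packet socket.
 * PACKET_STATISTICS returns the counts accumulated since the previous
 * call and resets them; tp_packets already includes tp_drops. */
static void poll_stats(int fd)
{
	struct tpacket_stats st;
	socklen_t len;

	for (;;) {
		sleep(1);
		len = sizeof(st);
		if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS,
			       &st, &len) == 0)
			printf("pkts=%u drops=%u\n",
			       st.tp_packets, st.tp_drops);
	}
}
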