From 7143dfac692cd25d48a24dbe8323bc17af95b4ec Mon Sep 17 00:00:00 2001
From: Li RongQing <roy.qing.li@gmail.com>
Date: Fri, 28 Dec 2012 16:07:16 +0800
Subject: ah4/esp4: set transport header correctly for IPsec tunnel mode.

IPsec tunnel does not set ECN field to CE in inner header when
the ECN field in the outer header is CE, and the ECN field in
the inner header is ECT(0) or ECT(1).

The cause is that ipip_hdr() does not return the correct address of the
inner header, since skb->transport_header does not point to the inner
header after esp_input_done2() or ah_input().

Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/ah4.c  | 11 +++++++++--
 net/ipv4/esp4.c |  5 ++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index a0d8392491c3..a154d0a08c79 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -269,7 +269,11 @@ static void ah_input_done(struct crypto_async_request *base, int err)
 	skb->network_header += ah_hlen;
 	memcpy(skb_network_header(skb), work_iph, ihl);
 	__skb_pull(skb, ah_hlen + ihl);
-	skb_set_transport_header(skb, -ihl);
+
+	if (x->props.mode == XFRM_MODE_TUNNEL)
+		skb_reset_transport_header(skb);
+	else
+		skb_set_transport_header(skb, -ihl);
 out:
 	kfree(AH_SKB_CB(skb)->tmp);
 	xfrm_input_resume(skb, err);
@@ -381,7 +385,10 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb->network_header += ah_hlen;
 	memcpy(skb_network_header(skb), work_iph, ihl);
 	__skb_pull(skb, ah_hlen + ihl);
-	skb_set_transport_header(skb, -ihl);
+	if (x->props.mode == XFRM_MODE_TUNNEL)
+		skb_reset_transport_header(skb);
+	else
+		skb_set_transport_header(skb, -ihl);
 
 	err = nexthdr;
 
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b61e9deb7c7e..fd26ff4f3eac 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -346,7 +346,10 @@ static int esp_input_done2(struct sk_buff *skb, int err)
 
 	pskb_trim(skb, skb->len - alen - padlen - 2);
 	__skb_pull(skb, hlen);
-	skb_set_transport_header(skb, -ihl);
+	if (x->props.mode == XFRM_MODE_TUNNEL)
+		skb_reset_transport_header(skb);
+	else
+		skb_set_transport_header(skb, -ihl);
 
 	err = nexthdr[1];
 
-- 
cgit v1.2.3


From a9403f8aeb3e7dba6988d6cbe436e6521894e427 Mon Sep 17 00:00:00 2001
From: Li RongQing <roy.qing.li@gmail.com>
Date: Tue, 8 Jan 2013 15:41:12 +0800
Subject: ah6/esp6: set transport header correctly for IPsec tunnel mode.

IPsec tunnel does not set ECN field to CE in inner header when
the ECN field in the outer header is CE, and the ECN field in
the inner header is ECT(0) or ECT(1).

The cause is that ipip6_hdr() does not return the correct address of the
inner header, since skb->transport_header does not point to the inner
header after esp6_input_done2() or ah6_input().

Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/ah6.c  | 11 +++++++++--
 net/ipv6/esp6.c |  5 ++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index ecc35b93314b..384233188ac1 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -472,7 +472,10 @@ static void ah6_input_done(struct crypto_async_request *base, int err)
 	skb->network_header += ah_hlen;
 	memcpy(skb_network_header(skb), work_iph, hdr_len);
 	__skb_pull(skb, ah_hlen + hdr_len);
-	skb_set_transport_header(skb, -hdr_len);
+	if (x->props.mode == XFRM_MODE_TUNNEL)
+		skb_reset_transport_header(skb);
+	else
+		skb_set_transport_header(skb, -hdr_len);
 out:
 	kfree(AH_SKB_CB(skb)->tmp);
 	xfrm_input_resume(skb, err);
@@ -593,9 +596,13 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb->network_header += ah_hlen;
 	memcpy(skb_network_header(skb), work_iph, hdr_len);
-	skb->transport_header = skb->network_header;
 	__skb_pull(skb, ah_hlen + hdr_len);
 
+	if (x->props.mode == XFRM_MODE_TUNNEL)
+		skb_reset_transport_header(skb);
+	else
+		skb_set_transport_header(skb, -hdr_len);
+
 	err = nexthdr;
 
 out_free:
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 282f3723ee19..40ffd72243a4 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -300,7 +300,10 @@ static int esp_input_done2(struct sk_buff *skb, int err)
 
 	pskb_trim(skb, skb->len - alen - padlen - 2);
 	__skb_pull(skb, hlen);
-	skb_set_transport_header(skb, -hdr_len);
+	if (x->props.mode == XFRM_MODE_TUNNEL)
+		skb_reset_transport_header(skb);
+	else
+		skb_set_transport_header(skb, -hdr_len);
 
 	err = nexthdr[1];
 
-- 
cgit v1.2.3


From dbccd791a3fbbdac12c33834b73beff3984988e9 Mon Sep 17 00:00:00 2001
From: Szymon Janc <szymon.janc@tieto.com>
Date: Tue, 11 Dec 2012 08:51:19 +0100
Subject: Bluetooth: Fix sending HCI commands after reset

After sending the reset command, wait for its command complete (CC) event
before sending the next command. Some chips send the CC event for a command
received before reset if the reset was sent before the chip replied with CC.

This is also required by the specification: the host shall not send
additional HCI commands before receiving the CC for reset.

< HCI Command: Reset (0x03|0x0003) plen 0                              [hci0] 18.404612
> HCI Event: Command Complete (0x0e) plen 4                            [hci0] 18.405850
      Write Extended Inquiry Response (0x03|0x0052) ncmd 1
        Status: Success (0x00)
< HCI Command: Read Local Supported Features (0x04|0x0003) plen 0      [hci0] 18.406079
> HCI Event: Command Complete (0x0e) plen 4                            [hci0] 18.407864
      Reset (0x03|0x0003) ncmd 1
        Status: Success (0x00)
< HCI Command: Read Local Supported Features (0x04|0x0003) plen 0      [hci0] 18.408062
> HCI Event: Command Complete (0x0e) plen 12                           [hci0] 18.408835

Signed-off-by: Szymon Janc <szymon.janc@tieto.com>
Cc: stable@vger.kernel.org
Acked-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/hci_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 705078a0cc39..81b44481d0d9 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2688,7 +2688,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	if (ev->opcode != HCI_OP_NOP)
 		del_timer(&hdev->cmd_timer);
 
-	if (ev->ncmd) {
+	if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {
 		atomic_set(&hdev->cmd_cnt, 1);
 		if (!skb_queue_empty(&hdev->cmd_q))
 			queue_work(hdev->workqueue, &hdev->cmd_work);
-- 
cgit v1.2.3


From 0a9ab9bdb3e891762553f667066190c1d22ad62b Mon Sep 17 00:00:00 2001
From: Anderson Lizardo <anderson.lizardo@openbossa.org>
Date: Sun, 6 Jan 2013 18:28:53 -0400
Subject: Bluetooth: Fix incorrect strncpy() in hidp_setup_hid()

The length parameter should be sizeof(req->name) - 1 because there is no
guarantee that the string provided by userspace will contain the trailing
'\0'.

Can be easily reproduced by manually setting req->name to 128 non-zero
bytes prior to ioctl(HIDPCONNADD) and checking the device name setup on
input subsystem:

$ cat /sys/devices/pnp0/00\:04/tty/ttyS0/hci0/hci0\:1/input8/name
AAAAAA[...]AAAAAAAAf0:af:f0:af:f0:af

("f0:af:f0:af:f0:af" is the device bluetooth address, taken from "phys"
field in struct hid_device due to overflow.)

Cc: stable@vger.kernel.org
Signed-off-by: Anderson Lizardo <anderson.lizardo@openbossa.org>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/hidp/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index b2bcbe2dc328..a7352ff3fd1e 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -931,7 +931,7 @@ static int hidp_setup_hid(struct hidp_session *session,
 	hid->version = req->version;
 	hid->country = req->country;
 
-	strncpy(hid->name, req->name, 128);
+	strncpy(hid->name, req->name, sizeof(req->name) - 1);
 
 	snprintf(hid->phys, sizeof(hid->phys), "%pMR",
 		 &bt_sk(session->ctrl_sock->sk)->src);
-- 
cgit v1.2.3


From 7b064edae38d62d8587a8c574f93b53ce75ae749 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k@samsung.com>
Date: Thu, 10 Jan 2013 10:28:35 +0530
Subject: Bluetooth: Fix authentication if acl data comes before remote feature
 evt

If the remote device sends an L2CAP info request before
read_remote_ext_feature completes, then mgmt_connected will be sent in
hci_acldata_packet(), the remote name request won't be sent, and eventually
authentication won't happen.

Hcidump log of the issue

< HCI Command: Create Connection (0x01|0x0005) plen 13
    bdaddr BC:85:1F:74:7F:29 ptype 0xcc18 rswitch 0x01 clkoffset 0x4bf7 (valid)
    Packet type: DM1 DM3 DM5 DH1 DH3 DH5
> HCI Event: Command Status (0x0f) plen 4
    Create Connection (0x01|0x0005) status 0x00 ncmd 1
> HCI Event: Connect Complete (0x03) plen 11
    status 0x00 handle 12 bdaddr BC:85:1F:74:7F:29 type ACL encrypt 0x00
< HCI Command: Read Remote Supported Features (0x01|0x001b) plen 2
    handle 12
> HCI Event: Command Status (0x0f) plen 4
    Read Remote Supported Features (0x01|0x001b) status 0x00 ncmd 1
> HCI Event: Read Remote Supported Features (0x0b) plen 11
    status 0x00 handle 12
    Features: 0xbf 0xfe 0xcf 0xfe 0xdb 0xff 0x7b 0x87
> HCI Event: Max Slots Change (0x1b) plen 3
    handle 12 slots 5
< HCI Command: Read Remote Extended Features (0x01|0x001c) plen 3
    handle 12 page 1
> HCI Event: Command Status (0x0f) plen 4
    Read Remote Extended Features (0x01|0x001c) status 0x00 ncmd 1
> ACL data: handle 12 flags 0x02 dlen 10
    L2CAP(s): Info req: type 2
< ACL data: handle 12 flags 0x00 dlen 16
    L2CAP(s): Info rsp: type 2 result 0
      Extended feature mask 0x00b8
        Enhanced Retransmission mode
        Streaming mode
        FCS Option
        Fixed Channels
> HCI Event: Read Remote Extended Features (0x23) plen 13
    status 0x00 handle 12 page 1 max 1
    Features: 0x01 0x00 0x00 0x00 0x00 0x00 0x00 0x00
> ACL data: handle 12 flags 0x02 dlen 10
    L2CAP(s): Info req: type 3
< ACL data: handle 12 flags 0x00 dlen 20
    L2CAP(s): Info rsp: type 3 result 0
      Fixed channel list 0x00000002
        L2CAP Signalling Channel
> HCI Event: Number of Completed Packets (0x13) plen 5
    handle 12 packets 2

This patch moves sending mgmt_connected from hci_acldata_packet() to
l2cap_connect_req(), since this code exists to handle the scenario where
the remote device sends an L2CAP connect request too fast.

Signed-off-by: Jaganath Kanakkassery <jaganath.k@samsung.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/hci_core.c   |  8 --------
 net/bluetooth/l2cap_core.c | 11 +++++++++++
 2 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 596660d37c5e..0f78e34220c9 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2810,14 +2810,6 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
 	if (conn) {
 		hci_conn_enter_active_mode(conn, BT_POWER_FORCE_ACTIVE_OFF);
 
-		hci_dev_lock(hdev);
-		if (test_bit(HCI_MGMT, &hdev->dev_flags) &&
-		    !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
-			mgmt_device_connected(hdev, &conn->dst, conn->type,
-					      conn->dst_type, 0, NULL, 0,
-					      conn->dev_class);
-		hci_dev_unlock(hdev);
-
 		/* Send to upper protocol */
 		l2cap_recv_acldata(conn, skb, flags);
 		return;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 2c78208d793e..22e658322845 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3727,6 +3727,17 @@ sendresp:
 static int l2cap_connect_req(struct l2cap_conn *conn,
 			     struct l2cap_cmd_hdr *cmd, u8 *data)
 {
+	struct hci_dev *hdev = conn->hcon->hdev;
+	struct hci_conn *hcon = conn->hcon;
+
+	hci_dev_lock(hdev);
+	if (test_bit(HCI_MGMT, &hdev->dev_flags) &&
+	    !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags))
+		mgmt_device_connected(hdev, &hcon->dst, hcon->type,
+				      hcon->dst_type, 0, NULL, 0,
+				      hcon->dev_class);
+	hci_dev_unlock(hdev);
+
 	l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP, 0);
 	return 0;
 }
-- 
cgit v1.2.3


From b7e98b5100aad9290d7f06fcb9d1e80f7f62f05f Mon Sep 17 00:00:00 2001
From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
Date: Thu, 3 Jan 2013 19:59:28 -0200
Subject: Bluetooth: Check if the hci connection exists in SCO shutdown

Checking only for sco_conn seems not to be enough and leads to NULL
dereferences in the code; check for hcon instead.

<1>[11340.226404] BUG: unable to handle kernel NULL pointer dereference at 00000008
<4>[11340.226619] EIP is at __sco_sock_close+0xe8/0x1a0
<4>[11340.226629] EAX: f063a740 EBX: 00000000 ECX: f58f4544 EDX: 00000000
<4>[11340.226640] ESI: dec83e00 EDI: 5f9a081f EBP: e0fdff38 ESP: e0fdff1c
<0>[11340.226674] Stack:
<4>[11340.226682]  c184db87 c1251028 dec83e00 e0fdff38 c1754aef dec83e00 00000000 e0fdff5c
<4>[11340.226718]  c184f587 e0fdff64 e0fdff68 5f9a081f e0fdff5c c1751852 d7813800 62262f10
<4>[11340.226752]  e0fdff70 c1753c00 00000000 00000001 0000000d e0fdffac c175425c 00000041
<0>[11340.226793] Call Trace:
<4>[11340.226813]  [<c184db87>] ? sco_sock_clear_timer+0x27/0x60
<4>[11340.226831]  [<c1251028>] ? local_bh_enable+0x68/0xd0
<4>[11340.226846]  [<c1754aef>] ? lock_sock_nested+0x4f/0x60
<4>[11340.226862]  [<c184f587>] sco_sock_shutdown+0x67/0xb0
<4>[11340.226879]  [<c1751852>] ? sockfd_lookup_light+0x22/0x80
<4>[11340.226897]  [<c1753c00>] sys_shutdown+0x30/0x60
<4>[11340.226912]  [<c175425c>] sys_socketcall+0x1dc/0x2a0
<4>[11340.226929]  [<c149ba78>] ? trace_hardirqs_on_thunk+0xc/0x10
<4>[11340.226944]  [<c18860f1>] syscall_call+0x7/0xb
<4>[11340.226960]  [<c1880000>] ? restore_cur+0x5e/0xd7
<0>[11340.226969] Code: <f0> ff 4b 08 0f 94 c0 84 c0 74 20 80 7b 19 01 74 2f b8 0a 00 00

Reported-by: Chuansheng Liu <chuansheng.liu@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/sco.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 531a93d613d4..57f250c20e39 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -352,7 +352,7 @@ static void __sco_sock_close(struct sock *sk)
 
 	case BT_CONNECTED:
 	case BT_CONFIG:
-		if (sco_pi(sk)->conn) {
+		if (sco_pi(sk)->conn->hcon) {
 			sk->sk_state = BT_DISCONN;
 			sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
 			hci_conn_put(sco_pi(sk)->conn->hcon);
-- 
cgit v1.2.3


From 4610476d89d53714ca94aae081fa035908bc137a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 10 Jan 2013 12:42:15 +0100
Subject: netfilter: xt_CT: fix unset return value if conntrack zones are
 disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/netfilter/xt_CT.c: In function ‘xt_ct_tg_check_v1’:
net/netfilter/xt_CT.c:250:6: warning: ‘ret’ may be used uninitialized in this function [-Wmaybe-uninitialized]
net/netfilter/xt_CT.c: In function ‘xt_ct_tg_check_v0’:
net/netfilter/xt_CT.c:112:6: warning: ‘ret’ may be used uninitialized in this function [-Wmaybe-uninitialized]

Reported-by: Borislav Petkov <bp@alien8.de>
Acked-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_CT.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 2a0843081840..bde009ed8d3b 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -109,7 +109,7 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par)
 	struct xt_ct_target_info *info = par->targinfo;
 	struct nf_conntrack_tuple t;
 	struct nf_conn *ct;
-	int ret;
+	int ret = -EOPNOTSUPP;
 
 	if (info->flags & ~XT_CT_NOTRACK)
 		return -EINVAL;
@@ -247,7 +247,7 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par)
 	struct xt_ct_target_info_v1 *info = par->targinfo;
 	struct nf_conntrack_tuple t;
 	struct nf_conn *ct;
-	int ret;
+	int ret = -EOPNOTSUPP;
 
 	if (info->flags & ~XT_CT_NOTRACK)
 		return -EINVAL;
-- 
cgit v1.2.3


From 1e47ee8367babe6a5e8adf44a714c7086657b87e Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 10 Jan 2013 16:12:01 +0100
Subject: netfilter: nf_conntrack: fix BUG_ON while removing nf_conntrack with
 netns

canqun zhang reported that we're hitting BUG_ON in the
nf_conntrack_destroy path when calling kfree_skb while
rmmod'ing the nf_conntrack module.

Currently, the nf_ct_destroy hook is being set to NULL in the
destroy path of conntrack.init_net. However, this is a problem
since init_net may be destroyed before any other existing netns
(we cannot assume any specific ordering while releasing existing
netns according to what I read in recent emails).

Thanks to Gao feng for initial patch to address this issue.

Reported-by: canqun zhang <canqunzhang@gmail.com>
Acked-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_core.c       | 9 +++++----
 net/netfilter/nf_conntrack_standalone.c | 1 +
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 016d95ead930..e4a0c4fb3a7c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1376,11 +1376,12 @@ void nf_conntrack_cleanup(struct net *net)
 	synchronize_net();
 	nf_conntrack_proto_fini(net);
 	nf_conntrack_cleanup_net(net);
+}
 
-	if (net_eq(net, &init_net)) {
-		RCU_INIT_POINTER(nf_ct_destroy, NULL);
-		nf_conntrack_cleanup_init_net();
-	}
+void nf_conntrack_cleanup_end(void)
+{
+	RCU_INIT_POINTER(nf_ct_destroy, NULL);
+	nf_conntrack_cleanup_init_net();
 }
 
 void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 363285d544a1..e7185c684816 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -575,6 +575,7 @@ static int __init nf_conntrack_standalone_init(void)
 static void __exit nf_conntrack_standalone_fini(void)
 {
 	unregister_pernet_subsys(&nf_conntrack_net_ops);
+	nf_conntrack_cleanup_end();
 }
 
 module_init(nf_conntrack_standalone_init);
-- 
cgit v1.2.3


From 5b76c4948fe6977bead2359c2054f3e6a2dcf3d0 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@inai.de>
Date: Thu, 10 Jan 2013 12:30:05 +0000
Subject: netfilter: x_tables: print correct hook names for ARP

arptables 0.0.4 (released on 10th Jan 2013) supports calling the
CLASSIFY target, but on adding a rule to the wrong chain, the
diagnostic is as follows:

	# arptables -A INPUT -j CLASSIFY --set-class 0:0
	arptables: Invalid argument
	# dmesg | tail -n1
	x_tables: arp_tables: CLASSIFY target: used from hooks
	PREROUTING, but only usable from INPUT/FORWARD

This is incorrect, since xt_CLASSIFY.c does specify
(1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD).

This patch corrects the x_tables diagnostic message to print the
proper hook names for the NFPROTO_ARP case.

Affects all kernels down to and including v2.6.31.

Signed-off-by: Jan Engelhardt <jengelh@inai.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/x_tables.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8d987c3573fd..7b3a9e5999c0 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -345,19 +345,27 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target,
 }
 EXPORT_SYMBOL_GPL(xt_find_revision);
 
-static char *textify_hooks(char *buf, size_t size, unsigned int mask)
+static char *
+textify_hooks(char *buf, size_t size, unsigned int mask, uint8_t nfproto)
 {
-	static const char *const names[] = {
+	static const char *const inetbr_names[] = {
 		"PREROUTING", "INPUT", "FORWARD",
 		"OUTPUT", "POSTROUTING", "BROUTING",
 	};
-	unsigned int i;
+	static const char *const arp_names[] = {
+		"INPUT", "FORWARD", "OUTPUT",
+	};
+	const char *const *names;
+	unsigned int i, max;
 	char *p = buf;
 	bool np = false;
 	int res;
 
+	names = (nfproto == NFPROTO_ARP) ? arp_names : inetbr_names;
+	max   = (nfproto == NFPROTO_ARP) ? ARRAY_SIZE(arp_names) :
+	                                   ARRAY_SIZE(inetbr_names);
 	*p = '\0';
-	for (i = 0; i < ARRAY_SIZE(names); ++i) {
+	for (i = 0; i < max; ++i) {
 		if (!(mask & (1 << i)))
 			continue;
 		res = snprintf(p, size, "%s%s", np ? "/" : "", names[i]);
@@ -402,8 +410,10 @@ int xt_check_match(struct xt_mtchk_param *par,
 		pr_err("%s_tables: %s match: used from hooks %s, but only "
 		       "valid from %s\n",
 		       xt_prefix[par->family], par->match->name,
-		       textify_hooks(used, sizeof(used), par->hook_mask),
-		       textify_hooks(allow, sizeof(allow), par->match->hooks));
+		       textify_hooks(used, sizeof(used), par->hook_mask,
+		                     par->family),
+		       textify_hooks(allow, sizeof(allow), par->match->hooks,
+		                     par->family));
 		return -EINVAL;
 	}
 	if (par->match->proto && (par->match->proto != proto || inv_proto)) {
@@ -575,8 +585,10 @@ int xt_check_target(struct xt_tgchk_param *par,
 		pr_err("%s_tables: %s target: used from hooks %s, but only "
 		       "usable from %s\n",
 		       xt_prefix[par->family], par->target->name,
-		       textify_hooks(used, sizeof(used), par->hook_mask),
-		       textify_hooks(allow, sizeof(allow), par->target->hooks));
+		       textify_hooks(used, sizeof(used), par->hook_mask,
+		                     par->family),
+		       textify_hooks(allow, sizeof(allow), par->target->hooks,
+		                     par->family));
 		return -EINVAL;
 	}
 	if (par->target->proto && (par->target->proto != proto || inv_proto)) {
-- 
cgit v1.2.3


From cce894bb824429fd312706c7012acae43e725865 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 13 Jan 2013 18:21:51 +0000
Subject: tcp: fix a panic on UP machines in reqsk_fastopen_remove

spin_is_locked() on a !SMP (UP) build is kind of useless.

BUG_ON(!spin_is_locked(xx)) is guaranteed to crash.

Just remove this check in reqsk_fastopen_remove() as
the callers do hold the socket lock.

Reported-by: Ketan Kulkarni <ketkulka@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Jerry Chu <hkchu@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Dave Taht <dave.taht@gmail.com>
Acked-by: H.K. Jerry Chu <hkchu@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/request_sock.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index c31d9e8668c3..4425148d2b51 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -186,8 +186,6 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
 	struct fastopen_queue *fastopenq =
 	    inet_csk(lsk)->icsk_accept_queue.fastopenq;
 
-	BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk));
-
 	tcp_sk(sk)->fastopen_rsk = NULL;
 	spin_lock_bh(&fastopenq->lock);
 	fastopenq->qlen--;
-- 
cgit v1.2.3


From 1626e0fa740dec8665a973cf2349405cdfeb46dc Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 11 Jan 2013 14:34:25 +0100
Subject: mac80211: fix FT roaming
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During FT roaming, wpa_supplicant attempts to set the
key before association. This used to be rejected, but
as a side effect of my commit 66e67e418908442389d3a9e
("mac80211: redesign auth/assoc") the key was accepted
causing hardware crypto to not be used for it as the
station isn't added to the driver yet.

It would be possible to accept the key and then add it
to the driver when the station has been added. However,
this may run into issues with drivers using the state-
based station adding if they accept the key only after
association like it used to be.

For now, revert to the behaviour from before the auth
and assoc change.

Cc: stable@vger.kernel.org
Reported-by: Cédric Debarge <cedric.debarge@acksys.fr>
Tested-by: Cédric Debarge <cedric.debarge@acksys.fr>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 47e0aca614b7..516fbc96feff 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -164,7 +164,17 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 			sta = sta_info_get(sdata, mac_addr);
 		else
 			sta = sta_info_get_bss(sdata, mac_addr);
-		if (!sta) {
+		/*
+		 * The ASSOC test makes sure the driver is ready to
+		 * receive the key. When wpa_supplicant has roamed
+		 * using FT, it attempts to set the key before
+		 * association has completed, this rejects that attempt
+		 * so it will set the key again after association.
+		 *
+		 * TODO: accept the key if we have a station entry and
+		 *       add it to the device after the station.
+		 */
+		if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) {
 			ieee80211_key_free(sdata->local, key);
 			err = -ENOENT;
 			goto out_unlock;
-- 
cgit v1.2.3


From aacde9ee45225f7e0b90960f479aef83c66bfdc0 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Thu, 20 Dec 2012 14:41:18 +0100
Subject: mac80211: synchronize scan off/on-channel and PS states

Since:

commit b23b025fe246f3acc2988eb6d400df34c27cb8ae
Author: Ben Greear <greearb@candelatech.com>
Date:   Fri Feb 4 11:54:17 2011 -0800

    mac80211: Optimize scans on current operating channel.

we do not disable PS while going back to the operational channel (in
ieee80211_scan_state_suspend) and defer that until the scan finishes.
But since we are allowed to send frames, we can send a frame to the AP
without the PM bit set, which disables PS on the AP side. Then when we
switch to off-channel (in ieee80211_scan_state_resume) we do not enable
PS. Hence we are off-channel with PS disabled, and frames are not
buffered by the AP.

To fix this, remove the offchannel_ps_enable/offchannel_ps_disable
arguments and always enable PS when going off-channel and disable it when
going on-channel, like it was before.

Cc: stable@vger.kernel.org # 2.6.39+
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Tested-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h |  6 ++----
 net/mac80211/offchannel.c  | 19 +++++++------------
 net/mac80211/scan.c        | 15 +++++----------
 3 files changed, 14 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8563b9a5cac3..2ed065c09562 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1358,10 +1358,8 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sched_scan_stopped_work(struct work_struct *work);
 
 /* off-channel helpers */
-void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local,
-				    bool offchannel_ps_enable);
-void ieee80211_offchannel_return(struct ieee80211_local *local,
-				 bool offchannel_ps_disable);
+void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local);
+void ieee80211_offchannel_return(struct ieee80211_local *local);
 void ieee80211_roc_setup(struct ieee80211_local *local);
 void ieee80211_start_next_roc(struct ieee80211_local *local);
 void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata);
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index a5379aea7d09..a3ad4c3c80a3 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -102,8 +102,7 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata)
 	ieee80211_sta_reset_conn_monitor(sdata);
 }
 
-void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local,
-				    bool offchannel_ps_enable)
+void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local)
 {
 	struct ieee80211_sub_if_data *sdata;
 
@@ -134,8 +133,7 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local,
 
 		if (sdata->vif.type != NL80211_IFTYPE_MONITOR) {
 			netif_tx_stop_all_queues(sdata->dev);
-			if (offchannel_ps_enable &&
-			    (sdata->vif.type == NL80211_IFTYPE_STATION) &&
+			if (sdata->vif.type == NL80211_IFTYPE_STATION &&
 			    sdata->u.mgd.associated)
 				ieee80211_offchannel_ps_enable(sdata);
 		}
@@ -143,8 +141,7 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local,
 	mutex_unlock(&local->iflist_mtx);
 }
 
-void ieee80211_offchannel_return(struct ieee80211_local *local,
-				 bool offchannel_ps_disable)
+void ieee80211_offchannel_return(struct ieee80211_local *local)
 {
 	struct ieee80211_sub_if_data *sdata;
 
@@ -163,11 +160,9 @@ void ieee80211_offchannel_return(struct ieee80211_local *local,
 			continue;
 
 		/* Tell AP we're back */
-		if (offchannel_ps_disable &&
-		    sdata->vif.type == NL80211_IFTYPE_STATION) {
-			if (sdata->u.mgd.associated)
-				ieee80211_offchannel_ps_disable(sdata);
-		}
+		if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+		    sdata->u.mgd.associated)
+			ieee80211_offchannel_ps_disable(sdata);
 
 		if (sdata->vif.type != NL80211_IFTYPE_MONITOR) {
 			/*
@@ -385,7 +380,7 @@ void ieee80211_sw_roc_work(struct work_struct *work)
 			local->tmp_channel = NULL;
 			ieee80211_hw_config(local, 0);
 
-			ieee80211_offchannel_return(local, true);
+			ieee80211_offchannel_return(local);
 		}
 
 		ieee80211_recalc_idle(local);
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index d59fc6818b1c..bf82e69d0601 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -292,7 +292,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted,
 	if (!was_hw_scan) {
 		ieee80211_configure_filter(local);
 		drv_sw_scan_complete(local);
-		ieee80211_offchannel_return(local, true);
+		ieee80211_offchannel_return(local);
 	}
 
 	ieee80211_recalc_idle(local);
@@ -341,7 +341,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local)
 	local->next_scan_state = SCAN_DECISION;
 	local->scan_channel_idx = 0;
 
-	ieee80211_offchannel_stop_vifs(local, true);
+	ieee80211_offchannel_stop_vifs(local);
 
 	ieee80211_configure_filter(local);
 
@@ -678,12 +678,8 @@ static void ieee80211_scan_state_suspend(struct ieee80211_local *local,
 	local->scan_channel = NULL;
 	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
 
-	/*
-	 * Re-enable vifs and beaconing.  Leave PS
-	 * in off-channel state..will put that back
-	 * on-channel at the end of scanning.
-	 */
-	ieee80211_offchannel_return(local, false);
+	/* disable PS */
+	ieee80211_offchannel_return(local);
 
 	*next_delay = HZ / 5;
 	/* afterwards, resume scan & go to next channel */
@@ -693,8 +689,7 @@ static void ieee80211_scan_state_suspend(struct ieee80211_local *local,
 static void ieee80211_scan_state_resume(struct ieee80211_local *local,
 					unsigned long *next_delay)
 {
-	/* PS already is in off-channel mode */
-	ieee80211_offchannel_stop_vifs(local, false);
+	ieee80211_offchannel_stop_vifs(local);
 
 	if (local->ops->flush) {
 		drv_flush(local, false);
-- 
cgit v1.2.3


From b4a7ff75ba3545b061d4fe63f0bb9136ccfe8b19 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sun, 13 Jan 2013 23:10:26 +0100
Subject: mac80211: fix monitor mode injection

Channel contexts are not always used with monitor interfaces. If no channel
context is set, use the oper channel, otherwise tx fails.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
[check local->use_chanctx]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index e9eadc40c09c..467c1d1b66f2 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1673,10 +1673,13 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
 			chanctx_conf =
 				rcu_dereference(tmp_sdata->vif.chanctx_conf);
 	}
-	if (!chanctx_conf)
-		goto fail_rcu;
 
-	chan = chanctx_conf->def.chan;
+	if (chanctx_conf)
+		chan = chanctx_conf->def.chan;
+	else if (!local->use_chanctx)
+		chan = local->_oper_channel;
+	else
+		goto fail_rcu;
 
 	/*
 	 * Frame injection is not allowed if beaconing is not allowed
-- 
cgit v1.2.3


From 9cbbffe2ded494429b0d005a51a88242bd9b3095 Mon Sep 17 00:00:00 2001
From: Bob Copeland <me@bobcopeland.com>
Date: Wed, 9 Jan 2013 12:34:55 -0500
Subject: mac80211: set NEED_TXPROCESSING for PERR frames

A user reported warnings in ath5k due to transmitting frames with no
rates set up.  The frames were Mesh PERR frames, and some debugging
showed an empty control block with just the vif pointer:

>  [  562.522682] XXX txinfo: 00000000: 00 00 00 00 00 00 00 00 00 00 00
>  00 00 00 00 00  ................
>  [  562.522688] XXX txinfo: 00000010: 00 00 00 00 00 00 00 00 54 b8 f2
>  db 00 00 00 00  ........T.......
>  [  562.522693] XXX txinfo: 00000020: 00 00 00 00 00 00 00 00 00 00 00
>  00 00 00 00 00  ................

Set the IEEE80211_TX_INTFL_NEED_TXPROCESSING flag to ensure that
rate control gets run before the frame is sent.

Signed-off-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mesh_hwmp.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 47aeee2d8db1..40b390581b01 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -215,6 +215,7 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata,
 	skb->priority = 7;
 
 	info->control.vif = &sdata->vif;
+	info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
 	ieee80211_set_qos_hdr(sdata, skb);
 }
 
-- 
cgit v1.2.3


From 8680451f38a64bd270233b3c0eeb7c45f2b5efe3 Mon Sep 17 00:00:00 2001
From: Bob Copeland <me@bobcopeland.com>
Date: Wed, 9 Jan 2013 12:34:56 -0500
Subject: mac80211: add encrypt headroom to PERR frames

Mesh PERR action frames are robust and thus may be encrypted, so add
proper head/tailroom to allow this.  Fixes this warning when operating
a Mesh STA on ath5k:

WARNING: at net/mac80211/wpa.c:427 ccmp_encrypt_skb.isra.5+0x7b/0x1a0 [mac80211]()
Call Trace:
 [<c011c5e7>] warn_slowpath_common+0x63/0x78
 [<c011c60b>] warn_slowpath_null+0xf/0x13
 [<e090621d>] ccmp_encrypt_skb.isra.5+0x7b/0x1a0 [mac80211]
 [<e090685c>] ieee80211_crypto_ccmp_encrypt+0x1f/0x37 [mac80211]
 [<e0917113>] invoke_tx_handlers+0xcad/0x10bd [mac80211]
 [<e0917665>] ieee80211_tx+0x87/0xb3 [mac80211]
 [<e0918932>] ieee80211_tx_pending+0xcc/0x170 [mac80211]
 [<c0121c43>] tasklet_action+0x3e/0x65

Signed-off-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mesh_hwmp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 40b390581b01..2659e428b80c 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -247,11 +247,13 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn,
 		return -EAGAIN;
 
 	skb = dev_alloc_skb(local->tx_headroom +
+			    IEEE80211_ENCRYPT_HEADROOM +
+			    IEEE80211_ENCRYPT_TAILROOM +
 			    hdr_len +
 			    2 + 15 /* PERR IE */);
 	if (!skb)
 		return -1;
-	skb_reserve(skb, local->tx_headroom);
+	skb_reserve(skb, local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM);
 	mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len);
 	memset(mgmt, 0, hdr_len);
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
-- 
cgit v1.2.3


From 7efdba5bd9a2f3e2059beeb45c9fa55eefe1bced Mon Sep 17 00:00:00 2001
From: Romain KUNTZ <r.kuntz@ipflavors.com>
Date: Wed, 16 Jan 2013 12:47:40 +0000
Subject: ipv6: fix header length calculation in ip6_append_data()

Commit 299b0767 (ipv6: Fix IPsec slowpath fragmentation problem)
introduced an error in the header length calculation that
causes corrupted packets when non-fragmentable extension
headers (Destination Option or Routing Header Type 2) are used.

rt->rt6i_nfheader_len is the length of the non-fragmentable
extension header, and it should be subtracted from
rt->dst.header_len, and not from exthdrlen, as it was done before
commit 299b0767.

This patch reverts to the original and correct behavior. It has
been successfully tested with and without IPsec on packets
that include non-fragmentable extension headers.

Signed-off-by: Romain Kuntz <r.kuntz@ipflavors.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5552d13ae92f..0c7c03d50dc0 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1213,10 +1213,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		if (dst_allfrag(rt->dst.path))
 			cork->flags |= IPCORK_ALLFRAG;
 		cork->length = 0;
-		exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
+		exthdrlen = (opt ? opt->opt_flen : 0);
 		length += exthdrlen;
 		transhdrlen += exthdrlen;
-		dst_exthdrlen = rt->dst.header_len;
+		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
 	} else {
 		rt = (struct rt6_info *)cork->dst;
 		fl6 = &inet->cork.fl.u.ip6;
-- 
cgit v1.2.3


From 38d523e2948162776903349c89d65f7b9370dadb Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 16 Jan 2013 20:55:01 +0000
Subject: ipv4: Remove output route check in ipv4_mtu
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The output route check was introduced with git commit 261663b0
(ipv4: Don't use the cached pmtu informations for input routes)
during times when we cached the pmtu information on the
inetpeer. Now the pmtu information is back in the routes,
so this check is obsolete. It also had some unwanted side effects,
as reported by Timo Teras and Lukas Tribus.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Timo Teräs <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 844a9ef60dbd..6e4a89c5e27e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1120,7 +1120,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
 		mtu = dst_metric_raw(dst, RTAX_MTU);
 
-	if (mtu && rt_is_output_route(rt))
+	if (mtu)
 		return mtu;
 
 	mtu = dst->dev->mtu;
-- 
cgit v1.2.3


From fa1e492aa3cbafba9f8fc6d05e5b08a3091daf4a Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 16 Jan 2013 20:58:10 +0000
Subject: ipv4: Don't update the pmtu on mtu locked routes

Routes with a locked mtu should not use learned pmtu information,
so do not update the pmtu on these routes.

Reported-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6e4a89c5e27e..259cbeee9a8b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -912,6 +912,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 	struct dst_entry *dst = &rt->dst;
 	struct fib_result res;
 
+	if (dst_metric_locked(dst, RTAX_MTU))
+		return;
+
 	if (dst->dev->mtu < mtu)
 		return;
 
-- 
cgit v1.2.3


From 2f94aabd9f6c925d77aecb3ff020f1cc12ed8f86 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Thu, 17 Jan 2013 11:15:08 +0000
Subject: sctp: refactor sctp_outq_teardown to insure proper re-initalization

Jamie Parsons reported a problem recently, in which the re-initialization of an
association (the duplicate init case) resulted in a loss of receive window
space.  He tracked down the root cause to sctp_outq_teardown, which discarded
all the data on an outq during a re-initialization of the corresponding
association, but never reset the outq->outstanding_data field to zero.  I wrote,
and he tested, this fix, which does a proper full re-initialization of the outq,
fixing this problem, and hopefully future-proofing us from similar issues down
the road.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Reported-by: Jamie Parsons <Jamie.Parsons@metaswitch.com>
Tested-by: Jamie Parsons <Jamie.Parsons@metaswitch.com>
CC: Jamie Parsons <Jamie.Parsons@metaswitch.com>
CC: Vlad Yasevich <vyasevich@gmail.com>
CC: "David S. Miller" <davem@davemloft.net>
CC: netdev@vger.kernel.org
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/outqueue.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 379c81dee9d1..9bcdbd02d777 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -224,7 +224,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
 
 /* Free the outqueue structure and any related pending chunks.
  */
-void sctp_outq_teardown(struct sctp_outq *q)
+static void __sctp_outq_teardown(struct sctp_outq *q)
 {
 	struct sctp_transport *transport;
 	struct list_head *lchunk, *temp;
@@ -277,8 +277,6 @@ void sctp_outq_teardown(struct sctp_outq *q)
 		sctp_chunk_free(chunk);
 	}
 
-	q->error = 0;
-
 	/* Throw away any leftover control chunks. */
 	list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) {
 		list_del_init(&chunk->list);
@@ -286,11 +284,17 @@ void sctp_outq_teardown(struct sctp_outq *q)
 	}
 }
 
+void sctp_outq_teardown(struct sctp_outq *q)
+{
+	__sctp_outq_teardown(q);
+	sctp_outq_init(q->asoc, q);
+}
+
 /* Free the outqueue structure and any related pending chunks.  */
 void sctp_outq_free(struct sctp_outq *q)
 {
 	/* Throw away leftover chunks. */
-	sctp_outq_teardown(q);
+	__sctp_outq_teardown(q);
 
 	/* If we were kmalloc()'d, free the memory.  */
 	if (q->malloced)
-- 
cgit v1.2.3


From e2f6725917ed525f4111c33c31ab53397b70f9d2 Mon Sep 17 00:00:00 2001
From: Nickolai Zeldovich <nickolai@csail.mit.edu>
Date: Thu, 17 Jan 2013 13:58:28 -0500
Subject: net/xfrm/xfrm_replay: avoid division by zero

All of the xfrm_replay->advance functions in xfrm_replay.c check if
x->replay_esn->replay_window is zero (and return if so).  However,
one of them, xfrm_replay_advance_bmp(), divides by that value (in the
'%' operator) before doing the check, which can potentially trigger
a divide-by-zero exception.  Some compilers will also assume that the
earlier division means the value cannot be zero later, and thus will
eliminate the subsequent zero check as dead code.

This patch moves the division to after the check.

Signed-off-by: Nickolai Zeldovich <nickolai@csail.mit.edu>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_replay.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 765f6fe951eb..35754cc8a9e5 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -242,11 +242,13 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq)
 	u32 diff;
 	struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
 	u32 seq = ntohl(net_seq);
-	u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
+	u32 pos;
 
 	if (!replay_esn->replay_window)
 		return;
 
+	pos = (replay_esn->seq - 1) % replay_esn->replay_window;
+
 	if (seq > replay_esn->seq) {
 		diff = seq - replay_esn->seq;
 
-- 
cgit v1.2.3


From 6f809da27c94425e07be4a64d5093e1df95188e9 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 16 Jan 2013 22:09:49 +0000
Subject: ipv6: Add an error handler for icmp6

pmtu and redirect events are now handled in the protocols' error handlers,
so add an error handler for icmp6 to do this. It is needed in the case
when we have no socket context. Based on a patch by Duan Jiong.

Reported-by: Duan Jiong <djduanjiong@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/icmp.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index b4a9fd51dae7..fff5bdd8b680 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -81,10 +81,22 @@ static inline struct sock *icmpv6_sk(struct net *net)
 	return net->ipv6.icmp_sk[smp_processor_id()];
 }
 
+static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		       u8 type, u8 code, int offset, __be32 info)
+{
+	struct net *net = dev_net(skb->dev);
+
+	if (type == ICMPV6_PKT_TOOBIG)
+		ip6_update_pmtu(skb, net, info, 0, 0);
+	else if (type == NDISC_REDIRECT)
+		ip6_redirect(skb, net, 0, 0);
+}
+
 static int icmpv6_rcv(struct sk_buff *skb);
 
 static const struct inet6_protocol icmpv6_protocol = {
 	.handler	=	icmpv6_rcv,
+	.err_handler	=	icmpv6_err,
 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 };
 
-- 
cgit v1.2.3


From b74aa930ef49a3c0d8e4c1987f89decac768fb2c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 19 Jan 2013 16:10:37 +0000
Subject: tcp: fix incorrect LOCKDROPPEDICMPS counter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 563d34d057 (tcp: dont drop MTU reduction indications)
added an error leading to incorrect accounting of
LINUX_MIB_LOCKDROPPEDICMPS

If socket is owned by the user, we want to increment
this SNMP counter, unless the message is a
(ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED) one.

Reported-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 54139fa514e6..70b09ef2463b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -369,11 +369,10 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	 * We do take care of PMTU discovery (RFC1191) special case :
 	 * we can receive locally generated ICMP messages while socket is held.
 	 */
-	if (sock_owned_by_user(sk) &&
-	    type != ICMP_DEST_UNREACH &&
-	    code != ICMP_FRAG_NEEDED)
-		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
-
+	if (sock_owned_by_user(sk)) {
+		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
+			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
+	}
 	if (sk->sk_state == TCP_CLOSE)
 		goto out;
 
-- 
cgit v1.2.3


From 82bda6195615891181115f579a480aa5001ce7e9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 5 Jan 2013 21:31:18 +0000
Subject: net: splice: avoid high order page splitting

splice() can handle pages of any order, but network code tries hard to
split them in PAGE_SIZE units. Not quite successfully anyway, as
__splice_segment() assumed poff < PAGE_SIZE. This is true for
the skb->data part, not necessarily for the fragments.

This patch removes this logic to give the pages as they are in the skb.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 38 +++++++++-----------------------------
 1 file changed, 9 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3ab989b0de42..f5dfdf7727c4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1706,20 +1706,6 @@ static bool spd_fill_page(struct splice_pipe_desc *spd,
 	return false;
 }
 
-static inline void __segment_seek(struct page **page, unsigned int *poff,
-				  unsigned int *plen, unsigned int off)
-{
-	unsigned long n;
-
-	*poff += off;
-	n = *poff / PAGE_SIZE;
-	if (n)
-		*page = nth_page(*page, n);
-
-	*poff = *poff % PAGE_SIZE;
-	*plen -= off;
-}
-
 static bool __splice_segment(struct page *page, unsigned int poff,
 			     unsigned int plen, unsigned int *off,
 			     unsigned int *len, struct sk_buff *skb,
@@ -1727,6 +1713,8 @@ static bool __splice_segment(struct page *page, unsigned int poff,
 			     struct sock *sk,
 			     struct pipe_inode_info *pipe)
 {
+	unsigned int flen;
+
 	if (!*len)
 		return true;
 
@@ -1737,24 +1725,16 @@ static bool __splice_segment(struct page *page, unsigned int poff,
 	}
 
 	/* ignore any bits we already processed */
-	if (*off) {
-		__segment_seek(&page, &poff, &plen, *off);
-		*off = 0;
-	}
-
-	do {
-		unsigned int flen = min(*len, plen);
+	poff += *off;
+	plen -= *off;
+	*off = 0;
 
-		/* the linear region may spread across several pages  */
-		flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
+	flen = min(*len, plen);
 
-		if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk))
-			return true;
-
-		__segment_seek(&page, &poff, &plen, flen);
-		*len -= flen;
+	if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk))
+		return true;
 
-	} while (*len && plen);
+	*len -= flen;
 
 	return false;
 }
-- 
cgit v1.2.3


From bc9540c637c3d8712ccbf9dcf28621f380ed5e64 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 11 Jan 2013 14:46:37 +0000
Subject: net: splice: fix __splice_segment()

commit 9ca1b22d6d2 (net: splice: avoid high order page splitting)
forgot that skb->head could need a copy into several page frags.

This could be the case for loopback traffic mostly.

Also remove now useless skb argument from linear_to_page()
and __splice_segment() prototypes.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f5dfdf7727c4..a9a2ae3e2213 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1649,7 +1649,7 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
 
 static struct page *linear_to_page(struct page *page, unsigned int *len,
 				   unsigned int *offset,
-				   struct sk_buff *skb, struct sock *sk)
+				   struct sock *sk)
 {
 	struct page_frag *pfrag = sk_page_frag(sk);
 
@@ -1682,14 +1682,14 @@ static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
 static bool spd_fill_page(struct splice_pipe_desc *spd,
 			  struct pipe_inode_info *pipe, struct page *page,
 			  unsigned int *len, unsigned int offset,
-			  struct sk_buff *skb, bool linear,
+			  bool linear,
 			  struct sock *sk)
 {
 	if (unlikely(spd->nr_pages == MAX_SKB_FRAGS))
 		return true;
 
 	if (linear) {
-		page = linear_to_page(page, len, &offset, skb, sk);
+		page = linear_to_page(page, len, &offset, sk);
 		if (!page)
 			return true;
 	}
@@ -1708,13 +1708,11 @@ static bool spd_fill_page(struct splice_pipe_desc *spd,
 
 static bool __splice_segment(struct page *page, unsigned int poff,
 			     unsigned int plen, unsigned int *off,
-			     unsigned int *len, struct sk_buff *skb,
+			     unsigned int *len,
 			     struct splice_pipe_desc *spd, bool linear,
 			     struct sock *sk,
 			     struct pipe_inode_info *pipe)
 {
-	unsigned int flen;
-
 	if (!*len)
 		return true;
 
@@ -1729,12 +1727,16 @@ static bool __splice_segment(struct page *page, unsigned int poff,
 	plen -= *off;
 	*off = 0;
 
-	flen = min(*len, plen);
-
-	if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk))
-		return true;
+	do {
+		unsigned int flen = min(*len, plen);
 
-	*len -= flen;
+		if (spd_fill_page(spd, pipe, page, &flen, poff,
+				  linear, sk))
+			return true;
+		poff += flen;
+		plen -= flen;
+		*len -= flen;
+	} while (*len && plen);
 
 	return false;
 }
@@ -1757,7 +1759,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
 	if (__splice_segment(virt_to_page(skb->data),
 			     (unsigned long) skb->data & (PAGE_SIZE - 1),
 			     skb_headlen(skb),
-			     offset, len, skb, spd,
+			     offset, len, spd,
 			     skb_head_is_locked(skb),
 			     sk, pipe))
 		return true;
@@ -1770,7 +1772,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
 
 		if (__splice_segment(skb_frag_page(f),
 				     f->page_offset, skb_frag_size(f),
-				     offset, len, skb, spd, false, sk, pipe))
+				     offset, len, spd, false, sk, pipe))
 			return true;
 	}
 
-- 
cgit v1.2.3


From 5b653b2a1c3b5634368fde2df958a1398481e580 Mon Sep 17 00:00:00 2001
From: Michal Kubecek <mkubecek@suse.cz>
Date: Fri, 18 Jan 2013 16:03:48 +0100
Subject: xfrm: fix freed block size calculation in xfrm_policy_fini()

Missing multiplication of the block size by sizeof(struct hlist_head)
can cause xfrm_hash_free() to be called with a wrong second argument,
so that kfree() is called on a block allocated with vzalloc() or
__get_free_pages(), or free_pages() is called with a wrong order, when
a namespace with enough policies is removed.

Bug introduced by commit a35f6c5d, i.e. versions >= 2.6.29 are
affected.

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 41eabc46f110..07c585756d2a 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2656,7 +2656,7 @@ static void xfrm_policy_fini(struct net *net)
 		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
 
 		htab = &net->xfrm.policy_bydst[dir];
-		sz = (htab->hmask + 1);
+		sz = (htab->hmask + 1) * sizeof(struct hlist_head);
 		WARN_ON(!hlist_empty(htab->table));
 		xfrm_hash_free(htab->table, sz);
 	}
-- 
cgit v1.2.3


From 05ab86c55683410593720003442dde629782aaac Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 15 Jan 2013 13:38:53 +0100
Subject: xfrm4: Invalidate all ipv4 routes on IPsec pmtu events

On IPsec pmtu events we can't access the transport headers of
the original packet, so we can't find the socket that sent
the packet. The only chance to notify the socket about the
pmtu change is to force a relookup for all routes. This
patch implements this for the IPsec protocols.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/ah4.c    | 7 +++++--
 net/ipv4/esp4.c   | 7 +++++--
 net/ipv4/ipcomp.c | 7 +++++--
 3 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index a154d0a08c79..a69b4e4a02b5 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -420,9 +420,12 @@ static void ah4_err(struct sk_buff *skb, u32 info)
 	if (!x)
 		return;
 
-	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
+		atomic_inc(&flow_cache_genid);
+		rt_genid_bump(net);
+
 		ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);
-	else
+	} else
 		ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0);
 	xfrm_state_put(x);
 }
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index fd26ff4f3eac..3b4f0cd2e63e 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -502,9 +502,12 @@ static void esp4_err(struct sk_buff *skb, u32 info)
 	if (!x)
 		return;
 
-	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
+		atomic_inc(&flow_cache_genid);
+		rt_genid_bump(net);
+
 		ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
-	else
+	} else
 		ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0);
 	xfrm_state_put(x);
 }
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index d3ab47e19a89..9a46daed2f3c 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -47,9 +47,12 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
 	if (!x)
 		return;
 
-	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
+		atomic_inc(&flow_cache_genid);
+		rt_genid_bump(net);
+
 		ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0);
-	else
+	} else
 		ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0);
 	xfrm_state_put(x);
 }
-- 
cgit v1.2.3


From 9cb3a50c5f63ed745702972f66eaee8767659acd Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 21 Jan 2013 01:59:11 +0000
Subject: ipv4: Invalidate the socket cached route on pmtu events if possible

The route lookup in ipv4_sk_update_pmtu() might return a route
different from the route we cached at the socket. This is because
standard routes are per cpu, so each cpu has its own struct rtable.
This means that we do not invalidate the socket cached route if the
NET_RX_SOFTIRQ is not served by the same cpu that the sending socket
uses. As a result, the cached route is reused until we disconnect.

With this patch we invalidate the socket cached route if possible.
If the socket is owned by the user, we can't update the cached
route directly. A followup patch will implement socket release
callback functions for datagram sockets to handle this case.

Reported-by: Yurij M. Plotnikov <Yurij.Plotnikov@oktetlabs.ru>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 259cbeee9a8b..132737a7c83a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -965,7 +965,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
 }
 EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
 
-void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
+static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 {
 	const struct iphdr *iph = (const struct iphdr *) skb->data;
 	struct flowi4 fl4;
@@ -978,6 +978,46 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 		ip_rt_put(rt);
 	}
 }
+
+void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
+{
+	const struct iphdr *iph = (const struct iphdr *) skb->data;
+	struct flowi4 fl4;
+	struct rtable *rt;
+	struct dst_entry *dst;
+
+	bh_lock_sock(sk);
+	rt = (struct rtable *) __sk_dst_get(sk);
+
+	if (sock_owned_by_user(sk) || !rt) {
+		__ipv4_sk_update_pmtu(skb, sk, mtu);
+		goto out;
+	}
+
+	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+
+	if (!__sk_dst_check(sk, 0)) {
+		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
+		if (IS_ERR(rt))
+			goto out;
+	}
+
+	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
+
+	dst = dst_check(&rt->dst, 0);
+	if (!dst) {
+		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
+		if (IS_ERR(rt))
+			goto out;
+
+		dst = &rt->dst;
+	}
+
+	 __sk_dst_set(sk, dst);
+
+out:
+	bh_unlock_sock(sk);
+}
 EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
 
 void ipv4_redirect(struct sk_buff *skb, struct net *net,
-- 
cgit v1.2.3


From 8141ed9fcedb278f4a3a78680591bef1e55f75fb Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 21 Jan 2013 02:00:03 +0000
Subject: ipv4: Add a socket release callback for datagram sockets

This implements a socket release callback function to check
if the socket cached route got invalid during the time
we owned the socket. The function is used from udp, raw
and ping sockets.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/datagram.c | 25 +++++++++++++++++++++++++
 net/ipv4/ping.c     |  1 +
 net/ipv4/raw.c      |  1 +
 net/ipv4/udp.c      |  1 +
 4 files changed, 28 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 424fafbc8cb0..b28e863fe0a7 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -85,3 +85,28 @@ out:
 	return err;
 }
 EXPORT_SYMBOL(ip4_datagram_connect);
+
+void ip4_datagram_release_cb(struct sock *sk)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct ip_options_rcu *inet_opt;
+	__be32 daddr = inet->inet_daddr;
+	struct flowi4 fl4;
+	struct rtable *rt;
+
+	if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0))
+		return;
+
+	rcu_read_lock();
+	inet_opt = rcu_dereference(inet->inet_opt);
+	if (inet_opt && inet_opt->opt.srr)
+		daddr = inet_opt->opt.faddr;
+	rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr,
+				   inet->inet_saddr, inet->inet_dport,
+				   inet->inet_sport, sk->sk_protocol,
+				   RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
+	if (!IS_ERR(rt))
+		__sk_dst_set(sk, &rt->dst);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(ip4_datagram_release_cb);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 8f3d05424a3e..6f9c07268cf6 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -738,6 +738,7 @@ struct proto ping_prot = {
 	.recvmsg =	ping_recvmsg,
 	.bind =		ping_bind,
 	.backlog_rcv =	ping_queue_rcv_skb,
+	.release_cb =	ip4_datagram_release_cb,
 	.hash =		ping_v4_hash,
 	.unhash =	ping_v4_unhash,
 	.get_port =	ping_v4_get_port,
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 73d1e4df4bf6..6f08991409c3 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -894,6 +894,7 @@ struct proto raw_prot = {
 	.recvmsg	   = raw_recvmsg,
 	.bind		   = raw_bind,
 	.backlog_rcv	   = raw_rcv_skb,
+	.release_cb	   = ip4_datagram_release_cb,
 	.hash		   = raw_hash_sk,
 	.unhash		   = raw_unhash_sk,
 	.obj_size	   = sizeof(struct raw_sock),
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 79c8dbe59b54..1f4d405eafba 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1952,6 +1952,7 @@ struct proto udp_prot = {
 	.recvmsg	   = udp_recvmsg,
 	.sendpage	   = udp_sendpage,
 	.backlog_rcv	   = __udp_queue_rcv_skb,
+	.release_cb	   = ip4_datagram_release_cb,
 	.hash		   = udp_lib_hash,
 	.unhash		   = udp_lib_unhash,
 	.rehash		   = udp_v4_rehash,
-- 
cgit v1.2.3


From d9d59089c43fc33eb403cbb928e56c131f191dd5 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Mon, 21 Jan 2013 23:57:26 -0800
Subject: openvswitch: Move LRO check from transmit to receive.

The check for LRO packets was incorrectly put in the transmit path
instead of on receive.  Since this check is supposed to protect OVS
(and other parts of the system) from packets that it cannot handle
it is obviously not useful on egress.  Therefore, this commit moves
it back to the receive side.

The primary problem that this caused is that upcalls to userspace tried
to segment the packet even though no segmentation information is
available.  This would later cause NULL pointer dereferences when
skb_gso_segment() did nothing.

Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/openvswitch/vport-netdev.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index a9327e2e48ce..670cbc3518de 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -35,10 +35,11 @@
 /* Must be called with rcu_read_lock. */
 static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
 {
-	if (unlikely(!vport)) {
-		kfree_skb(skb);
-		return;
-	}
+	if (unlikely(!vport))
+		goto error;
+
+	if (unlikely(skb_warn_if_lro(skb)))
+		goto error;
 
 	/* Make our own copy of the packet.  Otherwise we will mangle the
 	 * packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
@@ -50,6 +51,10 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
 
 	skb_push(skb, ETH_HLEN);
 	ovs_vport_receive(vport, skb);
+	return;
+
+error:
+	kfree_skb(skb);
 }
 
 /* Called with rcu_read_lock and bottom-halves disabled. */
@@ -169,9 +174,6 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb)
 		goto error;
 	}
 
-	if (unlikely(skb_warn_if_lro(skb)))
-		goto error;
-
 	skb->dev = netdev_vport->dev;
 	len = skb->len;
 	dev_queue_xmit(skb);
-- 
cgit v1.2.3


From d84295067fc7e95660d84c014aa528f4409c070d Mon Sep 17 00:00:00 2001
From: Daniel Wagner <daniel.wagner@bmw-carit.de>
Date: Mon, 21 Jan 2013 21:09:00 +0000
Subject: net: net_cls: fd passed in SCM_RIGHTS datagram not set correctly

Commit 6a328d8c6f03501657ad580f6f98bf9a42583ff7 changed the update
logic for the socket but it did not adjust the SCM_RIGHTS path
as well. This patch is based on the net_prio fix commit

48a87cc26c13b68f6cce4e9d769fcb17a6b3e4b8

    net: netprio: fd passed in SCM_RIGHTS datagram not set correctly

    A socket fd passed in a SCM_RIGHTS datagram was not getting
    updated with the new tasks cgrp prioidx. This leaves IO on
    the socket tagged with the old tasks priority.

    To fix this add a check in the scm recvmsg path to update the
    sock cgrp prioidx with the new tasks value.

Let's apply the same fix for net_cls.

Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Reported-by: Li Zefan <lizefan@huawei.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: netdev@vger.kernel.org
Cc: cgroups@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/scm.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/scm.c b/net/core/scm.c
index 57fb1ee6649f..905dcc6ad1e3 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -35,6 +35,7 @@
 #include <net/sock.h>
 #include <net/compat.h>
 #include <net/scm.h>
+#include <net/cls_cgroup.h>
 
 
 /*
@@ -302,8 +303,10 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 		}
 		/* Bump the usage count and install the file. */
 		sock = sock_from_file(fp[i], &err);
-		if (sock)
+		if (sock) {
 			sock_update_netprioidx(sock->sk, current);
+			sock_update_classid(sock->sk, current);
+		}
 		fd_install(new_fd, get_file(fp[i]));
 	}
 
-- 
cgit v1.2.3


From b44108dbdbaa07c609bb5755e8dd6c2035236251 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 22 Jan 2013 00:01:28 +0000
Subject: ipv4: Fix route refcount on pmtu discovery

git commit 9cb3a50c (ipv4: Invalidate the socket cached route on
pmtu events if possible) introduced a refcount problem. We don't
get a refcount on the route if we get it from __sk_dst_get(), but
we need one if we want to reuse this route because __sk_dst_set()
releases the refcount of the old route. This patch adds proper
refcount handling for that case. We introduce a 'new' flag to
indicate that we are going to use a new route and we release the
old route only if we replace it by a new one.

Reported-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 132737a7c83a..a0fcc47fee73 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -985,6 +985,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 	struct flowi4 fl4;
 	struct rtable *rt;
 	struct dst_entry *dst;
+	bool new = false;
 
 	bh_lock_sock(sk);
 	rt = (struct rtable *) __sk_dst_get(sk);
@@ -1000,20 +1001,26 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
 		if (IS_ERR(rt))
 			goto out;
+
+		new = true;
 	}
 
 	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
 
 	dst = dst_check(&rt->dst, 0);
 	if (!dst) {
+		if (new)
+			dst_release(&rt->dst);
+
 		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
 		if (IS_ERR(rt))
 			goto out;
 
-		dst = &rt->dst;
+		new = true;
 	}
 
-	 __sk_dst_set(sk, dst);
+	if (new)
+		__sk_dst_set(sk, &rt->dst);
 
 out:
 	bh_unlock_sock(sk);
-- 
cgit v1.2.3


From c49dc9008b1c641a86837297df7c90cef070571b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 24 Jan 2013 09:40:00 +0300
Subject: cfg80211: off by one in ieee80211_bss()

We do a:

	sprintf(buf, " Last beacon: %ums ago",
		elapsed_jiffies_msecs(bss->ts));

elapsed_jiffies_msecs() can return a 10 digit number so "buf" needs to
be 31 characters long.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/scan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 01592d7d4789..45f1618c8e23 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1358,7 +1358,7 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info,
 						  &iwe, IW_EV_UINT_LEN);
 	}
 
-	buf = kmalloc(30, GFP_ATOMIC);
+	buf = kmalloc(31, GFP_ATOMIC);
 	if (buf) {
 		memset(&iwe, 0, sizeof(iwe));
 		iwe.cmd = IWEVCUSTOM;
-- 
cgit v1.2.3


From 0d15becee56fdfc2e9a4374c46ea7cf7562a6f32 Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Wed, 23 Jan 2013 18:11:53 +0100
Subject: batman-adv: fix skb leak in batadv_dat_snoop_incoming_arp_reply()

The callers of batadv_dat_snoop_incoming_arp_reply() assume the skb has been
freed when it returns true; fix this by calling kfree_skb before returning as
it is done in batadv_dat_snoop_incoming_arp_request().

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
Acked-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
 net/batman-adv/distributed-arp-table.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 8e1d89d2b1c1..ce0d2992381a 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1012,6 +1012,8 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
 	 */
 	ret = !batadv_is_my_client(bat_priv, hw_dst);
 out:
+	if (ret)
+		kfree_skb(skb);
 	/* if ret == false -> packet has to be delivered to the interface */
 	return ret;
 }
-- 
cgit v1.2.3


From 757dd82ea7008ddaccfecff3397bec3e3203a89e Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Thu, 24 Jan 2013 18:18:26 +0100
Subject: batman-adv: check for more types of invalid IP addresses in DAT

There are more types of IP addresses that may appear in ARP packets that we
don't want to process. While some of these should never appear in sane ARP
packets, a 0.0.0.0 source is used for duplicate address detection and thus seen
quite often.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Acked-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
 net/batman-adv/distributed-arp-table.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index ce0d2992381a..ccb3c6c96ba7 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -777,7 +777,9 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv,
 	ip_src = batadv_arp_ip_src(skb, hdr_size);
 	ip_dst = batadv_arp_ip_dst(skb, hdr_size);
 	if (ipv4_is_loopback(ip_src) || ipv4_is_multicast(ip_src) ||
-	    ipv4_is_loopback(ip_dst) || ipv4_is_multicast(ip_dst))
+	    ipv4_is_loopback(ip_dst) || ipv4_is_multicast(ip_dst) ||
+	    ipv4_is_zeronet(ip_src) || ipv4_is_lbcast(ip_src) ||
+	    ipv4_is_zeronet(ip_dst) || ipv4_is_lbcast(ip_dst))
 		goto out;
 
 	type = ntohs(arphdr->ar_op);
-- 
cgit v1.2.3


From b618ad1103c9ea0c4a69b44f42fc3c7b4e231e22 Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Thu, 24 Jan 2013 18:18:27 +0100
Subject: batman-adv: filter ARP packets with invalid MAC addresses in DAT

We never want multicast MAC addresses in the Distributed ARP Table, so it's
best to completely ignore ARP packets containing them where we expect unicast
addresses.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Acked-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
 net/batman-adv/distributed-arp-table.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'net')

diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index ccb3c6c96ba7..183f97a86bb2 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -738,6 +738,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv,
 	struct arphdr *arphdr;
 	struct ethhdr *ethhdr;
 	__be32 ip_src, ip_dst;
+	uint8_t *hw_src, *hw_dst;
 	uint16_t type = 0;
 
 	/* pull the ethernet header */
@@ -782,6 +783,18 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv,
 	    ipv4_is_zeronet(ip_dst) || ipv4_is_lbcast(ip_dst))
 		goto out;
 
+	hw_src = batadv_arp_hw_src(skb, hdr_size);
+	if (is_zero_ether_addr(hw_src) || is_multicast_ether_addr(hw_src))
+		goto out;
+
+	/* we don't care about the destination MAC address in ARP requests */
+	if (arphdr->ar_op != htons(ARPOP_REQUEST)) {
+		hw_dst = batadv_arp_hw_dst(skb, hdr_size);
+		if (is_zero_ether_addr(hw_dst) ||
+		    is_multicast_ether_addr(hw_dst))
+			goto out;
+	}
+
 	type = ntohs(arphdr->ar_op);
 out:
 	return type;
-- 
cgit v1.2.3


From 75356a8143426a1301bc66c4fb920dcb1bd5e934 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 23 Jan 2013 20:38:34 +0000
Subject: ip6mr: limit IPv6 MRT_TABLE identifiers

We did this for IPv4 in b49d3c1e1c "net: ipmr: limit MRT_TABLE
identifiers" but we need to do it for IPv6 as well.  On IPv6 the name
is "pim6reg" instead of "pimreg" so there is one less digit allowed.

The strcpy() is in ip6mr_reg_vif().

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 26dcdec9e3a5..8fd154e5f079 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1710,6 +1710,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 			return -EINVAL;
 		if (get_user(v, (u32 __user *)optval))
 			return -EFAULT;
+		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
+		if (v != RT_TABLE_DEFAULT && v >= 100000000)
+			return -EINVAL;
 		if (sk == mrt->mroute6_sk)
 			return -EBUSY;
 
-- 
cgit v1.2.3


From 9839ff0dead906e85e4d17490aeff87a5859a157 Mon Sep 17 00:00:00 2001
From: Xufeng Zhang <xufeng.zhang@windriver.com>
Date: Wed, 23 Jan 2013 16:44:34 +0000
Subject: sctp: set association state to established in dupcook_a handler

When sctp handles a duplicate COOKIE-ECHO and the action is
'Association restart', sctp_sf_do_dupcook_a() will process
the unexpected COOKIE-ECHO for peer restart, but it does not set
the association state to SCTP_STATE_ESTABLISHED, so the association
could get stuck in the SCTP_STATE_SHUTDOWN_PENDING state forever.
This violates the sctp specification:
  RFC 4960 5.2.4. Handle a COOKIE ECHO when a TCB Exists
  Action
  A) In this case, the peer may have restarted. .....
     After this, the endpoint shall enter the ESTABLISHED state.

To resolve this problem, add a SCTP_CMD_NEW_STATE cmd to the
command list before the SCTP_CMD_REPLY cmd. This sets the restarted
association to the SCTP_STATE_ESTABLISHED state properly and also avoids
the I-bit being set in the DATA chunk header when COOKIE_ACK is bundled
with DATA chunks.

Signed-off-by: Xufeng Zhang <xufeng.zhang@windriver.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_statefuns.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 618ec7e216ca..5131fcfedb03 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1779,8 +1779,10 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
 
 	/* Update the content of current association. */
 	sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
-	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
 	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_ESTABLISHED));
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
 	return SCTP_DISPOSITION_CONSUME;
 
 nomem_ev:
-- 
cgit v1.2.3


From 5465740ace36f179de5bb0ccb5d46ddeb945e309 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Wed, 23 Jan 2013 11:45:42 +0000
Subject: IP_GRE: Fix kernel panic in IP_GRE with GRE csum.

Due to IP_GRE GSO support, GRE can receive a non-linear skb, which
results in a panic in the GRE_CSUM case.  The following patch fixes it
by using the correct csum API.

Bug introduced in commit 6b78f16e4bdde3936b (gre: add GSO support)

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 303012adf9e6..e81b1caf2ea2 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -963,8 +963,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			ptr--;
 		}
 		if (tunnel->parms.o_flags&GRE_CSUM) {
+			int offset = skb_transport_offset(skb);
+
 			*ptr = 0;
-			*(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
+			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset,
+								 skb->len - offset,
+								 0));
 		}
 	}
 
-- 
cgit v1.2.3


From 5f19d1219a5b96c7b00ad5c3f889030093a8d1a3 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Thu, 24 Jan 2013 11:02:47 -0500
Subject: SCTP: Free the per-net sysctl table on net exit. v2

Per-net sysctl table needs to be explicitly freed at
net exit.  Otherwise we see the following with kmemleak:

unreferenced object 0xffff880402d08000 (size 2048):
  comm "chrome_sandbox", pid 18437, jiffies 4310887172 (age 9097.630s)
  hex dump (first 32 bytes):
    b2 68 89 81 ff ff ff ff 20 04 04 f8 01 88 ff ff  .h...... .......
    04 00 00 00 a4 01 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff815b4aad>] kmemleak_alloc+0x21/0x3e
    [<ffffffff81110352>] slab_post_alloc_hook+0x28/0x2a
    [<ffffffff81113fad>] __kmalloc_track_caller+0xf1/0x104
    [<ffffffff810f10c2>] kmemdup+0x1b/0x30
    [<ffffffff81571e9f>] sctp_sysctl_net_register+0x1f/0x72
    [<ffffffff8155d305>] sctp_net_init+0x100/0x39f
    [<ffffffff814ad53c>] ops_init+0xc6/0xf5
    [<ffffffff814ad5b7>] setup_net+0x4c/0xd0
    [<ffffffff814ada5e>] copy_net_ns+0x6d/0xd6
    [<ffffffff810938b1>] create_new_namespaces+0xd7/0x147
    [<ffffffff810939f4>] copy_namespaces+0x63/0x99
    [<ffffffff81076733>] copy_process+0xa65/0x1233
    [<ffffffff81077030>] do_fork+0x10b/0x271
    [<ffffffff8100a0e9>] sys_clone+0x23/0x25
    [<ffffffff815dda73>] stub_clone+0x13/0x20
    [<ffffffffffffffff>] 0xffffffffffffffff

I fixed the spelling of sysctl_header so the code actually
compiles. -- EWB.

Reported-by: Martin Mokrejs <mmokrejs@fold.natur.cuni.cz>
Signed-off-by: Vlad Yasevich <vyasevich@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sysctl.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 043889ac86c0..bf3c6e8fc401 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -366,7 +366,11 @@ int sctp_sysctl_net_register(struct net *net)
 
 void sctp_sysctl_net_unregister(struct net *net)
 {
+	struct ctl_table *table;
+
+	table = net->sctp.sysctl_header->ctl_table_arg;
 	unregister_net_sysctl_table(net->sctp.sysctl_header);
+	kfree(table);
 }
 
 static struct ctl_table_header * sctp_sysctl_header;
-- 
cgit v1.2.3


From 80d84ef3ff1ddc7a829c58980a9dd566a8af5203 Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Tue, 22 Jan 2013 05:13:48 +0000
Subject: l2tp: prevent l2tp_tunnel_delete racing with userspace close

If a tunnel socket is created by userspace, l2tp hooks the socket destructor
in order to clean up resources if userspace closes the socket or crashes.  It
also caches a pointer to the struct sock for use in the data path and in the
netlink interface.

While it is safe to use the cached sock pointer in the data path, where the
skb references keep the socket alive, it is not safe to use it elsewhere as
such access introduces a race with userspace closing the socket.  In
particular, l2tp_tunnel_delete is prone to oopsing if a multithreaded
userspace application closes a socket at the same time as sending a netlink
delete command for the tunnel.

This patch fixes this oops by forcing l2tp_tunnel_delete to explicitly look up
a tunnel socket held by userspace using sockfd_lookup().

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 76 ++++++++++++++++++++++++++++++++++++++++++++--------
 net/l2tp/l2tp_core.h |  5 +++-
 2 files changed, 69 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 1a9f3723c13c..06389d5ff120 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -168,6 +168,51 @@ l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id)
 
 }
 
+/* Lookup the tunnel socket, possibly involving the fs code if the socket is
+ * owned by userspace.  A struct sock returned from this function must be
+ * released using l2tp_tunnel_sock_put once you're done with it.
+ */
+struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel)
+{
+	int err = 0;
+	struct socket *sock = NULL;
+	struct sock *sk = NULL;
+
+	if (!tunnel)
+		goto out;
+
+	if (tunnel->fd >= 0) {
+		/* Socket is owned by userspace, who might be in the process
+		 * of closing it.  Look the socket up using the fd to ensure
+		 * consistency.
+		 */
+		sock = sockfd_lookup(tunnel->fd, &err);
+		if (sock)
+			sk = sock->sk;
+	} else {
+		/* Socket is owned by kernelspace */
+		sk = tunnel->sock;
+	}
+
+out:
+	return sk;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_lookup);
+
+/* Drop a reference to a tunnel socket obtained via l2tp_tunnel_sock_lookup */
+void l2tp_tunnel_sock_put(struct sock *sk)
+{
+	struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
+	if (tunnel) {
+		if (tunnel->fd >= 0) {
+			/* Socket is owned by userspace */
+			sockfd_put(sk->sk_socket);
+		}
+		sock_put(sk);
+	}
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_put);
+
 /* Lookup a session by id in the global session list
  */
 static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id)
@@ -1607,6 +1652,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	tunnel->old_sk_destruct = sk->sk_destruct;
 	sk->sk_destruct = &l2tp_tunnel_destruct;
 	tunnel->sock = sk;
+	tunnel->fd = fd;
 	lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock");
 
 	sk->sk_allocation = GFP_ATOMIC;
@@ -1642,24 +1688,32 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
  */
 int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
 {
-	int err = 0;
-	struct socket *sock = tunnel->sock ? tunnel->sock->sk_socket : NULL;
+	int err = -EBADF;
+	struct socket *sock = NULL;
+	struct sock *sk = NULL;
+
+	sk = l2tp_tunnel_sock_lookup(tunnel);
+	if (!sk)
+		goto out;
+
+	sock = sk->sk_socket;
+	BUG_ON(!sock);
 
 	/* Force the tunnel socket to close. This will eventually
 	 * cause the tunnel to be deleted via the normal socket close
 	 * mechanisms when userspace closes the tunnel socket.
 	 */
-	if (sock != NULL) {
-		err = inet_shutdown(sock, 2);
+	err = inet_shutdown(sock, 2);
 
-		/* If the tunnel's socket was created by the kernel,
-		 * close the socket here since the socket was not
-		 * created by userspace.
-		 */
-		if (sock->file == NULL)
-			err = inet_release(sock);
-	}
+	/* If the tunnel's socket was created by the kernel,
+	 * close the socket here since the socket was not
+	 * created by userspace.
+	 */
+	if (sock->file == NULL)
+		err = inet_release(sock);
 
+	l2tp_tunnel_sock_put(sk);
+out:
 	return err;
 }
 EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 56d583e083a7..e62204cad4fe 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -188,7 +188,8 @@ struct l2tp_tunnel {
 	int (*recv_payload_hook)(struct sk_buff *skb);
 	void (*old_sk_destruct)(struct sock *);
 	struct sock		*sock;		/* Parent socket */
-	int			fd;
+	int			fd;		/* Parent fd, if tunnel socket
+						 * was created by userspace */
 
 	uint8_t			priv[0];	/* private data */
 };
@@ -228,6 +229,8 @@ out:
 	return tunnel;
 }
 
+extern struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel);
+extern void l2tp_tunnel_sock_put(struct sock *sk);
 extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id);
 extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
 extern struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname);
-- 
cgit v1.2.3


From a13d3104710184ecc43edc35a25ae8092058463f Mon Sep 17 00:00:00 2001
From: Johannes Naab <jn@stusta.de>
Date: Wed, 23 Jan 2013 11:36:51 +0000
Subject: netem: fix delay calculation in rate extension

The delay calculation with the rate extension introduced in v3.3 does
not work properly if other packets are still queued for transmission.
For the delay calculation to work, both delay types (latency and delay
introduced by rate limitation) have to be handled differently. The
latency delay for a packet can overlap with the delay of other packets.
The delay introduced by the rate, however, is separate and can only
start once all other rate-introduced delays have finished.

Latency delay is from same distribution for each packet, rate delay
depends on the packet size.

.: latency delay
-: rate delay
x: additional delay we have to wait since another packet is currently
   transmitted

  .....----                    Packet 1
    .....xx------              Packet 2
               .....------     Packet 3
    ^^^^^
    latency stacks
         ^^
         rate delay doesn't stack
               ^^
               latency stacks

  -----> time

When a packet is enqueued, we first consider the latency delay. If other
packets are already queued, we can reduce the latency delay by the time
until the last packet in the queue is sent; however, the latency delay
cannot be <0, since this would mean that the rate is overcommitted.  The
new reference point is the time at which the last packet will be sent. To
find the time when the packet should be sent, the rate-introduced delay
has to be added on top of that.

Signed-off-by: Johannes Naab <jn@stusta.de>
Acked-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 298c0ddfb57e..3d2acc7a9c80 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -438,18 +438,18 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		if (q->rate) {
 			struct sk_buff_head *list = &sch->q;
 
-			delay += packet_len_2_sched_time(skb->len, q);
-
 			if (!skb_queue_empty(list)) {
 				/*
-				 * Last packet in queue is reference point (now).
-				 * First packet in queue is already in flight,
-				 * calculate this time bonus and substract
+				 * Last packet in queue is reference point (now),
+				 * calculate this time bonus and subtract
 				 * from delay.
 				 */
-				delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
+				delay -= netem_skb_cb(skb_peek_tail(list))->time_to_send - now;
+				delay = max_t(psched_tdiff_t, 0, delay);
 				now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
 			}
+
+			delay += packet_len_2_sched_time(skb->len, q);
 		}
 
 		cb->time_to_send = now + delay;
-- 
cgit v1.2.3


From 604dfd6efc9b79bce432f2394791708d8e8f6efc Mon Sep 17 00:00:00 2001
From: Cong Wang <amwang@redhat.com>
Date: Sun, 27 Jan 2013 21:14:08 +0000
Subject: pktgen: correctly handle failures when adding a device

The return value of pktgen_add_device() is not checked, so
even if we fail to add some device, for example a non-existent one,
we still see "OK:...". This patch fixes it.

After this patch, I got:

	# echo "add_device non-exist" > /proc/net/pktgen/kpktgend_0
	-bash: echo: write error: No such device
	# cat /proc/net/pktgen/kpktgend_0
	Running:
	Stopped:
	Result: ERROR: can not add device non-exist
	# echo "add_device eth0" > /proc/net/pktgen/kpktgend_0
	# cat /proc/net/pktgen/kpktgend_0
	Running:
	Stopped: eth0
	Result: OK: add_device=eth0

(Candidate for -stable)

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b29dacf900f9..e6e1cbe863f5 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1781,10 +1781,13 @@ static ssize_t pktgen_thread_write(struct file *file,
 			return -EFAULT;
 		i += len;
 		mutex_lock(&pktgen_thread_lock);
-		pktgen_add_device(t, f);
+		ret = pktgen_add_device(t, f);
 		mutex_unlock(&pktgen_thread_lock);
-		ret = count;
-		sprintf(pg_result, "OK: add_device=%s", f);
+		if (!ret) {
+			ret = count;
+			sprintf(pg_result, "OK: add_device=%s", f);
+		} else
+			sprintf(pg_result, "ERROR: can not add device %s", f);
 		goto out;
 	}
 
-- 
cgit v1.2.3


From 5e98a36ed4bf6ea396170e3af0dd4fcbe51d772f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?=
 =?UTF-8?q?=E6=98=8E?= <yoshfuji@linux-ipv6.org>
Date: Mon, 28 Jan 2013 10:44:29 +0000
Subject: ipv6 addrconf: Fix interface identifiers of 802.15.4 devices.

The "Universal/Local" (U/L) bit must be complemented according to RFC4944
and RFC2464.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 420e56326384..1b5d8cb9b123 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1660,6 +1660,7 @@ static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev)
 	if (dev->addr_len != IEEE802154_ADDR_LEN)
 		return -1;
 	memcpy(eui, dev->dev_addr, 8);
+	eui[0] ^= 2;
 	return 0;
 }
 
-- 
cgit v1.2.3


From 2aeef18d37aa8c0bfca169d4ede1790d972bf649 Mon Sep 17 00:00:00 2001
From: Nivedita Singhvi <niv@us.ibm.com>
Date: Mon, 28 Jan 2013 17:52:37 +0000
Subject: tcp: Increment LISTENOVERFLOW and LISTENDROPS in
 tcp_v4_conn_request()

We drop a connection request if the accept backlog is full and there are
sufficient packets in the syn queue to warrant starting drops. Increment the
appropriate counters so this isn't silent, for accurate stats and help in
debugging.

This patch assumes LINUX_MIB_LISTENDROPS is a superset of/includes the
counter LINUX_MIB_LISTENOVERFLOWS.

Signed-off-by: Nivedita Singhvi <niv@us.ibm.com>
Acked-by: Vijay Subramanian <subramanian.vijay@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 70b09ef2463b..629937d514eb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1500,8 +1500,11 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	 * clogging syn queue with openreqs with exponentially increasing
 	 * timeout.
 	 */
-	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 		goto drop;
+	}
 
 	req = inet_reqsk_alloc(&tcp_request_sock_ops);
 	if (!req)
-- 
cgit v1.2.3


From edd2e36fe8bd3cec4fa67e746d4c4a9246d0830e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 27 Jan 2013 14:20:49 -0500
Subject: SUNRPC: When changing the queue priority, ensure that we change the
 owner

This fixes a livelock in the xprt->sending queue where we end up never
making progress on lower priority tasks because sleep_on_priority()
keeps adding new tasks with the same owner to the head of the queue,
and priority bumps mean that we keep resetting the queue->owner to
whatever task is at the head of the queue.

Regression introduced by commit c05eecf636101dd4347b2d8fa457626bf0088e0a
(SUNRPC: Don't allow low priority tasks to pre-empt higher priority ones).

Reported-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/sched.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index bfa31714581f..fb20f25ddec9 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -98,9 +98,25 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
 	list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
 }
 
+static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue)
+{
+	struct list_head *q = &queue->tasks[queue->priority];
+	struct rpc_task *task;
+
+	if (!list_empty(q)) {
+		task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
+		if (task->tk_owner == queue->owner)
+			list_move_tail(&task->u.tk_wait.list, q);
+	}
+}
+
 static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
 {
-	queue->priority = priority;
+	if (queue->priority != priority) {
+		/* Fairness: rotate the list when changing priority */
+		rpc_rotate_queue_owner(queue);
+		queue->priority = priority;
+	}
 }
 
 static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
-- 
cgit v1.2.3


From bd30e947207e2ea0ff2c08f5b4a03025ddce48d3 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <mleitner@redhat.com>
Date: Tue, 29 Jan 2013 22:26:08 +0000
Subject: ipv6: do not create neighbor entries for local delivery

They will be created at output, if ever needed. This avoids creating
empty neighbor entries when TPROXYing/Forwarding packets for addresses
that are not even directly reachable.

Note that IPv4 already handles it this way. No neighbor entries are
created for local input.

Tested by myself and customer.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e229a3bc345d..363d8b7772e8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -928,7 +928,7 @@ restart:
 	dst_hold(&rt->dst);
 	read_unlock_bh(&table->tb6_lock);
 
-	if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
+	if (!rt->n && !(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_LOCAL)))
 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
 	else if (!(rt->dst.flags & DST_HOST))
 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
-- 
cgit v1.2.3


From 8cf9fa1240229cbdd888236c0c43fcbad680cf00 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Tue, 29 Jan 2013 10:44:23 -0600
Subject: Bluetooth: Fix handling of unexpected SMP PDUs

The conn->smp_chan pointer can be NULL if SMP PDUs arrive at unexpected
moments. To avoid NULL pointer dereferences the code should be checking
for this and disconnect if an unexpected SMP PDU arrives. This patch
fixes the issue by adding a check for conn->smp_chan for all other PDUs
except pairing request and security request (which are the first
PDUs to come to initialize the SMP context).

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
CC: stable@vger.kernel.org
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/smp.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 68a9587c9694..5abefb12891d 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -859,6 +859,19 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb)
 
 	skb_pull(skb, sizeof(code));
 
+	/*
+	 * The SMP context must be initialized for all other PDUs except
+	 * pairing and security requests. If we get any other PDU when
+	 * not initialized simply disconnect (done if this function
+	 * returns an error).
+	 */
+	if (code != SMP_CMD_PAIRING_REQ && code != SMP_CMD_SECURITY_REQ &&
+	    !conn->smp_chan) {
+		BT_ERR("Unexpected SMP command 0x%02x. Disconnecting.", code);
+		kfree_skb(skb);
+		return -ENOTSUPP;
+	}
+
 	switch (code) {
 	case SMP_CMD_PAIRING_REQ:
 		reason = smp_cmd_pairing_req(conn, skb);
-- 
cgit v1.2.3


From 4c02e2d444595200d0b18b889994aac3611cd288 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@openbossa.org>
Date: Wed, 30 Jan 2013 11:50:55 -0300
Subject: Bluetooth: Fix hci_conn timeout routine

If an LE or SCO hci_conn timeout occurs and the connection is already
established (BT_CONNECTED state), the connection is not terminated as
expected. This bug can be reproduced using l2test or scotest tool.
Once the connection is established, kill l2test/scotest and the
connection won't be terminated.

This patch fixes hci_conn_disconnect helper so it is able to
terminate LE and SCO connections, as well as ACL.

Signed-off-by: Andre Guedes <andre.guedes@openbossa.org>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/hci_conn.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 25bfce0666eb..4925a02ae7e4 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -249,12 +249,12 @@ static void hci_conn_disconnect(struct hci_conn *conn)
 	__u8 reason = hci_proto_disconn_ind(conn);
 
 	switch (conn->type) {
-	case ACL_LINK:
-		hci_acl_disconn(conn, reason);
-		break;
 	case AMP_LINK:
 		hci_amp_disconn(conn, reason);
 		break;
+	default:
+		hci_acl_disconn(conn, reason);
+		break;
 	}
 }
 
-- 
cgit v1.2.3


From 73df66f8b1926c59cbc83000af6bf37ecc5509dd Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Thu, 31 Jan 2013 01:02:24 +0000
Subject: ipv6: rename datagram_send_ctl and datagram_recv_ctl

The datagram_*_ctl functions in net/ipv6/datagram.c are IPv6-specific.  Since
datagram_send_ctl is publicly exported it should be appropriately named to
reflect the fact that it's for IPv6 only.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/datagram.c      | 15 ++++++++-------
 net/ipv6/ip6_flowlabel.c |  4 ++--
 net/ipv6/ipv6_sockglue.c |  6 +++---
 net/ipv6/raw.c           |  6 +++---
 net/ipv6/udp.c           |  6 +++---
 net/l2tp/l2tp_ip6.c      |  4 ++--
 net/sunrpc/svcsock.c     |  2 +-
 7 files changed, 22 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 8edf2601065a..06fd2730838b 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -380,7 +380,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 		if (skb->protocol == htons(ETH_P_IPV6)) {
 			sin->sin6_addr = ipv6_hdr(skb)->saddr;
 			if (np->rxopt.all)
-				datagram_recv_ctl(sk, msg, skb);
+				ip6_datagram_recv_ctl(sk, msg, skb);
 			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 				sin->sin6_scope_id = IP6CB(skb)->iif;
 		} else {
@@ -468,7 +468,8 @@ out:
 }
 
 
-int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
+int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
+			  struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct inet6_skb_parm *opt = IP6CB(skb);
@@ -598,10 +599,10 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 	return 0;
 }
 
-int datagram_send_ctl(struct net *net, struct sock *sk,
-		      struct msghdr *msg, struct flowi6 *fl6,
-		      struct ipv6_txoptions *opt,
-		      int *hlimit, int *tclass, int *dontfrag)
+int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
+			  struct msghdr *msg, struct flowi6 *fl6,
+			  struct ipv6_txoptions *opt,
+			  int *hlimit, int *tclass, int *dontfrag)
 {
 	struct in6_pktinfo *src_info;
 	struct cmsghdr *cmsg;
@@ -871,4 +872,4 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
 exit_f:
 	return err;
 }
-EXPORT_SYMBOL_GPL(datagram_send_ctl);
+EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 29124b7a04c8..d6de4b447250 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -365,8 +365,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
 		msg.msg_control = (void*)(fl->opt+1);
 		memset(&flowi6, 0, sizeof(flowi6));
 
-		err = datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, &junk,
-					&junk, &junk);
+		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt,
+					    &junk, &junk, &junk);
 		if (err)
 			goto done;
 		err = -EINVAL;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index ee94d31c9d4d..d1e2e8ef29c5 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -476,8 +476,8 @@ sticky_done:
 		msg.msg_controllen = optlen;
 		msg.msg_control = (void*)(opt+1);
 
-		retv = datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk,
-					 &junk);
+		retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk,
+					     &junk, &junk);
 		if (retv)
 			goto done;
 update:
@@ -1002,7 +1002,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		release_sock(sk);
 
 		if (skb) {
-			int err = datagram_recv_ctl(sk, &msg, skb);
+			int err = ip6_datagram_recv_ctl(sk, &msg, skb);
 			kfree_skb(skb);
 			if (err)
 				return err;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 6cd29b1e8b92..70fa81449997 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -507,7 +507,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (np->rxopt.all)
-		datagram_recv_ctl(sk, msg, skb);
+		ip6_datagram_recv_ctl(sk, msg, skb);
 
 	err = copied;
 	if (flags & MSG_TRUNC)
@@ -822,8 +822,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 
-		err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
-					&hlimit, &tclass, &dontfrag);
+		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+					    &hlimit, &tclass, &dontfrag);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index dfaa29b8b293..fb083295ff0b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -443,7 +443,7 @@ try_again:
 			ip_cmsg_recv(msg, skb);
 	} else {
 		if (np->rxopt.all)
-			datagram_recv_ctl(sk, msg, skb);
+			ip6_datagram_recv_ctl(sk, msg, skb);
 	}
 
 	err = copied;
@@ -1153,8 +1153,8 @@ do_udp_sendmsg:
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(*opt);
 
-		err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
-					&hlimit, &tclass, &dontfrag);
+		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+					    &hlimit, &tclass, &dontfrag);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 927547171bc7..2316947ee772 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -554,8 +554,8 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 
-		err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
-					&hlimit, &tclass, &dontfrag);
+		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+					    &hlimit, &tclass, &dontfrag);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 0a148c9d2a5c..0f679df7d072 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -465,7 +465,7 @@ static int svc_udp_get_dest_address4(struct svc_rqst *rqstp,
 }
 
 /*
- * See net/ipv6/datagram.c : datagram_recv_ctl
+ * See net/ipv6/datagram.c : ip6_datagram_recv_ctl
  */
 static int svc_udp_get_dest_address6(struct svc_rqst *rqstp,
 				     struct cmsghdr *cmh)
-- 
cgit v1.2.3


From 8e72d37eb304d9ec5dfb51bc2d83e900b79ee764 Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Thu, 31 Jan 2013 01:02:25 +0000
Subject: ipv6: export ip6_datagram_recv_ctl

ip6_datagram_recv_ctl and ip6_datagram_send_ctl are used for handling IPv6
ancillary data.  Since ip6_datagram_send_ctl is already publicly exported for
use in modules, ip6_datagram_recv_ctl should also be available to support
ancillary data in the receive path.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/datagram.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 06fd2730838b..7a778b9a7b85 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -598,6 +598,7 @@ int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
 
 int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
 			  struct msghdr *msg, struct flowi6 *fl6,
-- 
cgit v1.2.3


From 700163db3de397a7557831e1eb9b8ce60e55590a Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Thu, 31 Jan 2013 01:02:26 +0000
Subject: l2tp: correctly handle ancillary data in the ip6 recv path

l2tp_ip6 is incorrectly using the IPv4-specific ip_cmsg_recv to handle
ancillary data.  This means that socket options such as IPV6_RECVPKTINFO are
not honoured in userspace.

Convert l2tp_ip6 to use the IPv6-specific handler.

Ref: net/ipv6/udp.c

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: Chris Elston <celston@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip6.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 2316947ee772..8ee4a86ae996 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -646,7 +646,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk,
 			    struct msghdr *msg, size_t len, int noblock,
 			    int flags, int *addr_len)
 {
-	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)msg->msg_name;
 	size_t copied = 0;
 	int err = -EOPNOTSUPP;
@@ -688,8 +688,8 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk,
 			lsa->l2tp_scope_id = IP6CB(skb)->iif;
 	}
 
-	if (inet->cmsg_flags)
-		ip_cmsg_recv(msg, skb);
+	if (np->rxopt.all)
+		ip6_datagram_recv_ctl(sk, msg, skb);
 
 	if (flags & MSG_TRUNC)
 		copied = skb->len;
-- 
cgit v1.2.3


From 66555e92fb7a619188c02cceae4bbc414f15f96d Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 31 Jan 2013 11:16:46 -0800
Subject: tcp: detect SYN/data drop when F-RTO is disabled

On receiving the SYN-ACK, Fast Open checks icsk_retransmits for SYN
retransmission to detect SYN/data drops. But if F-RTO is disabled,
icsk_retransmits is reset at step D of tcp_fastretrans_alert()
(under tcp_ack()) before tcp_rcv_fastopen_synack(). The fix is to use
total_retrans instead, which accounts for SYN retransmission regardless
of the use of F-RTO.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 18f97ca76b00..8aca4ee95ff9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5649,8 +5649,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 	 * the remote receives only the retransmitted (regular) SYNs: either
 	 * the original SYN-data or the corresponding SYN-ACK is lost.
 	 */
-	syn_drop = (cookie->len <= 0 && data &&
-		    inet_csk(sk)->icsk_retransmits);
+	syn_drop = (cookie->len <= 0 && data && tp->total_retrans);
 
 	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);
 
-- 
cgit v1.2.3


From 973ec449bb4f2b8c514bacbcb4d9506fc31c8ce3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 2 Feb 2013 05:23:16 +0000
Subject: tcp: fix an infinite loop in tcp_slow_start()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit 9dc274151a548 (tcp: fix ABC in tcp_slow_start()),
a zero snd_cwnd triggers an infinite loop in tcp_slow_start().

Avoid this infinite loop and log a one time error for further
analysis. FRTO code is suspected to cause this bug.

Reported-by: Pasi Kärkkäinen <pasik@iki.fi>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cong.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 291f2ed7cc31..cdf2e707bb10 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -310,6 +310,12 @@ void tcp_slow_start(struct tcp_sock *tp)
 {
 	int cnt; /* increase in packets */
 	unsigned int delta = 0;
+	u32 snd_cwnd = tp->snd_cwnd;
+
+	if (unlikely(!snd_cwnd)) {
+		pr_err_once("snd_cwnd is nul, please report this bug.\n");
+		snd_cwnd = 1U;
+	}
 
 	/* RFC3465: ABC Slow start
 	 * Increase only after a full MSS of bytes is acked
@@ -324,7 +330,7 @@ void tcp_slow_start(struct tcp_sock *tp)
 	if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh)
 		cnt = sysctl_tcp_max_ssthresh >> 1;	/* limited slow start */
 	else
-		cnt = tp->snd_cwnd;			/* exponential increase */
+		cnt = snd_cwnd;				/* exponential increase */
 
 	/* RFC3465: ABC
 	 * We MAY increase by 2 if discovered delayed ack
@@ -334,11 +340,11 @@ void tcp_slow_start(struct tcp_sock *tp)
 	tp->bytes_acked = 0;
 
 	tp->snd_cwnd_cnt += cnt;
-	while (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
-		tp->snd_cwnd_cnt -= tp->snd_cwnd;
+	while (tp->snd_cwnd_cnt >= snd_cwnd) {
+		tp->snd_cwnd_cnt -= snd_cwnd;
 		delta++;
 	}
-	tp->snd_cwnd = min(tp->snd_cwnd + delta, tp->snd_cwnd_clamp);
+	tp->snd_cwnd = min(snd_cwnd + delta, tp->snd_cwnd_clamp);
 }
 EXPORT_SYMBOL_GPL(tcp_slow_start);
 
-- 
cgit v1.2.3


From 2e5f421211ff76c17130b4597bc06df4eeead24f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 3 Feb 2013 09:13:05 +0000
Subject: tcp: frto should not set snd_cwnd to 0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 9dc274151a548 (tcp: fix ABC in tcp_slow_start())
uncovered a bug in FRTO code :
tcp_process_frto() is setting snd_cwnd to 0 if the number
of in flight packets is 0.

As Neal pointed out, if no packet is in flight we lost our
chance to disambiguate whether a loss timeout was spurious.

We should assume it was a proper loss.

Reported-by: Pasi Kärkkäinen <pasik@iki.fi>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Cc: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8aca4ee95ff9..680c4224ed96 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3484,7 +3484,8 @@ static bool tcp_process_frto(struct sock *sk, int flag)
 	    ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
 		tp->undo_marker = 0;
 
-	if (!before(tp->snd_una, tp->frto_highmark)) {
+	if (!before(tp->snd_una, tp->frto_highmark) ||
+	    !tcp_packets_in_flight(tp)) {
 		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
 		return true;
 	}
-- 
cgit v1.2.3


From 92df9b217ee2392024483ba5b85a88d92d60f3c1 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Fri, 1 Feb 2013 15:18:49 +0000
Subject: net: Fix inner_network_header assignment in skb-copy.

Use correct inner offset to set inner_network_offset.
Found by inspection.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a9a2ae3e2213..32443ebc3e89 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -683,7 +683,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->network_header	= old->network_header;
 	new->mac_header		= old->mac_header;
 	new->inner_transport_header = old->inner_transport_header;
-	new->inner_network_header = old->inner_transport_header;
+	new->inner_network_header = old->inner_network_header;
 	skb_dst_copy(new, old);
 	new->rxhash		= old->rxhash;
 	new->ooo_okay		= old->ooo_okay;
-- 
cgit v1.2.3


From 9665d5d62487e8e7b1f546c00e11107155384b9a Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil.sutter@viprinet.com>
Date: Fri, 1 Feb 2013 07:21:41 +0000
Subject: packet: fix leakage of tx_ring memory

When releasing a packet socket, the routine packet_set_ring() is reused
to free rings instead of allocating them. But when calling it for the
first time, it fills req->tp_block_nr with the value of rb->pg_vec_len
which in the second invocation makes it bail out since req->tp_block_nr
is greater than zero but req->tp_block_size is zero.

This patch solves the problem by passing a zeroed auto-variable to
packet_set_ring() upon each invocation from packet_release().

As far as I can tell, this issue has existed ever since 69e3c75 (net:
TX_RING and packet mmap), i.e. the original inclusion of TX ring support
into af_packet, but applies only to sockets with both RX and TX ring
allocated, which is probably why this went unnoticed all this time.

Signed-off-by: Phil Sutter <phil.sutter@viprinet.com>
Cc: Johann Baudy <johann.baudy@gnu-log.net>
Cc: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e639645e8fec..c111bd0e083a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2361,13 +2361,15 @@ static int packet_release(struct socket *sock)
 
 	packet_flush_mclist(sk);
 
-	memset(&req_u, 0, sizeof(req_u));
-
-	if (po->rx_ring.pg_vec)
+	if (po->rx_ring.pg_vec) {
+		memset(&req_u, 0, sizeof(req_u));
 		packet_set_ring(sk, &req_u, 1, 0);
+	}
 
-	if (po->tx_ring.pg_vec)
+	if (po->tx_ring.pg_vec) {
+		memset(&req_u, 0, sizeof(req_u));
 		packet_set_ring(sk, &req_u, 1, 1);
+	}
 
 	fanout_release(sk);
 
-- 
cgit v1.2.3


From 848bf15f361c7c22da7998c81d50ed3dffbc827d Mon Sep 17 00:00:00 2001
From: Vijay Subramanian <subramanian.vijay@gmail.com>
Date: Thu, 31 Jan 2013 08:24:06 +0000
Subject: tcp: Update MIB counters for drops

This patch updates LINUX_MIB_LISTENDROPS in tcp_v4_conn_request() and
tcp_v4_err(). tcp_v4_conn_request() in particular can drop SYNs for various
reasons which are not currently tracked.

Signed-off-by: Vijay Subramanian <subramanian.vijay@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 629937d514eb..eadb693eef55 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -496,6 +496,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		 * errors returned from accept().
 		 */
 		inet_csk_reqsk_queue_drop(sk, req, prev);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 		goto out;
 
 	case TCP_SYN_SENT:
@@ -1502,7 +1503,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	 */
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 		goto drop;
 	}
 
@@ -1669,6 +1669,7 @@ drop_and_release:
 drop_and_free:
 	reqsk_free(req);
 drop:
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 	return 0;
 }
 EXPORT_SYMBOL(tcp_v4_conn_request);
-- 
cgit v1.2.3


From 5f1e942cb45d06968b0ce94472d97014e0e1fdc9 Mon Sep 17 00:00:00 2001
From: Vijay Subramanian <subramanian.vijay@gmail.com>
Date: Thu, 31 Jan 2013 08:24:19 +0000
Subject: tcp: ipv6: Update MIB counters for drops

This patch updates LINUX_MIB_LISTENDROPS and LINUX_MIB_LISTENOVERFLOWS in
tcp_v6_conn_request() and tcp_v6_err(). tcp_v6_conn_request() in particular can
drop SYNs for various reasons which are not currently tracked.

Signed-off-by: Vijay Subramanian <subramanian.vijay@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 93825dd3a7c0..4f43537197ef 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -423,6 +423,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		}
 
 		inet_csk_reqsk_queue_drop(sk, req, prev);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 		goto out;
 
 	case TCP_SYN_SENT:
@@ -958,8 +959,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 			goto drop;
 	}
 
-	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
 		goto drop;
+	}
 
 	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
 	if (req == NULL)
@@ -1108,6 +1111,7 @@ drop_and_release:
 drop_and_free:
 	reqsk_free(req);
 drop:
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
 	return 0; /* don't send reset */
 }
 
-- 
cgit v1.2.3


From 6731d2095bd4aef18027c72ef845ab1087c3ba63 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Mon, 4 Feb 2013 02:14:25 +0000
Subject: tcp: fix for zero packets_in_flight was too broad
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are transients during normal FRTO procedure during which
the packets_in_flight can go to zero between write_queue state
updates and firing the resulting segments out. As FRTO processing
occurs during that window the check must be more precise to
not match "spuriously" :-). More specifically, e.g., when
packets_in_flight is zero but FLAG_DATA_ACKED is true the problematic
branch that set cwnd to zero would not be taken and new segments
might be sent out later.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Tested-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 680c4224ed96..ad70a962c20e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3484,8 +3484,7 @@ static bool tcp_process_frto(struct sock *sk, int flag)
 	    ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
 		tp->undo_marker = 0;
 
-	if (!before(tp->snd_una, tp->frto_highmark) ||
-	    !tcp_packets_in_flight(tp)) {
+	if (!before(tp->snd_una, tp->frto_highmark)) {
 		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
 		return true;
 	}
@@ -3505,6 +3504,11 @@ static bool tcp_process_frto(struct sock *sk, int flag)
 		}
 	} else {
 		if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
+			if (!tcp_packets_in_flight(tp)) {
+				tcp_enter_frto_loss(sk, 2, flag);
+				return true;
+			}
+
 			/* Prevent sending of new data. */
 			tp->snd_cwnd = min(tp->snd_cwnd,
 					   tcp_packets_in_flight(tp));
-- 
cgit v1.2.3


From 41ab3e31bd50b42c85ac0aa0469642866aee2a9a Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tt.rantala@gmail.com>
Date: Wed, 6 Feb 2013 03:24:02 +0000
Subject: ipv6/ip6_gre: fix error case handling in ip6gre_tunnel_xmit()

ip6gre_tunnel_xmit() is leaking the skb when we hit this error branch,
and the -1 return value from this function is bogus. Use the error
handling we already have in place in ip6gre_tunnel_xmit() for this error
case to fix this.

Signed-off-by: Tommi Rantala <tt.rantala@gmail.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index c727e4712751..131dd097736d 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -960,7 +960,7 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
 	int ret;
 
 	if (!ip6_tnl_xmit_ctl(t))
-		return -1;
+		goto tx_err;
 
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
-- 
cgit v1.2.3


From 586c31f3bf04c290dc0a0de7fc91d20aa9a5ee53 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Thu, 7 Feb 2013 00:55:37 +0000
Subject: net: sctp: sctp_auth_key_put: use kzfree instead of kfree

For sensitive data like keying material, it is common practice to zero
out keys before returning the memory back to the allocator. Thus, use
kzfree instead of kfree.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/auth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 159b9bc5d633..d8420ae614dc 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -71,7 +71,7 @@ void sctp_auth_key_put(struct sctp_auth_bytes *key)
 		return;
 
 	if (atomic_dec_and_test(&key->refcnt)) {
-		kfree(key);
+		kzfree(key);
 		SCTP_DBG_OBJCNT_DEC(keys);
 	}
 }
-- 
cgit v1.2.3


From 87c084a980325d877dc7e388b8f2f26d5d3b4d01 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 7 Feb 2013 14:00:34 +0000
Subject: l2tp: dont play with skb->truesize

Andrew Savchenko reported a DNS failure and we diagnosed that
some UDP sockets were unable to send more packets because their
sk_wmem_alloc was corrupted after a while (tx_queue column in
following trace)

$ cat /proc/net/udp
  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
...
  459: 00000000:0270 00000000:0000 07 00000000:00000000 00:00000000 00000000     0        0 4507 2 ffff88003d612380 0
  466: 00000000:0277 00000000:0000 07 00000000:00000000 00:00000000 00000000     0        0 4802 2 ffff88003d613180 0
  470: 076A070A:007B 00000000:0000 07 FFFF4600:00000000 00:00000000 00000000   123        0 5552 2 ffff880039974380 0
  470: 010213AC:007B 00000000:0000 07 00000000:00000000 00:00000000 00000000     0        0 4986 2 ffff88003dbd3180 0
  470: 010013AC:007B 00000000:0000 07 00000000:00000000 00:00000000 00000000     0        0 4985 2 ffff88003dbd2e00 0
  470: 00FCA8C0:007B 00000000:0000 07 FFFFFB00:00000000 00:00000000 00000000     0        0 4984 2 ffff88003dbd2a80 0
...

Playing with skb->truesize is tricky, especially when
skb is attached to a socket, as we can fool memory charging.

Just remove this code; it's not worth trying to be ultra
precise in the xmit path.

Reported-by: Andrew Savchenko <bircoph@gmail.com>
Tested-by: Andrew Savchenko <bircoph@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 6 ------
 net/l2tp/l2tp_ppp.c  | 6 ------
 2 files changed, 12 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 06389d5ff120..2ac884d0e89b 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1168,8 +1168,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 	struct udphdr *uh;
 	struct inet_sock *inet;
 	__wsum csum;
-	int old_headroom;
-	int new_headroom;
 	int headroom;
 	int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
 	int udp_len;
@@ -1181,16 +1179,12 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 	 */
 	headroom = NET_SKB_PAD + sizeof(struct iphdr) +
 		uhlen + hdr_len;
-	old_headroom = skb_headroom(skb);
 	if (skb_cow_head(skb, headroom)) {
 		kfree_skb(skb);
 		return NET_XMIT_DROP;
 	}
 
-	new_headroom = skb_headroom(skb);
 	skb_orphan(skb);
-	skb->truesize += new_headroom - old_headroom;
-
 	/* Setup L2TP header */
 	session->build_header(session, __skb_push(skb, hdr_len));
 
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 286366ef8930..716605c241f4 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -388,8 +388,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel;
 	struct pppol2tp_session *ps;
-	int old_headroom;
-	int new_headroom;
 	int uhlen, headroom;
 
 	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
@@ -408,7 +406,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	if (tunnel == NULL)
 		goto abort_put_sess;
 
-	old_headroom = skb_headroom(skb);
 	uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
 	headroom = NET_SKB_PAD +
 		   sizeof(struct iphdr) + /* IP header */
@@ -418,9 +415,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	if (skb_cow_head(skb, headroom))
 		goto abort_put_sess_tun;
 
-	new_headroom = skb_headroom(skb);
-	skb->truesize += new_headroom - old_headroom;
-
 	/* Setup PPP header */
 	__skb_push(skb, sizeof(ppph));
 	skb->data[0] = ppph[0];
-- 
cgit v1.2.3


From 6ba542a291a5e558603ac51cda9bded347ce7627 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Fri, 8 Feb 2013 03:04:34 +0000
Subject: net: sctp: sctp_setsockopt_auth_key: use kzfree instead of kfree

In sctp_setsockopt_auth_key(), we create a temporary copy of the
user-passed shared auth key for the endpoint or association and, after
internal setup, free it right away. Since it is sensitive data, we
should zero out the key before returning the memory to the
allocator. Thus, use kzfree() instead of kfree(), just as we do in
sctp_auth_key_put().

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9e65758cb038..cedd9bf67b8c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3390,7 +3390,7 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
 
 	ret = sctp_auth_set_key(sctp_sk(sk)->ep, asoc, authkey);
 out:
-	kfree(authkey);
+	kzfree(authkey);
 	return ret;
 }
 
-- 
cgit v1.2.3


From b5c37fe6e24eec194bb29d22fdd55d73bcc709bf Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Fri, 8 Feb 2013 03:04:35 +0000
Subject: net: sctp: sctp_endpoint_free: zero out secret key data

In sctp_endpoint_destroy(), previously used sensitive keying material
should be zeroed out before the memory is returned, as we already do
with e.g. auth keys when they are released.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/endpointola.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 17a001bac2cc..1a9c5fb77310 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -249,6 +249,8 @@ void sctp_endpoint_free(struct sctp_endpoint *ep)
 /* Final destructor for endpoint.  */
 static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
 {
+	int i;
+
 	SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return);
 
 	/* Free up the HMAC transform. */
@@ -271,6 +273,9 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
 	sctp_inq_free(&ep->base.inqueue);
 	sctp_bind_addr_free(&ep->base.bind_addr);
 
+	for (i = 0; i < SCTP_HOW_MANY_SECRETS; ++i)
+		memset(&ep->secret_key[i], 0, SCTP_SECRET_SIZE);
+
 	/* Remove and free the port */
 	if (sctp_sk(ep->base.sk)->bind_hash)
 		sctp_put_port(ep->base.sk);
-- 
cgit v1.2.3