summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/batman-adv/unicast.c10
-rw-r--r--net/bridge/br_fdb.c10
-rw-r--r--net/bridge/br_netlink.c4
-rw-r--r--net/bridge/br_vlan.c4
-rw-r--r--net/ieee802154/6lowpan.c286
-rw-r--r--net/ieee802154/6lowpan.h20
-rw-r--r--net/ipv4/ip_tunnel.c8
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c2
-rw-r--r--net/ipv4/route.c8
-rw-r--r--net/ipv4/tcp.c7
-rw-r--r--net/ipv4/tcp_input.c32
-rw-r--r--net/ipv4/tcp_ipv4.c5
-rw-r--r--net/ipv4/tcp_probe.c82
-rw-r--r--net/ipv6/addrconf.c88
-rw-r--r--net/ipv6/ip6_tunnel.c4
-rw-r--r--net/ipv6/mcast.c30
-rw-r--r--net/ipv6/ndisc.c4
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c2
-rw-r--r--net/ipv6/reassembly.c5
-rw-r--r--net/ipv6/route.c21
-rw-r--r--net/key/af_key.c14
-rw-r--r--net/netfilter/Kconfig22
-rw-r--r--net/netfilter/Makefile3
-rw-r--r--net/netfilter/core.c7
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c6
-rw-r--r--net/netfilter/nf_conntrack_core.c69
-rw-r--r--net/netfilter/nf_conntrack_labels.c4
-rw-r--r--net/netfilter/nf_conntrack_netlink.c269
-rw-r--r--net/netfilter/nf_conntrack_proto.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c4
-rw-r--r--net/netfilter/nf_nat_core.c6
-rw-r--r--net/netfilter/nf_nat_helper.c28
-rw-r--r--net/netfilter/nf_tproxy_core.c62
-rw-r--r--net/netfilter/nfnetlink_queue_core.c11
-rw-r--r--net/netfilter/nfnetlink_queue_ct.c15
-rw-r--r--net/netfilter/xt_TPROXY.c169
-rw-r--r--net/netfilter/xt_addrtype.c2
-rw-r--r--net/netfilter/xt_socket.c66
-rw-r--r--net/netlink/genetlink.c7
-rw-r--r--net/packet/af_packet.c2
-rw-r--r--net/sctp/probe.c9
-rw-r--r--net/wireless/nl80211.c22
-rw-r--r--net/wireless/sme.c10
-rw-r--r--net/xfrm/xfrm_policy.c4
-rw-r--r--net/xfrm/xfrm_state.c15
46 files changed, 917 insertions, 553 deletions
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index 688a0419756b..857e1b8349ee 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -432,12 +432,16 @@ find_router:
switch (packet_type) {
case BATADV_UNICAST:
- batadv_unicast_prepare_skb(skb, orig_node);
+ if (!batadv_unicast_prepare_skb(skb, orig_node))
+ goto out;
+
header_len = sizeof(struct batadv_unicast_packet);
break;
case BATADV_UNICAST_4ADDR:
- batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node,
- packet_subtype);
+ if (!batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node,
+ packet_subtype))
+ goto out;
+
header_len = sizeof(struct batadv_unicast_4addr_packet);
break;
default:
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 60aca9109a50..ffd5874f2592 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -161,7 +161,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
if (!pv)
return;
- for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
+ for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) {
f = __br_fdb_get(br, br->dev->dev_addr, vid);
if (f && f->is_local && !f->dst)
fdb_delete(br, f);
@@ -730,7 +730,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
/* VID was specified, so use it. */
err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
} else {
- if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+ if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) {
err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
goto out;
}
@@ -739,7 +739,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
* specify a VLAN. To be nice, add/update entry for every
* vlan on this port.
*/
- for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
if (err)
goto out;
@@ -817,7 +817,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
err = __br_fdb_delete(p, addr, vid);
} else {
- if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+ if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) {
err = __br_fdb_delete(p, addr, 0);
goto out;
}
@@ -827,7 +827,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
* vlan on this port.
*/
err = -ENOENT;
- for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
err &= __br_fdb_delete(p, addr, vid);
}
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 1fc30abd3a52..b9259efa636e 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -132,7 +132,7 @@ static int br_fill_ifinfo(struct sk_buff *skb,
else
pv = br_get_vlan_info(br);
- if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN))
+ if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID))
goto done;
af = nla_nest_start(skb, IFLA_AF_SPEC);
@@ -140,7 +140,7 @@ static int br_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure;
pvid = br_get_pvid(pv);
- for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
vinfo.vid = vid;
vinfo.flags = 0;
if (vid == pvid)
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index bd58b45f5f90..9a9ffe7e4019 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -108,7 +108,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)
clear_bit(vid, v->vlan_bitmap);
v->num_vlans--;
- if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+ if (bitmap_empty(v->vlan_bitmap, VLAN_N_VID)) {
if (v->port_idx)
rcu_assign_pointer(v->parent.port->vlan_info, NULL);
else
@@ -122,7 +122,7 @@ static void __vlan_flush(struct net_port_vlans *v)
{
smp_wmb();
v->pvid = 0;
- bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN);
+ bitmap_zero(v->vlan_bitmap, VLAN_N_VID);
if (v->port_idx)
rcu_assign_pointer(v->parent.port->vlan_info, NULL);
else
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 3b9d5f20bd1c..c85e71e0c7ff 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -67,39 +67,6 @@ static const u8 lowpan_ttl_values[] = {0, 1, 64, 255};
static LIST_HEAD(lowpan_devices);
-/*
- * Uncompression of linklocal:
- * 0 -> 16 bytes from packet
- * 1 -> 2 bytes from prefix - bunch of zeroes and 8 from packet
- * 2 -> 2 bytes from prefix - zeroes + 2 from packet
- * 3 -> 2 bytes from prefix - infer 8 bytes from lladdr
- *
- * NOTE: => the uncompress function does change 0xf to 0x10
- * NOTE: 0x00 => no-autoconfig => unspecified
- */
-static const u8 lowpan_unc_llconf[] = {0x0f, 0x28, 0x22, 0x20};
-
-/*
- * Uncompression of ctx-based:
- * 0 -> 0 bits from packet [unspecified / reserved]
- * 1 -> 8 bytes from prefix - bunch of zeroes and 8 from packet
- * 2 -> 8 bytes from prefix - zeroes + 2 from packet
- * 3 -> 8 bytes from prefix - infer 8 bytes from lladdr
- */
-static const u8 lowpan_unc_ctxconf[] = {0x00, 0x88, 0x82, 0x80};
-
-/*
- * Uncompression of ctx-base
- * 0 -> 0 bits from packet
- * 1 -> 2 bytes from prefix - bunch of zeroes 5 from packet
- * 2 -> 2 bytes from prefix - zeroes + 3 from packet
- * 3 -> 2 bytes from prefix - infer 1 bytes from lladdr
- */
-static const u8 lowpan_unc_mxconf[] = {0x0f, 0x25, 0x23, 0x21};
-
-/* Link local prefix */
-static const u8 lowpan_llprefix[] = {0xfe, 0x80};
-
/* private device info */
struct lowpan_dev_info {
struct net_device *real_dev; /* real WPAN device ptr */
@@ -191,55 +158,177 @@ lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, const struct in6_addr *ipaddr,
return rol8(val, shift);
}
-static void
-lowpan_uip_ds6_set_addr_iid(struct in6_addr *ipaddr, unsigned char *lladdr)
+/*
+ * Uncompress address function for source and
+ * destination address(non-multicast).
+ *
+ * address_mode is sam value or dam value.
+ */
+static int
+lowpan_uncompress_addr(struct sk_buff *skb,
+ struct in6_addr *ipaddr,
+ const u8 address_mode,
+ const struct ieee802154_addr *lladdr)
{
- memcpy(&ipaddr->s6_addr[8], lladdr, IEEE802154_ADDR_LEN);
- /* second bit-flip (Universe/Local) is done according RFC2464 */
- ipaddr->s6_addr[8] ^= 0x02;
+ bool fail;
+
+ switch (address_mode) {
+ case LOWPAN_IPHC_ADDR_00:
+ /* for global link addresses */
+ fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16);
+ break;
+ case LOWPAN_IPHC_ADDR_01:
+ /* fe:80::XXXX:XXXX:XXXX:XXXX */
+ ipaddr->s6_addr[0] = 0xFE;
+ ipaddr->s6_addr[1] = 0x80;
+ fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[8], 8);
+ break;
+ case LOWPAN_IPHC_ADDR_02:
+ /* fe:80::ff:fe00:XXXX */
+ ipaddr->s6_addr[0] = 0xFE;
+ ipaddr->s6_addr[1] = 0x80;
+ ipaddr->s6_addr[11] = 0xFF;
+ ipaddr->s6_addr[12] = 0xFE;
+ fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[14], 2);
+ break;
+ case LOWPAN_IPHC_ADDR_03:
+ fail = false;
+ switch (lladdr->addr_type) {
+ case IEEE802154_ADDR_LONG:
+ /* fe:80::XXXX:XXXX:XXXX:XXXX
+ * \_________________/
+ * hwaddr
+ */
+ ipaddr->s6_addr[0] = 0xFE;
+ ipaddr->s6_addr[1] = 0x80;
+ memcpy(&ipaddr->s6_addr[8], lladdr->hwaddr,
+ IEEE802154_ADDR_LEN);
+ /* second bit-flip (Universe/Local)
+ * is done according RFC2464
+ */
+ ipaddr->s6_addr[8] ^= 0x02;
+ break;
+ case IEEE802154_ADDR_SHORT:
+ /* fe:80::ff:fe00:XXXX
+ * \__/
+ * short_addr
+ *
+ * Universe/Local bit is zero.
+ */
+ ipaddr->s6_addr[0] = 0xFE;
+ ipaddr->s6_addr[1] = 0x80;
+ ipaddr->s6_addr[11] = 0xFF;
+ ipaddr->s6_addr[12] = 0xFE;
+ ipaddr->s6_addr16[7] = htons(lladdr->short_addr);
+ break;
+ default:
+ pr_debug("Invalid addr_type set\n");
+ return -EINVAL;
+ }
+ break;
+ default:
+ pr_debug("Invalid address mode value: 0x%x\n", address_mode);
+ return -EINVAL;
+ }
+
+ if (fail) {
+ pr_debug("Failed to fetch skb data\n");
+ return -EIO;
+ }
+
+ lowpan_raw_dump_inline(NULL, "Reconstructed ipv6 addr is:\n",
+ ipaddr->s6_addr, 16);
+
+ return 0;
}
-/*
- * Uncompress addresses based on a prefix and a postfix with zeroes in
- * between. If the postfix is zero in length it will use the link address
- * to configure the IP address (autoconf style).
- * pref_post_count takes a byte where the first nibble specify prefix count
- * and the second postfix count (NOTE: 15/0xf => 16 bytes copy).
+/* Uncompress address function for source context
+ * based address(non-multicast).
*/
static int
-lowpan_uncompress_addr(struct sk_buff *skb, struct in6_addr *ipaddr,
- u8 const *prefix, u8 pref_post_count, unsigned char *lladdr)
+lowpan_uncompress_context_based_src_addr(struct sk_buff *skb,
+ struct in6_addr *ipaddr,
+ const u8 sam)
{
- u8 prefcount = pref_post_count >> 4;
- u8 postcount = pref_post_count & 0x0f;
-
- /* full nibble 15 => 16 */
- prefcount = (prefcount == 15 ? 16 : prefcount);
- postcount = (postcount == 15 ? 16 : postcount);
-
- if (lladdr)
- lowpan_raw_dump_inline(__func__, "linklocal address",
- lladdr, IEEE802154_ADDR_LEN);
- if (prefcount > 0)
- memcpy(ipaddr, prefix, prefcount);
-
- if (prefcount + postcount < 16)
- memset(&ipaddr->s6_addr[prefcount], 0,
- 16 - (prefcount + postcount));
-
- if (postcount > 0) {
- memcpy(&ipaddr->s6_addr[16 - postcount], skb->data, postcount);
- skb_pull(skb, postcount);
- } else if (prefcount > 0) {
- if (lladdr == NULL)
- return -EINVAL;
+ switch (sam) {
+ case LOWPAN_IPHC_ADDR_00:
+ /* unspec address ::
+ * Do nothing, address is already ::
+ */
+ break;
+ case LOWPAN_IPHC_ADDR_01:
+ /* TODO */
+ case LOWPAN_IPHC_ADDR_02:
+ /* TODO */
+ case LOWPAN_IPHC_ADDR_03:
+ /* TODO */
+ netdev_warn(skb->dev, "SAM value 0x%x not supported\n", sam);
+ return -EINVAL;
+ default:
+ pr_debug("Invalid sam value: 0x%x\n", sam);
+ return -EINVAL;
+ }
+
+ lowpan_raw_dump_inline(NULL,
+ "Reconstructed context based ipv6 src addr is:\n",
+ ipaddr->s6_addr, 16);
+
+ return 0;
+}
- /* no IID based configuration if no prefix and no data */
- lowpan_uip_ds6_set_addr_iid(ipaddr, lladdr);
+/* Uncompress function for multicast destination address,
+ * when M bit is set.
+ */
+static int
+lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
+ struct in6_addr *ipaddr,
+ const u8 dam)
+{
+ bool fail;
+
+ switch (dam) {
+ case LOWPAN_IPHC_DAM_00:
+ /* 00: 128 bits. The full address
+ * is carried in-line.
+ */
+ fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16);
+ break;
+ case LOWPAN_IPHC_DAM_01:
+ /* 01: 48 bits. The address takes
+ * the form ffXX::00XX:XXXX:XXXX.
+ */
+ ipaddr->s6_addr[0] = 0xFF;
+ fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1);
+ fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[11], 5);
+ break;
+ case LOWPAN_IPHC_DAM_10:
+ /* 10: 32 bits. The address takes
+ * the form ffXX::00XX:XXXX.
+ */
+ ipaddr->s6_addr[0] = 0xFF;
+ fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1);
+ fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[13], 3);
+ break;
+ case LOWPAN_IPHC_DAM_11:
+ /* 11: 8 bits. The address takes
+ * the form ff02::00XX.
+ */
+ ipaddr->s6_addr[0] = 0xFF;
+ ipaddr->s6_addr[1] = 0x02;
+ fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[15], 1);
+ break;
+ default:
+ pr_debug("DAM value has a wrong value: 0x%x\n", dam);
+ return -EINVAL;
+ }
+
+ if (fail) {
+ pr_debug("Failed to fetch skb data\n");
+ return -EIO;
}
- pr_debug("uncompressing %d + %d => ", prefcount, postcount);
- lowpan_raw_dump_inline(NULL, NULL, ipaddr->s6_addr, 16);
+ lowpan_raw_dump_inline(NULL, "Reconstructed ipv6 multicast addr is:\n",
+ ipaddr->s6_addr, 16);
return 0;
}
@@ -702,6 +791,12 @@ lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag)
skb_reserve(frame->skb, sizeof(struct ipv6hdr));
skb_put(frame->skb, frame->length);
+ /* copy the first control block to keep a
+ * trace of the link-layer addresses in case
+ * of a link-local compressed address
+ */
+ memcpy(frame->skb->cb, skb->cb, sizeof(skb->cb));
+
init_timer(&frame->timer);
/* time out is the same as for ipv6 - 60 sec */
frame->timer.expires = jiffies + LOWPAN_FRAG_TIMEOUT;
@@ -723,9 +818,9 @@ frame_err:
static int
lowpan_process_data(struct sk_buff *skb)
{
- struct ipv6hdr hdr;
+ struct ipv6hdr hdr = {};
u8 tmp, iphc0, iphc1, num_context = 0;
- u8 *_saddr, *_daddr;
+ const struct ieee802154_addr *_saddr, *_daddr;
int err;
lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data,
@@ -828,8 +923,8 @@ lowpan_process_data(struct sk_buff *skb)
if (lowpan_fetch_skb_u8(skb, &iphc1))
goto drop;
- _saddr = mac_cb(skb)->sa.hwaddr;
- _daddr = mac_cb(skb)->da.hwaddr;
+ _saddr = &mac_cb(skb)->sa;
+ _daddr = &mac_cb(skb)->da;
pr_debug("iphc0 = %02x, iphc1 = %02x\n", iphc0, iphc1);
@@ -868,8 +963,6 @@ lowpan_process_data(struct sk_buff *skb)
hdr.priority = ((tmp >> 2) & 0x0f);
hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30);
- hdr.flow_lbl[1] = 0;
- hdr.flow_lbl[2] = 0;
break;
/*
* Flow Label carried in-line
@@ -885,10 +978,6 @@ lowpan_process_data(struct sk_buff *skb)
break;
/* Traffic Class and Flow Label are elided */
case 3: /* 11b */
- hdr.priority = 0;
- hdr.flow_lbl[0] = 0;
- hdr.flow_lbl[1] = 0;
- hdr.flow_lbl[2] = 0;
break;
default:
break;
@@ -915,10 +1004,18 @@ lowpan_process_data(struct sk_buff *skb)
/* Extract SAM to the tmp variable */
tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03;
- /* Source address uncompression */
- pr_debug("source address stateless compression\n");
- err = lowpan_uncompress_addr(skb, &hdr.saddr, lowpan_llprefix,
- lowpan_unc_llconf[tmp], skb->data);
+ if (iphc1 & LOWPAN_IPHC_SAC) {
+ /* Source address context based uncompression */
+ pr_debug("SAC bit is set. Handle context based source address.\n");
+ err = lowpan_uncompress_context_based_src_addr(
+ skb, &hdr.saddr, tmp);
+ } else {
+ /* Source address uncompression */
+ pr_debug("source address stateless compression\n");
+ err = lowpan_uncompress_addr(skb, &hdr.saddr, tmp, _saddr);
+ }
+
+ /* Check on error of previous branch */
if (err)
goto drop;
@@ -931,23 +1028,14 @@ lowpan_process_data(struct sk_buff *skb)
pr_debug("dest: context-based mcast compression\n");
/* TODO: implement this */
} else {
- u8 prefix[] = {0xff, 0x02};
-
- pr_debug("dest: non context-based mcast compression\n");
- if (0 < tmp && tmp < 3) {
- if (lowpan_fetch_skb_u8(skb, &prefix[1]))
- goto drop;
- }
-
- err = lowpan_uncompress_addr(skb, &hdr.daddr, prefix,
- lowpan_unc_mxconf[tmp], NULL);
+ err = lowpan_uncompress_multicast_daddr(
+ skb, &hdr.daddr, tmp);
if (err)
goto drop;
}
} else {
pr_debug("dest: stateless compression\n");
- err = lowpan_uncompress_addr(skb, &hdr.daddr, lowpan_llprefix,
- lowpan_unc_llconf[tmp], skb->data);
+ err = lowpan_uncompress_addr(skb, &hdr.daddr, tmp, _daddr);
if (err)
goto drop;
}
diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
index 4b8f917658b5..2869c0526dad 100644
--- a/net/ieee802154/6lowpan.h
+++ b/net/ieee802154/6lowpan.h
@@ -193,10 +193,12 @@
/* Values of fields within the IPHC encoding second byte */
#define LOWPAN_IPHC_CID 0x80
+#define LOWPAN_IPHC_ADDR_00 0x00
+#define LOWPAN_IPHC_ADDR_01 0x01
+#define LOWPAN_IPHC_ADDR_02 0x02
+#define LOWPAN_IPHC_ADDR_03 0x03
+
#define LOWPAN_IPHC_SAC 0x40
-#define LOWPAN_IPHC_SAM_00 0x00
-#define LOWPAN_IPHC_SAM_01 0x10
-#define LOWPAN_IPHC_SAM_10 0x20
#define LOWPAN_IPHC_SAM 0x30
#define LOWPAN_IPHC_SAM_BIT 4
@@ -230,4 +232,16 @@
dest = 16 bit inline */
#define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */
+static inline bool lowpan_fetch_skb(struct sk_buff *skb,
+ void *data, const unsigned int len)
+{
+ if (unlikely(!pskb_may_pull(skb, len)))
+ return true;
+
+ skb_copy_from_linear_data(skb, data, len);
+ skb_pull(skb, len);
+
+ return false;
+}
+
#endif /* __6LOWPAN_H__ */
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index a4d9126c7b51..830de3f4e293 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -857,13 +857,11 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ if (!IS_ERR(itn->fb_tunnel_dev))
+ itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
rtnl_unlock();
- if (IS_ERR(itn->fb_tunnel_dev))
- return PTR_ERR(itn->fb_tunnel_dev);
-
- return 0;
+ return PTR_RET(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 30e4de940567..00352ce0f0de 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -118,7 +118,7 @@ static int masq_device_event(struct notifier_block *this,
NF_CT_ASSERT(dev->ifindex != 0);
nf_ct_iterate_cleanup(net, device_cmp,
- (void *)(long)dev->ifindex);
+ (void *)(long)dev->ifindex, 0, 0);
}
return NOTIFY_DONE;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e805481eff72..727f4365bcdf 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -112,7 +112,8 @@
#define RT_FL_TOS(oldflp4) \
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
-#define IP_MAX_MTU 0xFFF0
+/* IPv4 datagram length is stored into 16bit field (tot_len) */
+#define IP_MAX_MTU 0xFFFF
#define RT_GC_TIMEOUT (300*HZ)
@@ -1227,10 +1228,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
mtu = 576;
}
- if (mtu > IP_MAX_MTU)
- mtu = IP_MAX_MTU;
-
- return mtu;
+ return min_t(unsigned int, mtu, IP_MAX_MTU);
}
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ab64eea042fa..4e42c03859f4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1117,6 +1117,13 @@ new_segment:
goto wait_for_memory;
/*
+ * All packets are restored as if they have
+ * already been sent.
+ */
+ if (tp->repair)
+ TCP_SKB_CB(skb)->when = tcp_time_stamp;
+
+ /*
* Check whether we can use HW checksum.
*/
if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e965cc7b87ff..ec492eae0cd7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2485,8 +2485,6 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
tcp_try_keep_open(sk);
- if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
- tcp_moderate_cwnd(tp);
} else {
tcp_cwnd_reduction(sk, prior_unsacked, 0);
}
@@ -3128,11 +3126,24 @@ static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
}
+/* Decide wheather to run the increase function of congestion control. */
static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
{
- const struct tcp_sock *tp = tcp_sk(sk);
- return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
- !tcp_in_cwnd_reduction(sk);
+ if (tcp_in_cwnd_reduction(sk))
+ return false;
+
+ /* If reordering is high then always grow cwnd whenever data is
+ * delivered regardless of its ordering. Otherwise stay conservative
+ * and only grow cwnd on in-order delivery in Open state, and retain
+ * cwnd in Disordered state (RFC5681). A stretched ACK with
+ * new SACK or ECE mark may first advance cwnd here and later reduce
+ * cwnd in tcp_fastretrans_alert() based on more states.
+ */
+ if (tcp_sk(sk)->reordering > sysctl_tcp_reordering)
+ return flag & FLAG_FORWARD_PROGRESS;
+
+ return inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
+ flag & FLAG_DATA_ACKED;
}
/* Check that window update is acceptable.
@@ -3352,18 +3363,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt);
acked -= tp->packets_out;
+ /* Advance cwnd if state allows */
+ if (tcp_may_raise_cwnd(sk, flag))
+ tcp_cong_avoid(sk, ack, prior_in_flight);
+
if (tcp_ack_is_dubious(sk, flag)) {
- /* Advance CWND, if state allows this. */
- if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
- tcp_cong_avoid(sk, ack, prior_in_flight);
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
tcp_fastretrans_alert(sk, acked, prior_unsacked,
is_dupack, flag);
- } else {
- if (flag & FLAG_DATA_ACKED)
- tcp_cong_avoid(sk, ack, prior_in_flight);
}
-
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 05a3d45d3102..09d45d718973 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -821,8 +821,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
*/
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
- u16 queue_mapping,
- bool nocache)
+ u16 queue_mapping)
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct flowi4 fl4;
@@ -852,7 +851,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
{
- int res = tcp_v4_send_synack(sk, NULL, req, 0, false);
+ int res = tcp_v4_send_synack(sk, NULL, req, 0);
if (!res)
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index d4943f67aff2..622a4377b397 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -46,6 +46,10 @@ static unsigned int bufsize __read_mostly = 4096;
MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)");
module_param(bufsize, uint, 0);
+static unsigned int fwmark __read_mostly = 0;
+MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");
+module_param(fwmark, uint, 0);
+
static int full __read_mostly;
MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)");
module_param(full, int, 0);
@@ -54,12 +58,16 @@ static const char procname[] = "tcpprobe";
struct tcp_log {
ktime_t tstamp;
- __be32 saddr, daddr;
- __be16 sport, dport;
+ union {
+ struct sockaddr raw;
+ struct sockaddr_in v4;
+ struct sockaddr_in6 v6;
+ } src, dst;
u16 length;
u32 snd_nxt;
u32 snd_una;
u32 snd_wnd;
+ u32 rcv_wnd;
u32 snd_cwnd;
u32 ssthresh;
u32 srtt;
@@ -86,19 +94,45 @@ static inline int tcp_probe_avail(void)
return bufsize - tcp_probe_used() - 1;
}
+#define tcp_probe_copy_fl_to_si4(inet, si4, mem) \
+ do { \
+ si4.sin_family = AF_INET; \
+ si4.sin_port = inet->inet_##mem##port; \
+ si4.sin_addr.s_addr = inet->inet_##mem##addr; \
+ } while (0) \
+
+#if IS_ENABLED(CONFIG_IPV6)
+#define tcp_probe_copy_fl_to_si6(inet, si6, mem) \
+ do { \
+ struct ipv6_pinfo *pi6 = inet->pinet6; \
+ si6.sin6_family = AF_INET6; \
+ si6.sin6_port = inet->inet_##mem##port; \
+ si6.sin6_addr = pi6->mem##addr; \
+ si6.sin6_flowinfo = 0; /* No need here. */ \
+ si6.sin6_scope_id = 0; /* No need here. */ \
+ } while (0)
+#else
+#define tcp_probe_copy_fl_to_si6(fl, si6, mem) \
+ do { \
+ memset(&si6, 0, sizeof(si6)); \
+ } while (0)
+#endif
+
/*
* Hook inserted to be called before each receive packet.
* Note: arguments must match tcp_rcv_established()!
*/
static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- struct tcphdr *th, unsigned int len)
+ const struct tcphdr *th, unsigned int len)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_sock *inet = inet_sk(sk);
- /* Only update if port matches */
- if ((port == 0 || ntohs(inet->inet_dport) == port ||
- ntohs(inet->inet_sport) == port) &&
+ /* Only update if port or skb mark matches */
+ if (((port == 0 && fwmark == 0) ||
+ ntohs(inet->inet_dport) == port ||
+ ntohs(inet->inet_sport) == port ||
+ (fwmark > 0 && skb->mark == fwmark)) &&
(full || tp->snd_cwnd != tcp_probe.lastcwnd)) {
spin_lock(&tcp_probe.lock);
@@ -107,15 +141,25 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
struct tcp_log *p = tcp_probe.log + tcp_probe.head;
p->tstamp = ktime_get();
- p->saddr = inet->inet_saddr;
- p->sport = inet->inet_sport;
- p->daddr = inet->inet_daddr;
- p->dport = inet->inet_dport;
+ switch (sk->sk_family) {
+ case AF_INET:
+ tcp_probe_copy_fl_to_si4(inet, p->src.v4, s);
+ tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d);
+ break;
+ case AF_INET6:
+ tcp_probe_copy_fl_to_si6(inet, p->src.v6, s);
+ tcp_probe_copy_fl_to_si6(inet, p->dst.v6, d);
+ break;
+ default:
+ BUG();
+ }
+
p->length = skb->len;
p->snd_nxt = tp->snd_nxt;
p->snd_una = tp->snd_una;
p->snd_cwnd = tp->snd_cwnd;
p->snd_wnd = tp->snd_wnd;
+ p->rcv_wnd = tp->rcv_wnd;
p->ssthresh = tcp_current_ssthresh(sk);
p->srtt = tp->srtt >> 3;
@@ -157,13 +201,11 @@ static int tcpprobe_sprint(char *tbuf, int n)
= ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start));
return scnprintf(tbuf, n,
- "%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n",
+ "%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n",
(unsigned long) tv.tv_sec,
(unsigned long) tv.tv_nsec,
- &p->saddr, ntohs(p->sport),
- &p->daddr, ntohs(p->dport),
- p->length, p->snd_nxt, p->snd_una,
- p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt);
+ &p->src, &p->dst, p->length, p->snd_nxt, p->snd_una,
+ p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd);
}
static ssize_t tcpprobe_read(struct file *file, char __user *buf,
@@ -223,6 +265,13 @@ static __init int tcpprobe_init(void)
{
int ret = -ENOMEM;
+ /* Warning: if the function signature of tcp_rcv_established,
+ * has been changed, you also have to change the signature of
+ * jtcp_rcv_established, otherwise you end up right here!
+ */
+ BUILD_BUG_ON(__same_type(tcp_rcv_established,
+ jtcp_rcv_established) == 0);
+
init_waitqueue_head(&tcp_probe.wait);
spin_lock_init(&tcp_probe.lock);
@@ -241,7 +290,8 @@ static __init int tcpprobe_init(void)
if (ret)
goto err1;
- pr_info("probe registered (port=%d) bufsize=%u\n", port, bufsize);
+ pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n",
+ port, fwmark, bufsize);
return 0;
err1:
remove_proc_entry(procname, init_net.proc_net);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ad12f7c43f25..2d6d1793bbfe 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -99,9 +99,9 @@
#define ACONF_DEBUG 2
#if ACONF_DEBUG >= 3
-#define ADBG(x) printk x
+#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
#else
-#define ADBG(x)
+#define ADBG(fmt, ...) do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
#endif
#define INFINITY_LIFE_TIME 0xFFFFFFFF
@@ -374,9 +374,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
dev_hold(dev);
if (snmp6_alloc_dev(ndev) < 0) {
- ADBG((KERN_WARNING
+ ADBG(KERN_WARNING
"%s: cannot allocate memory for statistics; dev=%s.\n",
- __func__, dev->name));
+ __func__, dev->name);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
dev_put(dev);
kfree(ndev);
@@ -384,9 +384,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
}
if (snmp6_register_dev(ndev) < 0) {
- ADBG((KERN_WARNING
+ ADBG(KERN_WARNING
"%s: cannot create /proc/net/dev_snmp6/%s\n",
- __func__, dev->name));
+ __func__, dev->name);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
ndev->dead = 1;
in6_dev_finish_destroy(ndev);
@@ -849,7 +849,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
/* Ignore adding duplicate addresses on an interface */
if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
- ADBG(("ipv6_add_addr: already assigned\n"));
+ ADBG("ipv6_add_addr: already assigned\n");
err = -EEXIST;
goto out;
}
@@ -857,7 +857,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
if (ifa == NULL) {
- ADBG(("ipv6_add_addr: malloc failed\n"));
+ ADBG("ipv6_add_addr: malloc failed\n");
err = -ENOBUFS;
goto out;
}
@@ -1131,12 +1131,10 @@ retry:
if (ifp->flags & IFA_F_OPTIMISTIC)
addr_flags |= IFA_F_OPTIMISTIC;
- ift = !max_addresses ||
- ipv6_count_addresses(idev) < max_addresses ?
- ipv6_add_addr(idev, &addr, NULL, tmp_plen,
- ipv6_addr_scope(&addr), addr_flags,
- tmp_valid_lft, tmp_prefered_lft) : NULL;
- if (IS_ERR_OR_NULL(ift)) {
+ ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
+ ipv6_addr_scope(&addr), addr_flags,
+ tmp_valid_lft, tmp_prefered_lft);
+ if (IS_ERR(ift)) {
in6_ifa_put(ifp);
in6_dev_put(idev);
pr_info("%s: retry temporary address regeneration\n", __func__);
@@ -1814,6 +1812,16 @@ static int addrconf_ifid_gre(u8 *eui, struct net_device *dev)
return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr);
}
+static int addrconf_ifid_ip6tnl(u8 *eui, struct net_device *dev)
+{
+ memcpy(eui, dev->perm_addr, 3);
+ memcpy(eui + 5, dev->perm_addr + 3, 3);
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ eui[0] ^= 2;
+ return 0;
+}
+
static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
{
switch (dev->type) {
@@ -1832,6 +1840,8 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
return addrconf_ifid_eui64(eui, dev);
case ARPHRD_IEEE1394:
return addrconf_ifid_ieee1394(eui, dev);
+ case ARPHRD_TUNNEL6:
+ return addrconf_ifid_ip6tnl(eui, dev);
}
return -1;
}
@@ -2057,7 +2067,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
pinfo = (struct prefix_info *) opt;
if (len < sizeof(struct prefix_info)) {
- ADBG(("addrconf: prefix option too short\n"));
+ ADBG("addrconf: prefix option too short\n");
return;
}
@@ -2709,7 +2719,8 @@ static void addrconf_dev_config(struct net_device *dev)
(dev->type != ARPHRD_ARCNET) &&
(dev->type != ARPHRD_INFINIBAND) &&
(dev->type != ARPHRD_IEEE802154) &&
- (dev->type != ARPHRD_IEEE1394)) {
+ (dev->type != ARPHRD_IEEE1394) &&
+ (dev->type != ARPHRD_TUNNEL6)) {
/* Alas, we support only Ethernet autoconfiguration. */
return;
}
@@ -2795,44 +2806,6 @@ ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
return -1;
}
-static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
-{
- struct net_device *link_dev;
- struct net *net = dev_net(idev->dev);
-
- /* first try to inherit the link-local address from the link device */
- if (idev->dev->iflink &&
- (link_dev = __dev_get_by_index(net, idev->dev->iflink))) {
- if (!ipv6_inherit_linklocal(idev, link_dev))
- return;
- }
- /* then try to inherit it from any device */
- for_each_netdev(net, link_dev) {
- if (!ipv6_inherit_linklocal(idev, link_dev))
- return;
- }
- pr_debug("init ip6-ip6: add_linklocal failed\n");
-}
-
-/*
- * Autoconfigure tunnel with a link-local address so routing protocols,
- * DHCPv6, MLD etc. can be run over the virtual link
- */
-
-static void addrconf_ip6_tnl_config(struct net_device *dev)
-{
- struct inet6_dev *idev;
-
- ASSERT_RTNL();
-
- idev = addrconf_add_dev(dev);
- if (IS_ERR(idev)) {
- pr_debug("init ip6-ip6: add_dev failed\n");
- return;
- }
- ip6_tnl_add_linklocal(idev);
-}
-
static int addrconf_notify(struct notifier_block *this, unsigned long event,
void *ptr)
{
@@ -2900,9 +2873,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
addrconf_gre_config(dev);
break;
#endif
- case ARPHRD_TUNNEL6:
- addrconf_ip6_tnl_config(dev);
- break;
case ARPHRD_LOOPBACK:
init_loopback(dev);
break;
@@ -3637,8 +3607,8 @@ restart:
if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
- ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
- now, next, next_sec, next_sched));
+ ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
+ now, next, next_sec, next_sched);
addr_chk_timer.expires = next_sched;
add_timer(&addr_chk_timer);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index cc3bb201b8b0..d6e00a39274c 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -41,6 +41,7 @@
#include <linux/netfilter_ipv6.h>
#include <linux/slab.h>
#include <linux/hash.h>
+#include <linux/etherdevice.h>
#include <asm/uaccess.h>
#include <linux/atomic.h>
@@ -1471,6 +1472,9 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ /* This perm addr will be used as interface identifier by IPv6 */
+ dev->addr_assign_type = NET_ADDR_RANDOM;
+ eth_random_addr(dev->perm_addr);
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 6c76df9909bf..98ead2b1a669 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -107,9 +107,12 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
struct inet6_dev *idev);
-
#define MLD_QRV_DEFAULT 2
+/* RFC3810, 8.1 Query Version Distinctions */
+#define MLD_V1_QUERY_LEN 24
+#define MLD_V2_QUERY_LEN_MIN 28
+
#define MLD_V1_SEEN(idev) (dev_net((idev)->dev)->ipv6.devconf_all->force_mld_version == 1 || \
(idev)->cnf.force_mld_version == 1 || \
((idev)->mc_v1_seen && \
@@ -996,24 +999,24 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
static void mld_gq_start_timer(struct inet6_dev *idev)
{
- int tv = net_random() % idev->mc_maxdelay;
+ unsigned long tv = net_random() % idev->mc_maxdelay;
idev->mc_gq_running = 1;
if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2))
in6_dev_hold(idev);
}
-static void mld_ifc_start_timer(struct inet6_dev *idev, int delay)
+static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay)
{
- int tv = net_random() % delay;
+ unsigned long tv = net_random() % delay;
if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2))
in6_dev_hold(idev);
}
-static void mld_dad_start_timer(struct inet6_dev *idev, int delay)
+static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay)
{
- int tv = net_random() % delay;
+ unsigned long tv = net_random() % delay;
if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2))
in6_dev_hold(idev);
@@ -1146,13 +1149,11 @@ int igmp6_event_query(struct sk_buff *skb)
!(group_type&IPV6_ADDR_MULTICAST))
return -EINVAL;
- if (len == 24) {
+ if (len == MLD_V1_QUERY_LEN) {
int switchback;
/* MLDv1 router present */
- /* Translate milliseconds to jiffies */
- max_delay = (ntohs(mld->mld_maxdelay)*HZ)/1000;
-
+ max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay));
switchback = (idev->mc_qrv + 1) * max_delay;
idev->mc_v1_seen = jiffies + switchback;
@@ -1162,17 +1163,18 @@ int igmp6_event_query(struct sk_buff *skb)
__in6_dev_put(idev);
/* clear deleted report items */
mld_clear_delrec(idev);
- } else if (len >= 28) {
+ } else if (len >= MLD_V2_QUERY_LEN_MIN) {
int srcs_offset = sizeof(struct mld2_query) -
sizeof(struct icmp6hdr);
if (!pskb_may_pull(skb, srcs_offset))
return -EINVAL;
mlh2 = (struct mld2_query *)skb_transport_header(skb);
- max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000;
- if (!max_delay)
- max_delay = 1;
+
+ max_delay = max(msecs_to_jiffies(MLDV2_MRC(ntohs(mlh2->mld2q_mrc))), 1UL);
+
idev->mc_maxdelay = max_delay;
+
if (mlh2->mld2q_qrv)
idev->mc_qrv = mlh2->mld2q_qrv;
if (group_type == IPV6_ADDR_ANY) { /* general query */
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 79aa9652ed86..04d31c2fbef1 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1369,8 +1369,10 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts))
return;
- if (!ndopts.nd_opts_rh)
+ if (!ndopts.nd_opts_rh) {
+ ip6_redirect_no_header(skb, dev_net(skb->dev), 0, 0);
return;
+ }
hdr = (u8 *)ndopts.nd_opts_rh;
hdr += 8;
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 47bff6107519..3e4e92d5e157 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -76,7 +76,7 @@ static int masq_device_event(struct notifier_block *this,
if (event == NETDEV_DOWN)
nf_ct_iterate_cleanup(net, device_cmp,
- (void *)(long)dev->ifindex);
+ (void *)(long)dev->ifindex, 0, 0);
return NOTIFY_DONE;
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 790d9f4b8b0b..1aeb473b2cc6 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -490,6 +490,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
ipv6_hdr(head)->payload_len = htons(payload_len);
ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
IP6CB(head)->nhoff = nhoff;
+ IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE)
@@ -524,6 +525,9 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
struct net *net = dev_net(skb_dst(skb)->dev);
int evicted;
+ if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
+ goto fail_hdr;
+
IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
/* Jumbo payload inhibits frag. header */
@@ -544,6 +548,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
+ IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
return 1;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e22c4db8d07a..55236a84c748 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1177,6 +1177,27 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
}
EXPORT_SYMBOL_GPL(ip6_redirect);
+void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
+ u32 mark)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = mark;
+ fl6.flowi6_flags = 0;
+ fl6.daddr = msg->dest;
+ fl6.saddr = iph->daddr;
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (!dst->error)
+ rt6_do_redirect(dst, NULL, skb);
+ dst_release(dst);
+}
+
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ab8bd2cabfa0..9d585370c5b4 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -45,7 +45,7 @@ struct netns_pfkey {
static DEFINE_MUTEX(pfkey_mutex);
#define DUMMY_MARK 0
-static struct xfrm_mark dummy_mark = {0, 0};
+static const struct xfrm_mark dummy_mark = {0, 0};
struct pfkey_sock {
/* struct sock must be the first member of struct pfkey_sock */
struct sock sk;
@@ -338,7 +338,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk)
return 0;
}
-static u8 sadb_ext_min_len[] = {
+static const u8 sadb_ext_min_len[] = {
[SADB_EXT_RESERVED] = (u8) 0,
[SADB_EXT_SA] = (u8) sizeof(struct sadb_sa),
[SADB_EXT_LIFETIME_CURRENT] = (u8) sizeof(struct sadb_lifetime),
@@ -1196,10 +1196,6 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
x->props.family = pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
&x->props.saddr);
- if (!x->props.family) {
- err = -EAFNOSUPPORT;
- goto out;
- }
pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1],
&x->id.daddr);
@@ -2205,10 +2201,6 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_
sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1];
xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr);
- if (!xp->family) {
- err = -EINVAL;
- goto out;
- }
xp->selector.family = xp->family;
xp->selector.prefixlen_s = sa->sadb_address_prefixlen;
xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
@@ -2737,7 +2729,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb,
const struct sadb_msg *hdr, void * const *ext_hdrs);
-static pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
+static const pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
[SADB_RESERVED] = pfkey_reserved,
[SADB_GETSPI] = pfkey_getspi,
[SADB_UPDATE] = pfkey_add,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 56d22cae5906..c45fc1a60e0d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -410,20 +410,6 @@ config NF_NAT_TFTP
endif # NF_CONNTRACK
-# transparent proxy support
-config NETFILTER_TPROXY
- tristate "Transparent proxying support"
- depends on IP_NF_MANGLE
- depends on NETFILTER_ADVANCED
- help
- This option enables transparent proxying support, that is,
- support for handling non-locally bound IPv4 TCP and UDP sockets.
- For it to work you will have to configure certain iptables rules
- and use policy routing. For more information on how to set it up
- see Documentation/networking/tproxy.txt.
-
- To compile it as a module, choose M here. If unsure, say N.
-
config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)"
default m if NETFILTER_ADVANCED=n
@@ -720,10 +706,10 @@ config NETFILTER_XT_TARGET_TEE
this clone be rerouted to another nexthop.
config NETFILTER_XT_TARGET_TPROXY
- tristate '"TPROXY" target support'
- depends on NETFILTER_TPROXY
+ tristate '"TPROXY" target transparent proxying support'
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
+ depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
help
@@ -731,6 +717,9 @@ config NETFILTER_XT_TARGET_TPROXY
REDIRECT. It can only be used in the mangle table and is useful
to redirect traffic to a transparent proxy. It does _not_ depend
on Netfilter connection tracking and NAT, unlike REDIRECT.
+ For it to work you will have to configure certain iptables rules
+ and use policy routing. For more information on how to set it up
+ see Documentation/networking/tproxy.txt.
To compile it as a module, choose M here. If unsure, say N.
@@ -1180,7 +1169,6 @@ config NETFILTER_XT_MATCH_SCTP
config NETFILTER_XT_MATCH_SOCKET
tristate '"socket" match support'
- depends on NETFILTER_TPROXY
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
depends on !NF_CONNTRACK || NF_CONNTRACK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index a1abf87d43bf..ebfa7dc747cd 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -61,9 +61,6 @@ obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o
obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o
obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
-# transparent proxy support
-obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
-
# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 2217363ab422..593b16ea45e0 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -234,12 +234,13 @@ EXPORT_SYMBOL(skb_make_writable);
/* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence
manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
+void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
+ __rcu __read_mostly;
EXPORT_SYMBOL(ip_ct_attach);
-void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
+void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
{
- void (*attach)(struct sk_buff *, struct sk_buff *);
+ void (*attach)(struct sk_buff *, const struct sk_buff *);
if (skb->nfct) {
rcu_read_lock();
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 3cd85b2fc67c..5199448697f6 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -414,7 +414,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
spin_lock_bh(&svc->sched_lock);
tbl->dead = 1;
- for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+ for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
ip_vs_lblcr_free(en);
}
@@ -440,7 +440,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
struct ip_vs_lblcr_entry *en;
struct hlist_node *next;
- for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+ for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
spin_lock(&svc->sched_lock);
@@ -495,7 +495,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
if (goal > tbl->max_size/2)
goal = tbl->max_size/2;
- for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+ for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
spin_lock(&svc->sched_lock);
@@ -536,7 +536,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
/*
* Initialize the hash buckets
*/
- for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+ for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
INIT_HLIST_HEAD(&tbl->bucket[i]);
}
tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index f16c027df15b..3588faebe529 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -269,14 +269,20 @@ ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
switch (iph->protocol) {
case IPPROTO_TCP:
th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
+ if (unlikely(th == NULL))
+ return 0;
port = th->source;
break;
case IPPROTO_UDP:
uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
+ if (unlikely(uh == NULL))
+ return 0;
port = uh->source;
break;
case IPPROTO_SCTP:
sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
+ if (unlikely(sh == NULL))
+ return 0;
port = sh->source;
break;
default:
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0283baedcdfb..da6f1787a102 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -238,7 +238,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
nf_conntrack_free(ct);
}
-void nf_ct_delete_from_lists(struct nf_conn *ct)
+static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
@@ -253,7 +253,6 @@ void nf_ct_delete_from_lists(struct nf_conn *ct)
&net->ct.dying);
spin_unlock_bh(&nf_conntrack_lock);
}
-EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
static void death_by_event(unsigned long ul_conntrack)
{
@@ -275,7 +274,7 @@ static void death_by_event(unsigned long ul_conntrack)
nf_ct_put(ct);
}
-void nf_ct_dying_timeout(struct nf_conn *ct)
+static void nf_ct_dying_timeout(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
@@ -288,27 +287,33 @@ void nf_ct_dying_timeout(struct nf_conn *ct)
(prandom_u32() % net->ct.sysctl_events_retry_timeout);
add_timer(&ecache->timeout);
}
-EXPORT_SYMBOL_GPL(nf_ct_dying_timeout);
-static void death_by_timeout(unsigned long ul_conntrack)
+bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
{
- struct nf_conn *ct = (void *)ul_conntrack;
struct nf_conn_tstamp *tstamp;
tstamp = nf_conn_tstamp_find(ct);
if (tstamp && tstamp->stop == 0)
tstamp->stop = ktime_to_ns(ktime_get_real());
- if (!test_bit(IPS_DYING_BIT, &ct->status) &&
- unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
+ if (!nf_ct_is_dying(ct) &&
+ unlikely(nf_conntrack_event_report(IPCT_DESTROY, ct,
+ portid, report) < 0)) {
/* destroy event was not delivered */
nf_ct_delete_from_lists(ct);
nf_ct_dying_timeout(ct);
- return;
+ return false;
}
set_bit(IPS_DYING_BIT, &ct->status);
nf_ct_delete_from_lists(ct);
nf_ct_put(ct);
+ return true;
+}
+EXPORT_SYMBOL_GPL(nf_ct_delete);
+
+static void death_by_timeout(unsigned long ul_conntrack)
+{
+ nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
}
/*
@@ -643,10 +648,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
return dropped;
if (del_timer(&ct->timeout)) {
- death_by_timeout((unsigned long)ct);
- /* Check if we indeed killed this entry. Reliable event
- delivery may have inserted it into the dying list. */
- if (test_bit(IPS_DYING_BIT, &ct->status)) {
+ if (nf_ct_delete(ct, 0, 0)) {
dropped = 1;
NF_CT_STAT_INC_ATOMIC(net, early_drop);
}
@@ -1192,7 +1194,7 @@ EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
#endif
/* Used by ipt_REJECT and ip6t_REJECT. */
-static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
+static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
@@ -1244,7 +1246,7 @@ found:
void nf_ct_iterate_cleanup(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
- void *data)
+ void *data, u32 portid, int report)
{
struct nf_conn *ct;
unsigned int bucket = 0;
@@ -1252,7 +1254,8 @@ void nf_ct_iterate_cleanup(struct net *net,
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
if (del_timer(&ct->timeout))
- death_by_timeout((unsigned long)ct);
+ nf_ct_delete(ct, portid, report);
+
/* ... else the timer will get him soon. */
nf_ct_put(ct);
@@ -1260,30 +1263,6 @@ void nf_ct_iterate_cleanup(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
-struct __nf_ct_flush_report {
- u32 portid;
- int report;
-};
-
-static int kill_report(struct nf_conn *i, void *data)
-{
- struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
- struct nf_conn_tstamp *tstamp;
-
- tstamp = nf_conn_tstamp_find(i);
- if (tstamp && tstamp->stop == 0)
- tstamp->stop = ktime_to_ns(ktime_get_real());
-
- /* If we fail to deliver the event, death_by_timeout() will retry */
- if (nf_conntrack_event_report(IPCT_DESTROY, i,
- fr->portid, fr->report) < 0)
- return 1;
-
- /* Avoid the delivery of the destroy event in death_by_timeout(). */
- set_bit(IPS_DYING_BIT, &i->status);
- return 1;
-}
-
static int kill_all(struct nf_conn *i, void *data)
{
return 1;
@@ -1301,11 +1280,7 @@ EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
void nf_conntrack_flush_report(struct net *net, u32 portid, int report)
{
- struct __nf_ct_flush_report fr = {
- .portid = portid,
- .report = report,
- };
- nf_ct_iterate_cleanup(net, kill_report, &fr);
+ nf_ct_iterate_cleanup(net, kill_all, NULL, portid, report);
}
EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
@@ -1386,7 +1361,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
i_see_dead_people:
busy = 0;
list_for_each_entry(net, net_exit_list, exit_list) {
- nf_ct_iterate_cleanup(net, kill_all, NULL);
+ nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
nf_ct_release_dying_list(net);
if (atomic_read(&net->ct.count) != 0)
busy = 1;
@@ -1692,7 +1667,7 @@ err_stat:
return ret;
}
-s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
+s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
enum ip_conntrack_dir dir,
u32 seq);
EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index 355d2ef08094..bb53f120e79c 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -8,12 +8,8 @@
* published by the Free Software Foundation.
*/
-#include <linux/ctype.h>
#include <linux/export.h>
-#include <linux/jhash.h>
-#include <linux/spinlock.h>
#include <linux/types.h>
-#include <linux/slab.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_labels.h>
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index edc410e778f7..fa61fea63234 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1038,21 +1038,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
}
}
- if (del_timer(&ct->timeout)) {
- if (nf_conntrack_event_report(IPCT_DESTROY, ct,
- NETLINK_CB(skb).portid,
- nlmsg_report(nlh)) < 0) {
- nf_ct_delete_from_lists(ct);
- /* we failed to report the event, try later */
- nf_ct_dying_timeout(ct);
- nf_ct_put(ct);
- return 0;
- }
- /* death_by_timeout would report the event again */
- set_bit(IPS_DYING_BIT, &ct->status);
- nf_ct_delete_from_lists(ct);
- nf_ct_put(ct);
- }
+ if (del_timer(&ct->timeout))
+ nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
+
nf_ct_put(ct);
return 0;
@@ -1999,6 +1987,27 @@ out:
return err == -EAGAIN ? -ENOBUFS : err;
}
+static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
+ [CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
+ [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
+ [CTA_EXPECT_MASK] = { .type = NLA_NESTED },
+ [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
+ [CTA_EXPECT_ID] = { .type = NLA_U32 },
+ [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
+ .len = NF_CT_HELPER_NAME_LEN - 1 },
+ [CTA_EXPECT_ZONE] = { .type = NLA_U16 },
+ [CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
+ [CTA_EXPECT_CLASS] = { .type = NLA_U32 },
+ [CTA_EXPECT_NAT] = { .type = NLA_NESTED },
+ [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING },
+};
+
+static struct nf_conntrack_expect *
+ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct,
+ struct nf_conntrack_helper *helper,
+ struct nf_conntrack_tuple *tuple,
+ struct nf_conntrack_tuple *mask);
+
#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT
static size_t
ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
@@ -2139,10 +2148,69 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
return ret;
}
+static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda,
+ const struct nf_conn *ct,
+ struct nf_conntrack_tuple *tuple,
+ struct nf_conntrack_tuple *mask)
+{
+ int err;
+
+ err = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE,
+ nf_ct_l3num(ct));
+ if (err < 0)
+ return err;
+
+ return ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK,
+ nf_ct_l3num(ct));
+}
+
+static int
+ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
+ u32 portid, u32 report)
+{
+ struct nlattr *cda[CTA_EXPECT_MAX+1];
+ struct nf_conntrack_tuple tuple, mask;
+ struct nf_conntrack_helper *helper;
+ struct nf_conntrack_expect *exp;
+ int err;
+
+ err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy);
+ if (err < 0)
+ return err;
+
+ err = ctnetlink_nfqueue_exp_parse((const struct nlattr * const *)cda,
+ ct, &tuple, &mask);
+ if (err < 0)
+ return err;
+
+ if (cda[CTA_EXPECT_HELP_NAME]) {
+ const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
+
+ helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
+ nf_ct_protonum(ct));
+ if (helper == NULL)
+ return -EOPNOTSUPP;
+ }
+
+ exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct,
+ helper, &tuple, &mask);
+ if (IS_ERR(exp))
+ return PTR_ERR(exp);
+
+ err = nf_ct_expect_related_report(exp, portid, report);
+ if (err < 0) {
+ nf_ct_expect_put(exp);
+ return err;
+ }
+
+ return 0;
+}
+
static struct nfq_ct_hook ctnetlink_nfqueue_hook = {
.build_size = ctnetlink_nfqueue_build_size,
.build = ctnetlink_nfqueue_build,
.parse = ctnetlink_nfqueue_parse,
+ .attach_expect = ctnetlink_nfqueue_attach_expect,
};
#endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */
@@ -2510,21 +2578,6 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
return err;
}
-static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
- [CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
- [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
- [CTA_EXPECT_MASK] = { .type = NLA_NESTED },
- [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
- [CTA_EXPECT_ID] = { .type = NLA_U32 },
- [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
- .len = NF_CT_HELPER_NAME_LEN - 1 },
- [CTA_EXPECT_ZONE] = { .type = NLA_U16 },
- [CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
- [CTA_EXPECT_CLASS] = { .type = NLA_U32 },
- [CTA_EXPECT_NAT] = { .type = NLA_NESTED },
- [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING },
-};
-
static int
ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
@@ -2747,76 +2800,26 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
#endif
}
-static int
-ctnetlink_create_expect(struct net *net, u16 zone,
- const struct nlattr * const cda[],
- u_int8_t u3,
- u32 portid, int report)
+static struct nf_conntrack_expect *
+ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
+ struct nf_conntrack_helper *helper,
+ struct nf_conntrack_tuple *tuple,
+ struct nf_conntrack_tuple *mask)
{
- struct nf_conntrack_tuple tuple, mask, master_tuple;
- struct nf_conntrack_tuple_hash *h = NULL;
+ u_int32_t class = 0;
struct nf_conntrack_expect *exp;
- struct nf_conn *ct;
struct nf_conn_help *help;
- struct nf_conntrack_helper *helper = NULL;
- u_int32_t class = 0;
- int err = 0;
-
- /* caller guarantees that those three CTA_EXPECT_* exist */
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
- if (err < 0)
- return err;
- err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
- if (err < 0)
- return err;
- err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
- if (err < 0)
- return err;
-
- /* Look for master conntrack of this expectation */
- h = nf_conntrack_find_get(net, zone, &master_tuple);
- if (!h)
- return -ENOENT;
- ct = nf_ct_tuplehash_to_ctrack(h);
-
- /* Look for helper of this expectation */
- if (cda[CTA_EXPECT_HELP_NAME]) {
- const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
-
- helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
- nf_ct_protonum(ct));
- if (helper == NULL) {
-#ifdef CONFIG_MODULES
- if (request_module("nfct-helper-%s", helpname) < 0) {
- err = -EOPNOTSUPP;
- goto out;
- }
-
- helper = __nf_conntrack_helper_find(helpname,
- nf_ct_l3num(ct),
- nf_ct_protonum(ct));
- if (helper) {
- err = -EAGAIN;
- goto out;
- }
-#endif
- err = -EOPNOTSUPP;
- goto out;
- }
- }
+ int err;
if (cda[CTA_EXPECT_CLASS] && helper) {
class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS]));
- if (class > helper->expect_class_max) {
- err = -EINVAL;
- goto out;
- }
+ if (class > helper->expect_class_max)
+ return ERR_PTR(-EINVAL);
}
exp = nf_ct_expect_alloc(ct);
- if (!exp) {
- err = -ENOMEM;
- goto out;
- }
+ if (!exp)
+ return ERR_PTR(-ENOMEM);
+
help = nfct_help(ct);
if (!help) {
if (!cda[CTA_EXPECT_TIMEOUT]) {
@@ -2854,21 +2857,89 @@ ctnetlink_create_expect(struct net *net, u16 zone,
exp->class = class;
exp->master = ct;
exp->helper = helper;
- memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple));
- memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3));
- exp->mask.src.u.all = mask.src.u.all;
+ exp->tuple = *tuple;
+ exp->mask.src.u3 = mask->src.u3;
+ exp->mask.src.u.all = mask->src.u.all;
if (cda[CTA_EXPECT_NAT]) {
err = ctnetlink_parse_expect_nat(cda[CTA_EXPECT_NAT],
- exp, u3);
+ exp, nf_ct_l3num(ct));
if (err < 0)
goto err_out;
}
- err = nf_ct_expect_related_report(exp, portid, report);
+ return exp;
err_out:
nf_ct_expect_put(exp);
-out:
- nf_ct_put(nf_ct_tuplehash_to_ctrack(h));
+ return ERR_PTR(err);
+}
+
+static int
+ctnetlink_create_expect(struct net *net, u16 zone,
+ const struct nlattr * const cda[],
+ u_int8_t u3, u32 portid, int report)
+{
+ struct nf_conntrack_tuple tuple, mask, master_tuple;
+ struct nf_conntrack_tuple_hash *h = NULL;
+ struct nf_conntrack_helper *helper = NULL;
+ struct nf_conntrack_expect *exp;
+ struct nf_conn *ct;
+ int err;
+
+ /* caller guarantees that those three CTA_EXPECT_* exist */
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+ if (err < 0)
+ return err;
+ err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
+ if (err < 0)
+ return err;
+ err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
+ if (err < 0)
+ return err;
+
+ /* Look for master conntrack of this expectation */
+ h = nf_conntrack_find_get(net, zone, &master_tuple);
+ if (!h)
+ return -ENOENT;
+ ct = nf_ct_tuplehash_to_ctrack(h);
+
+ if (cda[CTA_EXPECT_HELP_NAME]) {
+ const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
+
+ helper = __nf_conntrack_helper_find(helpname, u3,
+ nf_ct_protonum(ct));
+ if (helper == NULL) {
+#ifdef CONFIG_MODULES
+ if (request_module("nfct-helper-%s", helpname) < 0) {
+ err = -EOPNOTSUPP;
+ goto err_ct;
+ }
+ helper = __nf_conntrack_helper_find(helpname, u3,
+ nf_ct_protonum(ct));
+ if (helper) {
+ err = -EAGAIN;
+ goto err_ct;
+ }
+#endif
+ err = -EOPNOTSUPP;
+ goto err_ct;
+ }
+ }
+
+ exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask);
+ if (IS_ERR(exp)) {
+ err = PTR_ERR(exp);
+ goto err_ct;
+ }
+
+ err = nf_ct_expect_related_report(exp, portid, report);
+ if (err < 0)
+ goto err_exp;
+
+ return 0;
+err_exp:
+ nf_ct_expect_put(exp);
+err_ct:
+ nf_ct_put(ct);
return err;
}
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 0ab9636ac57e..ce3004156eeb 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -281,7 +281,7 @@ void nf_ct_l3proto_pernet_unregister(struct net *net,
nf_ct_l3proto_unregister_sysctl(net, proto);
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_cleanup(net, kill_l3proto, proto);
+ nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
@@ -476,7 +476,7 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
+ nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 2f8010707d01..d224e001f14f 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -496,7 +496,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
}
#ifdef CONFIG_NF_NAT_NEEDED
-static inline s16 nat_offset(const struct nf_conn *ct,
+static inline s32 nat_offset(const struct nf_conn *ct,
enum ip_conntrack_dir dir,
u32 seq)
{
@@ -525,7 +525,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
__u32 seq, ack, sack, end, win, swin;
- s16 receiver_offset;
+ s32 receiver_offset;
bool res, in_recv_win;
/*
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 038eee5c8f85..6ff808375b5e 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -497,7 +497,7 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
rtnl_lock();
for_each_net(net)
- nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean);
+ nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
rtnl_unlock();
}
@@ -511,7 +511,7 @@ static void nf_nat_l3proto_clean(u8 l3proto)
rtnl_lock();
for_each_net(net)
- nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean);
+ nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
rtnl_unlock();
}
@@ -749,7 +749,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
{
struct nf_nat_proto_clean clean = {};
- nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean);
+ nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean, 0, 0);
synchronize_rcu();
nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
}
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 85e20a919081..46b9baa845a6 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -30,8 +30,6 @@
pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
x->offset_before, x->offset_after, x->correction_pos);
-static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
-
/* Setup TCP sequence correction given this change at this sequence */
static inline void
adjust_tcp_sequence(u32 seq,
@@ -49,7 +47,7 @@ adjust_tcp_sequence(u32 seq,
pr_debug("adjust_tcp_sequence: Seq_offset before: ");
DUMP_OFFSET(this_way);
- spin_lock_bh(&nf_nat_seqofs_lock);
+ spin_lock_bh(&ct->lock);
/* SYN adjust. If it's uninitialized, or this is after last
* correction, record it: we don't handle more than one
@@ -61,31 +59,26 @@ adjust_tcp_sequence(u32 seq,
this_way->offset_before = this_way->offset_after;
this_way->offset_after += sizediff;
}
- spin_unlock_bh(&nf_nat_seqofs_lock);
+ spin_unlock_bh(&ct->lock);
pr_debug("adjust_tcp_sequence: Seq_offset after: ");
DUMP_OFFSET(this_way);
}
-/* Get the offset value, for conntrack */
-s16 nf_nat_get_offset(const struct nf_conn *ct,
+/* Get the offset value, for conntrack. Caller must have the conntrack locked */
+s32 nf_nat_get_offset(const struct nf_conn *ct,
enum ip_conntrack_dir dir,
u32 seq)
{
struct nf_conn_nat *nat = nfct_nat(ct);
struct nf_nat_seq *this_way;
- s16 offset;
if (!nat)
return 0;
this_way = &nat->seq[dir];
- spin_lock_bh(&nf_nat_seqofs_lock);
- offset = after(seq, this_way->correction_pos)
+ return after(seq, this_way->correction_pos)
? this_way->offset_after : this_way->offset_before;
- spin_unlock_bh(&nf_nat_seqofs_lock);
-
- return offset;
}
/* Frobs data inside this packet, which is linear. */
@@ -143,7 +136,7 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
}
void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
- __be32 seq, s16 off)
+ __be32 seq, s32 off)
{
if (!off)
return;
@@ -370,9 +363,10 @@ nf_nat_seq_adjust(struct sk_buff *skb,
struct tcphdr *tcph;
int dir;
__be32 newseq, newack;
- s16 seqoff, ackoff;
+ s32 seqoff, ackoff;
struct nf_conn_nat *nat = nfct_nat(ct);
struct nf_nat_seq *this_way, *other_way;
+ int res;
dir = CTINFO2DIR(ctinfo);
@@ -383,6 +377,7 @@ nf_nat_seq_adjust(struct sk_buff *skb,
return 0;
tcph = (void *)skb->data + protoff;
+ spin_lock_bh(&ct->lock);
if (after(ntohl(tcph->seq), this_way->correction_pos))
seqoff = this_way->offset_after;
else
@@ -407,7 +402,10 @@ nf_nat_seq_adjust(struct sk_buff *skb,
tcph->seq = newseq;
tcph->ack_seq = newack;
- return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo);
+ res = nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo);
+ spin_unlock_bh(&ct->lock);
+
+ return res;
}
/* Setup NAT on this expected conntrack so it follows master. */
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
deleted file mode 100644
index 474d621cbc2e..000000000000
--- a/net/netfilter/nf_tproxy_core.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Transparent proxy support for Linux/iptables
- *
- * Copyright (c) 2006-2007 BalaBit IT Ltd.
- * Author: Balazs Scheidler, Krisztian Kovacs
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/module.h>
-
-#include <linux/net.h>
-#include <linux/if.h>
-#include <linux/netdevice.h>
-#include <net/udp.h>
-#include <net/netfilter/nf_tproxy_core.h>
-
-
-static void
-nf_tproxy_destructor(struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
-
- skb->sk = NULL;
- skb->destructor = NULL;
-
- if (sk)
- sock_put(sk);
-}
-
-/* consumes sk */
-void
-nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
-{
- /* assigning tw sockets complicates things; most
- * skb->sk->X checks would have to test sk->sk_state first */
- if (sk->sk_state == TCP_TIME_WAIT) {
- inet_twsk_put(inet_twsk(sk));
- return;
- }
-
- skb_orphan(skb);
- skb->sk = sk;
- skb->destructor = nf_tproxy_destructor;
-}
-EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock);
-
-static int __init nf_tproxy_init(void)
-{
- pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n");
- pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n");
- return 0;
-}
-
-module_init(nf_tproxy_init);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Krisztian Kovacs");
-MODULE_DESCRIPTION("Transparent proxy support core routines");
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 8a703c3dd318..95a98c8c1da6 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -862,6 +862,7 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
[NFQA_MARK] = { .type = NLA_U32 },
[NFQA_PAYLOAD] = { .type = NLA_UNSPEC },
[NFQA_CT] = { .type = NLA_UNSPEC },
+ [NFQA_EXP] = { .type = NLA_UNSPEC },
};
static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
@@ -990,9 +991,14 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
if (entry == NULL)
return -ENOENT;
- rcu_read_lock();
- if (nfqa[NFQA_CT] && (queue->flags & NFQA_CFG_F_CONNTRACK))
+ if (nfqa[NFQA_CT]) {
ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo);
+ if (ct && nfqa[NFQA_EXP]) {
+ nfqnl_attach_expect(ct, nfqa[NFQA_EXP],
+ NETLINK_CB(skb).portid,
+ nlmsg_report(nlh));
+ }
+ }
if (nfqa[NFQA_PAYLOAD]) {
u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]);
@@ -1005,7 +1011,6 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
if (ct)
nfqnl_ct_seq_adjust(skb, ct, ctinfo, diff);
}
- rcu_read_unlock();
if (nfqa[NFQA_MARK])
entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c
index ab61d66bc0b9..be893039966d 100644
--- a/net/netfilter/nfnetlink_queue_ct.c
+++ b/net/netfilter/nfnetlink_queue_ct.c
@@ -96,3 +96,18 @@ void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
if ((ct->status & IPS_NAT_MASK) && diff)
nfq_nat_ct->seq_adjust(skb, ct, ctinfo, diff);
}
+
+int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
+ u32 portid, u32 report)
+{
+ struct nfq_ct_hook *nfq_ct;
+
+ if (nf_ct_is_untracked(ct))
+ return 0;
+
+ nfq_ct = rcu_dereference(nfq_ct_hook);
+ if (nfq_ct == NULL)
+ return -EOPNOTSUPP;
+
+ return nfq_ct->attach_expect(attr, ct, portid, report);
+}
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d7f195388f66..5d8a3a3cd5a7 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -15,7 +15,9 @@
#include <linux/ip.h>
#include <net/checksum.h>
#include <net/udp.h>
+#include <net/tcp.h>
#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
#include <linux/inetdevice.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
@@ -26,13 +28,18 @@
#define XT_TPROXY_HAVE_IPV6 1
#include <net/if_inet6.h>
#include <net/addrconf.h>
+#include <net/inet6_hashtables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif
-#include <net/netfilter/nf_tproxy_core.h>
#include <linux/netfilter/xt_TPROXY.h>
+enum nf_tproxy_lookup_t {
+ NFT_LOOKUP_LISTENER,
+ NFT_LOOKUP_ESTABLISHED,
+};
+
static bool tproxy_sk_is_transparent(struct sock *sk)
{
if (sk->sk_state != TCP_TIME_WAIT) {
@@ -68,6 +75,157 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
return laddr ? laddr : daddr;
}
+/*
+ * This is used when the user wants to intercept a connection matching
+ * an explicit iptables rule. In this case the sockets are assumed
+ * matching in preference order:
+ *
+ * - match: if there's a fully established connection matching the
+ * _packet_ tuple, it is returned, assuming the redirection
+ * already took place and we process a packet belonging to an
+ * established connection
+ *
+ * - match: if there's a listening socket matching the redirection
+ * (e.g. on-port & on-ip of the connection), it is returned,
+ * regardless if it was bound to 0.0.0.0 or an explicit
+ * address. The reasoning is that if there's an explicit rule, it
+ * does not really matter if the listener is bound to an interface
+ * or to 0. The user already stated that he wants redirection
+ * (since he added the rule).
+ *
+ * Please note that there's an overlap between what a TPROXY target
+ * and a socket match will match. Normally if you have both rules the
+ * "socket" match will be the first one, effectively all packets
+ * belonging to established connections going through that one.
+ */
+static inline struct sock *
+nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
+ const __be32 saddr, const __be32 daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in,
+ const enum nf_tproxy_lookup_t lookup_type)
+{
+ struct sock *sk;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ switch (lookup_type) {
+ case NFT_LOOKUP_LISTENER:
+ sk = inet_lookup_listener(net, &tcp_hashinfo,
+ saddr, sport,
+ daddr, dport,
+ in->ifindex);
+
+ /* NOTE: we return listeners even if bound to
+ * 0.0.0.0, those are filtered out in
+ * xt_socket, since xt_TPROXY needs 0 bound
+ * listeners too
+ */
+ break;
+ case NFT_LOOKUP_ESTABLISHED:
+ sk = inet_lookup_established(net, &tcp_hashinfo,
+ saddr, sport, daddr, dport,
+ in->ifindex);
+ break;
+ default:
+ BUG();
+ }
+ break;
+ case IPPROTO_UDP:
+ sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ if (sk) {
+ int connected = (sk->sk_state == TCP_ESTABLISHED);
+ int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
+
+ /* NOTE: we return listeners even if bound to
+ * 0.0.0.0, those are filtered out in
+ * xt_socket, since xt_TPROXY needs 0 bound
+ * listeners too
+ */
+ if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
+ (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
+ sock_put(sk);
+ sk = NULL;
+ }
+ }
+ break;
+ default:
+ WARN_ON(1);
+ sk = NULL;
+ }
+
+ pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
+ protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
+
+ return sk;
+}
+
+#ifdef XT_TPROXY_HAVE_IPV6
+static inline struct sock *
+nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
+ const struct in6_addr *saddr, const struct in6_addr *daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in,
+ const enum nf_tproxy_lookup_t lookup_type)
+{
+ struct sock *sk;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ switch (lookup_type) {
+ case NFT_LOOKUP_LISTENER:
+ sk = inet6_lookup_listener(net, &tcp_hashinfo,
+ saddr, sport,
+ daddr, ntohs(dport),
+ in->ifindex);
+
+ /* NOTE: we return listeners even if bound to
+ * 0.0.0.0, those are filtered out in
+ * xt_socket, since xt_TPROXY needs 0 bound
+ * listeners too
+ */
+ break;
+ case NFT_LOOKUP_ESTABLISHED:
+ sk = __inet6_lookup_established(net, &tcp_hashinfo,
+ saddr, sport, daddr, ntohs(dport),
+ in->ifindex);
+ break;
+ default:
+ BUG();
+ }
+ break;
+ case IPPROTO_UDP:
+ sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ if (sk) {
+ int connected = (sk->sk_state == TCP_ESTABLISHED);
+ int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr);
+
+ /* NOTE: we return listeners even if bound to
+ * 0.0.0.0, those are filtered out in
+ * xt_socket, since xt_TPROXY needs 0 bound
+ * listeners too
+ */
+ if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
+ (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
+ sock_put(sk);
+ sk = NULL;
+ }
+ }
+ break;
+ default:
+ WARN_ON(1);
+ sk = NULL;
+ }
+
+ pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
+ protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
+
+ return sk;
+}
+#endif
+
/**
* tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections
* @skb: The skb being processed.
@@ -117,6 +275,15 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
return sk;
}
+/* assign a socket to the skb -- consumes sk */
+static void
+nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
+{
+ skb_orphan(skb);
+ skb->sk = sk;
+ skb->destructor = sock_edemux;
+}
+
static unsigned int
tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
u_int32_t mark_mask, u_int32_t mark_value)
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 68ff29f60867..fab6eea1bf38 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -202,7 +202,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
return -EINVAL;
}
if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
- pr_err("ipv6 PROHIBT (THROW, NAT ..) matching not supported\n");
+ pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n");
return -EINVAL;
}
if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 20b15916f403..06df2b9110f5 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -19,12 +19,12 @@
#include <net/icmp.h>
#include <net/sock.h>
#include <net/inet_sock.h>
-#include <net/netfilter/nf_tproxy_core.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
#define XT_SOCKET_HAVE_IPV6 1
#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/inet6_hashtables.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif
@@ -101,6 +101,43 @@ extract_icmp4_fields(const struct sk_buff *skb,
return 0;
}
+/* "socket" match based redirection (no specific rule)
+ * ===================================================
+ *
+ * There are connections with dynamic endpoints (e.g. FTP data
+ * connection) that the user is unable to add explicit rules
+ * for. These are taken care of by a generic "socket" rule. It is
+ * assumed that the proxy application is trusted to open such
+ * connections without explicit iptables rule (except of course the
+ * generic 'socket' rule). In this case the following sockets are
+ * matched in preference order:
+ *
+ * - match: if there's a fully established connection matching the
+ * _packet_ tuple
+ *
+ * - match: if there's a non-zero bound listener (possibly with a
+ * non-local address) We don't accept zero-bound listeners, since
+ * then local services could intercept traffic going through the
+ * box.
+ */
+static struct sock *
+xt_socket_get_sock_v4(struct net *net, const u8 protocol,
+ const __be32 saddr, const __be32 daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in)
+{
+ switch (protocol) {
+ case IPPROTO_TCP:
+ return __inet_lookup(net, &tcp_hashinfo,
+ saddr, sport, daddr, dport,
+ in->ifindex);
+ case IPPROTO_UDP:
+ return udp4_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ }
+ return NULL;
+}
+
static bool
socket_match(const struct sk_buff *skb, struct xt_action_param *par,
const struct xt_socket_mtinfo1 *info)
@@ -156,9 +193,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
#endif
if (!sk)
- sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
+ sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol,
saddr, daddr, sport, dport,
- par->in, NFT_LOOKUP_ANY);
+ par->in);
if (sk) {
bool wildcard;
bool transparent = true;
@@ -265,6 +302,25 @@ extract_icmp6_fields(const struct sk_buff *skb,
return 0;
}
+static struct sock *
+xt_socket_get_sock_v6(struct net *net, const u8 protocol,
+ const struct in6_addr *saddr, const struct in6_addr *daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in)
+{
+ switch (protocol) {
+ case IPPROTO_TCP:
+ return inet6_lookup(net, &tcp_hashinfo,
+ saddr, sport, daddr, dport,
+ in->ifindex);
+ case IPPROTO_UDP:
+ return udp6_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ }
+
+ return NULL;
+}
+
static bool
socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
{
@@ -302,9 +358,9 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
}
if (!sk)
- sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
+ sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto,
saddr, daddr, sport, dport,
- par->in, NFT_LOOKUP_ANY);
+ par->in);
if (sk) {
bool wildcard;
bool transparent = true;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index f85f8a2ad6cf..512718adb0d5 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -789,10 +789,6 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
struct net *net = sock_net(skb->sk);
int chains_to_skip = cb->args[0];
int fams_to_skip = cb->args[1];
- bool need_locking = chains_to_skip || fams_to_skip;
-
- if (need_locking)
- genl_lock();
for (i = chains_to_skip; i < GENL_FAM_TAB_SIZE; i++) {
n = 0;
@@ -814,9 +810,6 @@ errout:
cb->args[0] = i;
cb->args[1] = n;
- if (need_locking)
- genl_unlock();
-
return skb->len;
}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 6c53dd9f5ccc..1fdf9ab91c3f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3215,9 +3215,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
if (po->tp_version == TPACKET_V3) {
lv = sizeof(struct tpacket_stats_v3);
+ st.stats3.tp_packets += st.stats3.tp_drops;
data = &st.stats3;
} else {
lv = sizeof(struct tpacket_stats);
+ st.stats1.tp_packets += st.stats1.tp_drops;
data = &st.stats1;
}
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index e62c22535be4..cd72ae57aff1 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -155,13 +155,8 @@ static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
if (sp == asoc->peer.primary_path)
printl("*");
- if (sp->ipaddr.sa.sa_family == AF_INET)
- printl("%pI4 ", &sp->ipaddr.v4.sin_addr);
- else
- printl("%pI6 ", &sp->ipaddr.v6.sin6_addr);
-
- printl("%2u %8u %8u %8u %8u %8u ",
- sp->state, sp->cwnd, sp->ssthresh,
+ printl("%pISc %2u %8u %8u %8u %8u %8u ",
+ &sp->ipaddr, sp->state, sp->cwnd, sp->ssthresh,
sp->flight_size, sp->partial_bytes_acked,
sp->pathmtu);
}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index adf1e98f4c3e..170c0abd2a01 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2664,8 +2664,8 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_NEW_KEY);
- if (IS_ERR(hdr))
- return PTR_ERR(hdr);
+ if (!hdr)
+ return -ENOBUFS;
cookie.msg = msg;
cookie.idx = key_idx;
@@ -6670,6 +6670,9 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
NL80211_CMD_TESTMODE);
struct nlattr *tmdata;
+ if (!hdr)
+ break;
+
if (nla_put_u32(skb, NL80211_ATTR_WIPHY, phy_idx)) {
genlmsg_cancel(skb, hdr);
break;
@@ -7114,9 +7117,8 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_REMAIN_ON_CHANNEL);
-
- if (IS_ERR(hdr)) {
- err = PTR_ERR(hdr);
+ if (!hdr) {
+ err = -ENOBUFS;
goto free_msg;
}
@@ -7414,9 +7416,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_FRAME);
-
- if (IS_ERR(hdr)) {
- err = PTR_ERR(hdr);
+ if (!hdr) {
+ err = -ENOBUFS;
goto free_msg;
}
}
@@ -8551,9 +8552,8 @@ static int nl80211_probe_client(struct sk_buff *skb,
hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_PROBE_CLIENT);
-
- if (IS_ERR(hdr)) {
- err = PTR_ERR(hdr);
+ if (!hdr) {
+ err = -ENOBUFS;
goto free_msg;
}
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 81c8a10d743c..20e86a95dc4e 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -976,21 +976,19 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
struct net_device *dev, u16 reason, bool wextev)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- int err;
+ int err = 0;
ASSERT_WDEV_LOCK(wdev);
kfree(wdev->connect_keys);
wdev->connect_keys = NULL;
- if (wdev->conn) {
+ if (wdev->conn)
err = cfg80211_sme_disconnect(wdev, reason);
- } else if (!rdev->ops->disconnect) {
+ else if (!rdev->ops->disconnect)
cfg80211_mlme_down(rdev, dev);
- err = 0;
- } else {
+ else if (wdev->current_bss)
err = rdev_disconnect(rdev, dev, reason);
- }
return err;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d8da6b8c6ba8..ad8cc7bcf065 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -308,7 +308,7 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
{
BUG_ON(!policy->walk.dead);
- if (del_timer(&policy->timer))
+ if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
BUG();
security_xfrm_policy_free(policy->security);
@@ -2132,8 +2132,6 @@ restart:
* have the xfrm_state's. We need to wait for KM to
* negotiate new SA's or bail out with error.*/
if (net->xfrm.sysctl_larval_drop) {
- /* EREMOTE tells the caller to generate
- * a one-shot blackhole route. */
dst_release(dst);
xfrm_pols_put(pols, drop_pols);
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 78f66fa92449..4f8ace855864 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -499,7 +499,8 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
INIT_HLIST_NODE(&x->bydst);
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
- tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+ tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
+ CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
setup_timer(&x->rtimer, xfrm_replay_timer_handler,
(unsigned long)x);
x->curlft.add_time = get_seconds();
@@ -990,11 +991,13 @@ void xfrm_state_insert(struct xfrm_state *x)
EXPORT_SYMBOL(xfrm_state_insert);
/* xfrm_state_lock is held */
-static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m,
+static struct xfrm_state *__find_acq_core(struct net *net,
+ const struct xfrm_mark *m,
unsigned short family, u8 mode,
u32 reqid, u8 proto,
const xfrm_address_t *daddr,
- const xfrm_address_t *saddr, int create)
+ const xfrm_address_t *saddr,
+ int create)
{
unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
struct xfrm_state *x;
@@ -1399,9 +1402,9 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark,
EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
struct xfrm_state *
-xfrm_find_acq(struct net *net, struct xfrm_mark *mark, u8 mode, u32 reqid, u8 proto,
- const xfrm_address_t *daddr, const xfrm_address_t *saddr,
- int create, unsigned short family)
+xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
+ u8 proto, const xfrm_address_t *daddr,
+ const xfrm_address_t *saddr, int create, unsigned short family)
{
struct xfrm_state *x;