Diffstat (limited to 'net')
46 files changed, 917 insertions, 553 deletions
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 688a0419756b..857e1b8349ee 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -432,12 +432,16 @@ find_router: switch (packet_type) { case BATADV_UNICAST: - batadv_unicast_prepare_skb(skb, orig_node); + if (!batadv_unicast_prepare_skb(skb, orig_node)) + goto out; + header_len = sizeof(struct batadv_unicast_packet); break; case BATADV_UNICAST_4ADDR: - batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node, - packet_subtype); + if (!batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node, + packet_subtype)) + goto out; + header_len = sizeof(struct batadv_unicast_4addr_packet); break; default: diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 60aca9109a50..ffd5874f2592 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -161,7 +161,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) if (!pv) return; - for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) { f = __br_fdb_get(br, br->dev->dev_addr, vid); if (f && f->is_local && !f->dst) fdb_delete(br, f); @@ -730,7 +730,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], /* VID was specified, so use it. */ err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) { err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); goto out; } @@ -739,7 +739,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], * specify a VLAN. To be nice, add/update entry for every * vlan on this port. */ - for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); if (err) goto out; @@ -817,7 +817,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], err = __br_fdb_delete(p, addr, vid); } else { - if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) { err = __br_fdb_delete(p, addr, 0); goto out; } @@ -827,7 +827,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], * vlan on this port. 
*/ err = -ENOENT; - for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { err &= __br_fdb_delete(p, addr, vid); } } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 1fc30abd3a52..b9259efa636e 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -132,7 +132,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, else pv = br_get_vlan_info(br); - if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) + if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) goto done; af = nla_nest_start(skb, IFLA_AF_SPEC); @@ -140,7 +140,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; pvid = br_get_pvid(pv); - for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { vinfo.vid = vid; vinfo.flags = 0; if (vid == pvid) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index bd58b45f5f90..9a9ffe7e4019 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -108,7 +108,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) clear_bit(vid, v->vlan_bitmap); v->num_vlans--; - if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + if (bitmap_empty(v->vlan_bitmap, VLAN_N_VID)) { if (v->port_idx) rcu_assign_pointer(v->parent.port->vlan_info, NULL); else @@ -122,7 +122,7 @@ static void __vlan_flush(struct net_port_vlans *v) { smp_wmb(); v->pvid = 0; - bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN); + bitmap_zero(v->vlan_bitmap, VLAN_N_VID); if (v->port_idx) rcu_assign_pointer(v->parent.port->vlan_info, NULL); else diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 3b9d5f20bd1c..c85e71e0c7ff 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -67,39 +67,6 @@ static const u8 lowpan_ttl_values[] = {0, 1, 64, 255}; static LIST_HEAD(lowpan_devices); -/* - * Uncompression of linklocal: - * 0 -> 16 bytes from packet - * 1 -> 2 bytes from prefix - bunch of zeroes and 8 from packet - * 2 -> 2 bytes from prefix - zeroes + 2 from packet - * 3 -> 2 bytes from prefix - infer 8 bytes from lladdr - * - * NOTE: => the uncompress function does change 0xf to 0x10 - * NOTE: 0x00 => no-autoconfig => unspecified - */ -static const u8 lowpan_unc_llconf[] = {0x0f, 0x28, 0x22, 0x20}; - -/* - * Uncompression of ctx-based: - * 0 -> 0 bits from packet [unspecified / reserved] - * 1 -> 8 bytes from prefix - bunch of zeroes and 8 from packet - * 2 -> 8 bytes from prefix - zeroes + 2 from packet - * 3 -> 8 bytes from prefix - infer 8 bytes from lladdr - */ -static const u8 lowpan_unc_ctxconf[] = {0x00, 0x88, 0x82, 0x80}; - -/* - * Uncompression of ctx-base - * 0 -> 0 bits from packet - * 1 -> 2 bytes from prefix - bunch of zeroes 5 from packet - * 2 -> 2 bytes from prefix - zeroes + 3 from packet - * 3 -> 2 bytes from prefix - infer 1 bytes from lladdr - */ -static const u8 lowpan_unc_mxconf[] = {0x0f, 0x25, 0x23, 0x21}; - -/* Link local prefix */ -static const u8 lowpan_llprefix[] = {0xfe, 0x80}; - /* private device info */ struct lowpan_dev_info { struct net_device *real_dev; /* real WPAN device ptr */ @@ -191,55 +158,177 @@ lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, const struct in6_addr *ipaddr, return rol8(val, shift); } -static void -lowpan_uip_ds6_set_addr_iid(struct in6_addr *ipaddr, unsigned char *lladdr) +/* + * Uncompress address function for source and + * destination address(non-multicast). + * + * address_mode is sam value or dam value. 
+ */ +static int +lowpan_uncompress_addr(struct sk_buff *skb, + struct in6_addr *ipaddr, + const u8 address_mode, + const struct ieee802154_addr *lladdr) { - memcpy(&ipaddr->s6_addr[8], lladdr, IEEE802154_ADDR_LEN); - /* second bit-flip (Universe/Local) is done according RFC2464 */ - ipaddr->s6_addr[8] ^= 0x02; + bool fail; + + switch (address_mode) { + case LOWPAN_IPHC_ADDR_00: + /* for global link addresses */ + fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16); + break; + case LOWPAN_IPHC_ADDR_01: + /* fe:80::XXXX:XXXX:XXXX:XXXX */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[8], 8); + break; + case LOWPAN_IPHC_ADDR_02: + /* fe:80::ff:fe00:XXXX */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + ipaddr->s6_addr[11] = 0xFF; + ipaddr->s6_addr[12] = 0xFE; + fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[14], 2); + break; + case LOWPAN_IPHC_ADDR_03: + fail = false; + switch (lladdr->addr_type) { + case IEEE802154_ADDR_LONG: + /* fe:80::XXXX:XXXX:XXXX:XXXX + * \_________________/ + * hwaddr + */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + memcpy(&ipaddr->s6_addr[8], lladdr->hwaddr, + IEEE802154_ADDR_LEN); + /* second bit-flip (Universe/Local) + * is done according RFC2464 + */ + ipaddr->s6_addr[8] ^= 0x02; + break; + case IEEE802154_ADDR_SHORT: + /* fe:80::ff:fe00:XXXX + * \__/ + * short_addr + * + * Universe/Local bit is zero. + */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + ipaddr->s6_addr[11] = 0xFF; + ipaddr->s6_addr[12] = 0xFE; + ipaddr->s6_addr16[7] = htons(lladdr->short_addr); + break; + default: + pr_debug("Invalid addr_type set\n"); + return -EINVAL; + } + break; + default: + pr_debug("Invalid address mode value: 0x%x\n", address_mode); + return -EINVAL; + } + + if (fail) { + pr_debug("Failed to fetch skb data\n"); + return -EIO; + } + + lowpan_raw_dump_inline(NULL, "Reconstructed ipv6 addr is:\n", + ipaddr->s6_addr, 16); + + return 0; } -/* - * Uncompress addresses based on a prefix and a postfix with zeroes in - * between. If the postfix is zero in length it will use the link address - * to configure the IP address (autoconf style). - * pref_post_count takes a byte where the first nibble specify prefix count - * and the second postfix count (NOTE: 15/0xf => 16 bytes copy). +/* Uncompress address function for source context + * based address(non-multicast). */ static int -lowpan_uncompress_addr(struct sk_buff *skb, struct in6_addr *ipaddr, - u8 const *prefix, u8 pref_post_count, unsigned char *lladdr) +lowpan_uncompress_context_based_src_addr(struct sk_buff *skb, + struct in6_addr *ipaddr, + const u8 sam) { - u8 prefcount = pref_post_count >> 4; - u8 postcount = pref_post_count & 0x0f; - - /* full nibble 15 => 16 */ - prefcount = (prefcount == 15 ? 16 : prefcount); - postcount = (postcount == 15 ? 
16 : postcount); - - if (lladdr) - lowpan_raw_dump_inline(__func__, "linklocal address", - lladdr, IEEE802154_ADDR_LEN); - if (prefcount > 0) - memcpy(ipaddr, prefix, prefcount); - - if (prefcount + postcount < 16) - memset(&ipaddr->s6_addr[prefcount], 0, - 16 - (prefcount + postcount)); - - if (postcount > 0) { - memcpy(&ipaddr->s6_addr[16 - postcount], skb->data, postcount); - skb_pull(skb, postcount); - } else if (prefcount > 0) { - if (lladdr == NULL) - return -EINVAL; + switch (sam) { + case LOWPAN_IPHC_ADDR_00: + /* unspec address :: + * Do nothing, address is already :: + */ + break; + case LOWPAN_IPHC_ADDR_01: + /* TODO */ + case LOWPAN_IPHC_ADDR_02: + /* TODO */ + case LOWPAN_IPHC_ADDR_03: + /* TODO */ + netdev_warn(skb->dev, "SAM value 0x%x not supported\n", sam); + return -EINVAL; + default: + pr_debug("Invalid sam value: 0x%x\n", sam); + return -EINVAL; + } + + lowpan_raw_dump_inline(NULL, + "Reconstructed context based ipv6 src addr is:\n", + ipaddr->s6_addr, 16); + + return 0; +} - /* no IID based configuration if no prefix and no data */ - lowpan_uip_ds6_set_addr_iid(ipaddr, lladdr); +/* Uncompress function for multicast destination address, + * when M bit is set. + */ +static int +lowpan_uncompress_multicast_daddr(struct sk_buff *skb, + struct in6_addr *ipaddr, + const u8 dam) +{ + bool fail; + + switch (dam) { + case LOWPAN_IPHC_DAM_00: + /* 00: 128 bits. The full address + * is carried in-line. + */ + fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16); + break; + case LOWPAN_IPHC_DAM_01: + /* 01: 48 bits. The address takes + * the form ffXX::00XX:XXXX:XXXX. + */ + ipaddr->s6_addr[0] = 0xFF; + fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1); + fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[11], 5); + break; + case LOWPAN_IPHC_DAM_10: + /* 10: 32 bits. The address takes + * the form ffXX::00XX:XXXX. + */ + ipaddr->s6_addr[0] = 0xFF; + fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1); + fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[13], 3); + break; + case LOWPAN_IPHC_DAM_11: + /* 11: 8 bits. The address takes + * the form ff02::00XX. 
+ */ + ipaddr->s6_addr[0] = 0xFF; + ipaddr->s6_addr[1] = 0x02; + fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[15], 1); + break; + default: + pr_debug("DAM value has a wrong value: 0x%x\n", dam); + return -EINVAL; + } + + if (fail) { + pr_debug("Failed to fetch skb data\n"); + return -EIO; } - pr_debug("uncompressing %d + %d => ", prefcount, postcount); - lowpan_raw_dump_inline(NULL, NULL, ipaddr->s6_addr, 16); + lowpan_raw_dump_inline(NULL, "Reconstructed ipv6 multicast addr is:\n", + ipaddr->s6_addr, 16); return 0; } @@ -702,6 +791,12 @@ lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag) skb_reserve(frame->skb, sizeof(struct ipv6hdr)); skb_put(frame->skb, frame->length); + /* copy the first control block to keep a + * trace of the link-layer addresses in case + * of a link-local compressed address + */ + memcpy(frame->skb->cb, skb->cb, sizeof(skb->cb)); + init_timer(&frame->timer); /* time out is the same as for ipv6 - 60 sec */ frame->timer.expires = jiffies + LOWPAN_FRAG_TIMEOUT; @@ -723,9 +818,9 @@ frame_err: static int lowpan_process_data(struct sk_buff *skb) { - struct ipv6hdr hdr; + struct ipv6hdr hdr = {}; u8 tmp, iphc0, iphc1, num_context = 0; - u8 *_saddr, *_daddr; + const struct ieee802154_addr *_saddr, *_daddr; int err; lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, @@ -828,8 +923,8 @@ lowpan_process_data(struct sk_buff *skb) if (lowpan_fetch_skb_u8(skb, &iphc1)) goto drop; - _saddr = mac_cb(skb)->sa.hwaddr; - _daddr = mac_cb(skb)->da.hwaddr; + _saddr = &mac_cb(skb)->sa; + _daddr = &mac_cb(skb)->da; pr_debug("iphc0 = %02x, iphc1 = %02x\n", iphc0, iphc1); @@ -868,8 +963,6 @@ lowpan_process_data(struct sk_buff *skb) hdr.priority = ((tmp >> 2) & 0x0f); hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30); - hdr.flow_lbl[1] = 0; - hdr.flow_lbl[2] = 0; break; /* * Flow Label carried in-line @@ -885,10 +978,6 @@ lowpan_process_data(struct sk_buff *skb) break; /* Traffic Class and Flow Label are elided */ case 3: /* 11b */ - hdr.priority = 0; - hdr.flow_lbl[0] = 0; - hdr.flow_lbl[1] = 0; - hdr.flow_lbl[2] = 0; break; default: break; @@ -915,10 +1004,18 @@ lowpan_process_data(struct sk_buff *skb) /* Extract SAM to the tmp variable */ tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03; - /* Source address uncompression */ - pr_debug("source address stateless compression\n"); - err = lowpan_uncompress_addr(skb, &hdr.saddr, lowpan_llprefix, - lowpan_unc_llconf[tmp], skb->data); + if (iphc1 & LOWPAN_IPHC_SAC) { + /* Source address context based uncompression */ + pr_debug("SAC bit is set. 
Handle context based source address.\n"); + err = lowpan_uncompress_context_based_src_addr( + skb, &hdr.saddr, tmp); + } else { + /* Source address uncompression */ + pr_debug("source address stateless compression\n"); + err = lowpan_uncompress_addr(skb, &hdr.saddr, tmp, _saddr); + } + + /* Check on error of previous branch */ if (err) goto drop; @@ -931,23 +1028,14 @@ lowpan_process_data(struct sk_buff *skb) pr_debug("dest: context-based mcast compression\n"); /* TODO: implement this */ } else { - u8 prefix[] = {0xff, 0x02}; - - pr_debug("dest: non context-based mcast compression\n"); - if (0 < tmp && tmp < 3) { - if (lowpan_fetch_skb_u8(skb, &prefix[1])) - goto drop; - } - - err = lowpan_uncompress_addr(skb, &hdr.daddr, prefix, - lowpan_unc_mxconf[tmp], NULL); + err = lowpan_uncompress_multicast_daddr( + skb, &hdr.daddr, tmp); if (err) goto drop; } } else { pr_debug("dest: stateless compression\n"); - err = lowpan_uncompress_addr(skb, &hdr.daddr, lowpan_llprefix, - lowpan_unc_llconf[tmp], skb->data); + err = lowpan_uncompress_addr(skb, &hdr.daddr, tmp, _daddr); if (err) goto drop; } diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h index 4b8f917658b5..2869c0526dad 100644 --- a/net/ieee802154/6lowpan.h +++ b/net/ieee802154/6lowpan.h @@ -193,10 +193,12 @@ /* Values of fields within the IPHC encoding second byte */ #define LOWPAN_IPHC_CID 0x80 +#define LOWPAN_IPHC_ADDR_00 0x00 +#define LOWPAN_IPHC_ADDR_01 0x01 +#define LOWPAN_IPHC_ADDR_02 0x02 +#define LOWPAN_IPHC_ADDR_03 0x03 + #define LOWPAN_IPHC_SAC 0x40 -#define LOWPAN_IPHC_SAM_00 0x00 -#define LOWPAN_IPHC_SAM_01 0x10 -#define LOWPAN_IPHC_SAM_10 0x20 #define LOWPAN_IPHC_SAM 0x30 #define LOWPAN_IPHC_SAM_BIT 4 @@ -230,4 +232,16 @@ dest = 16 bit inline */ #define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */ +static inline bool lowpan_fetch_skb(struct sk_buff *skb, + void *data, const unsigned int len) +{ + if (unlikely(!pskb_may_pull(skb, len))) + return true; + + skb_copy_from_linear_data(skb, data, len); + skb_pull(skb, len); + + return false; +} + #endif /* __6LOWPAN_H__ */ diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index a4d9126c7b51..830de3f4e293 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -857,13 +857,11 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. 
*/ - itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; + if (!IS_ERR(itn->fb_tunnel_dev)) + itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; rtnl_unlock(); - if (IS_ERR(itn->fb_tunnel_dev)) - return PTR_ERR(itn->fb_tunnel_dev); - - return 0; + return PTR_RET(itn->fb_tunnel_dev); } EXPORT_SYMBOL_GPL(ip_tunnel_init_net); diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 30e4de940567..00352ce0f0de 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -118,7 +118,7 @@ static int masq_device_event(struct notifier_block *this, NF_CT_ASSERT(dev->ifindex != 0); nf_ct_iterate_cleanup(net, device_cmp, - (void *)(long)dev->ifindex); + (void *)(long)dev->ifindex, 0, 0); } return NOTIFY_DONE; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e805481eff72..727f4365bcdf 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -112,7 +112,8 @@ #define RT_FL_TOS(oldflp4) \ ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) -#define IP_MAX_MTU 0xFFF0 +/* IPv4 datagram length is stored into 16bit field (tot_len) */ +#define IP_MAX_MTU 0xFFFF #define RT_GC_TIMEOUT (300*HZ) @@ -1227,10 +1228,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = 576; } - if (mtu > IP_MAX_MTU) - mtu = IP_MAX_MTU; - - return mtu; + return min_t(unsigned int, mtu, IP_MAX_MTU); } static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ab64eea042fa..4e42c03859f4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1117,6 +1117,13 @@ new_segment: goto wait_for_memory; /* + * All packets are restored as if they have + * already been sent. + */ + if (tp->repair) + TCP_SKB_CB(skb)->when = tcp_time_stamp; + + /* * Check whether we can use HW checksum. */ if (sk->sk_route_caps & NETIF_F_ALL_CSUM) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e965cc7b87ff..ec492eae0cd7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2485,8 +2485,6 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { tcp_try_keep_open(sk); - if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) - tcp_moderate_cwnd(tp); } else { tcp_cwnd_reduction(sk, prior_unsacked, 0); } @@ -3128,11 +3126,24 @@ static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag) inet_csk(sk)->icsk_ca_state != TCP_CA_Open; } +/* Decide wheather to run the increase function of congestion control. */ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) { - const struct tcp_sock *tp = tcp_sk(sk); - return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && - !tcp_in_cwnd_reduction(sk); + if (tcp_in_cwnd_reduction(sk)) + return false; + + /* If reordering is high then always grow cwnd whenever data is + * delivered regardless of its ordering. Otherwise stay conservative + * and only grow cwnd on in-order delivery in Open state, and retain + * cwnd in Disordered state (RFC5681). A stretched ACK with + * new SACK or ECE mark may first advance cwnd here and later reduce + * cwnd in tcp_fastretrans_alert() based on more states. + */ + if (tcp_sk(sk)->reordering > sysctl_tcp_reordering) + return flag & FLAG_FORWARD_PROGRESS; + + return inet_csk(sk)->icsk_ca_state == TCP_CA_Open && + flag & FLAG_DATA_ACKED; } /* Check that window update is acceptable. 
@@ -3352,18 +3363,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt); acked -= tp->packets_out; + /* Advance cwnd if state allows */ + if (tcp_may_raise_cwnd(sk, flag)) + tcp_cong_avoid(sk, ack, prior_in_flight); + if (tcp_ack_is_dubious(sk, flag)) { - /* Advance CWND, if state allows this. */ - if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) - tcp_cong_avoid(sk, ack, prior_in_flight); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); tcp_fastretrans_alert(sk, acked, prior_unsacked, is_dupack, flag); - } else { - if (flag & FLAG_DATA_ACKED) - tcp_cong_avoid(sk, ack, prior_in_flight); } - if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 05a3d45d3102..09d45d718973 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -821,8 +821,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, */ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - u16 queue_mapping, - bool nocache) + u16 queue_mapping) { const struct inet_request_sock *ireq = inet_rsk(req); struct flowi4 fl4; @@ -852,7 +851,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) { - int res = tcp_v4_send_synack(sk, NULL, req, 0, false); + int res = tcp_v4_send_synack(sk, NULL, req, 0); if (!res) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index d4943f67aff2..622a4377b397 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -46,6 +46,10 @@ static unsigned int bufsize __read_mostly = 4096; MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)"); module_param(bufsize, uint, 0); +static unsigned int fwmark __read_mostly = 0; +MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)"); +module_param(fwmark, uint, 0); + static int full __read_mostly; MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); module_param(full, int, 0); @@ -54,12 +58,16 @@ static const char procname[] = "tcpprobe"; struct tcp_log { ktime_t tstamp; - __be32 saddr, daddr; - __be16 sport, dport; + union { + struct sockaddr raw; + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } src, dst; u16 length; u32 snd_nxt; u32 snd_una; u32 snd_wnd; + u32 rcv_wnd; u32 snd_cwnd; u32 ssthresh; u32 srtt; @@ -86,19 +94,45 @@ static inline int tcp_probe_avail(void) return bufsize - tcp_probe_used() - 1; } +#define tcp_probe_copy_fl_to_si4(inet, si4, mem) \ + do { \ + si4.sin_family = AF_INET; \ + si4.sin_port = inet->inet_##mem##port; \ + si4.sin_addr.s_addr = inet->inet_##mem##addr; \ + } while (0) \ + +#if IS_ENABLED(CONFIG_IPV6) +#define tcp_probe_copy_fl_to_si6(inet, si6, mem) \ + do { \ + struct ipv6_pinfo *pi6 = inet->pinet6; \ + si6.sin6_family = AF_INET6; \ + si6.sin6_port = inet->inet_##mem##port; \ + si6.sin6_addr = pi6->mem##addr; \ + si6.sin6_flowinfo = 0; /* No need here. */ \ + si6.sin6_scope_id = 0; /* No need here. */ \ + } while (0) +#else +#define tcp_probe_copy_fl_to_si6(fl, si6, mem) \ + do { \ + memset(&si6, 0, sizeof(si6)); \ + } while (0) +#endif + /* * Hook inserted to be called before each receive packet. * Note: arguments must match tcp_rcv_established()! 
*/ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned int len) + const struct tcphdr *th, unsigned int len) { const struct tcp_sock *tp = tcp_sk(sk); const struct inet_sock *inet = inet_sk(sk); - /* Only update if port matches */ - if ((port == 0 || ntohs(inet->inet_dport) == port || - ntohs(inet->inet_sport) == port) && + /* Only update if port or skb mark matches */ + if (((port == 0 && fwmark == 0) || + ntohs(inet->inet_dport) == port || + ntohs(inet->inet_sport) == port || + (fwmark > 0 && skb->mark == fwmark)) && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { spin_lock(&tcp_probe.lock); @@ -107,15 +141,25 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, struct tcp_log *p = tcp_probe.log + tcp_probe.head; p->tstamp = ktime_get(); - p->saddr = inet->inet_saddr; - p->sport = inet->inet_sport; - p->daddr = inet->inet_daddr; - p->dport = inet->inet_dport; + switch (sk->sk_family) { + case AF_INET: + tcp_probe_copy_fl_to_si4(inet, p->src.v4, s); + tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d); + break; + case AF_INET6: + tcp_probe_copy_fl_to_si6(inet, p->src.v6, s); + tcp_probe_copy_fl_to_si6(inet, p->dst.v6, d); + break; + default: + BUG(); + } + p->length = skb->len; p->snd_nxt = tp->snd_nxt; p->snd_una = tp->snd_una; p->snd_cwnd = tp->snd_cwnd; p->snd_wnd = tp->snd_wnd; + p->rcv_wnd = tp->rcv_wnd; p->ssthresh = tcp_current_ssthresh(sk); p->srtt = tp->srtt >> 3; @@ -157,13 +201,11 @@ static int tcpprobe_sprint(char *tbuf, int n) = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); return scnprintf(tbuf, n, - "%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n", + "%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n", (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec, - &p->saddr, ntohs(p->sport), - &p->daddr, ntohs(p->dport), - p->length, p->snd_nxt, p->snd_una, - p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt); + &p->src, &p->dst, p->length, p->snd_nxt, p->snd_una, + p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd); } static ssize_t tcpprobe_read(struct file *file, char __user *buf, @@ -223,6 +265,13 @@ static __init int tcpprobe_init(void) { int ret = -ENOMEM; + /* Warning: if the function signature of tcp_rcv_established, + * has been changed, you also have to change the signature of + * jtcp_rcv_established, otherwise you end up right here! + */ + BUILD_BUG_ON(__same_type(tcp_rcv_established, + jtcp_rcv_established) == 0); + init_waitqueue_head(&tcp_probe.wait); spin_lock_init(&tcp_probe.lock); @@ -241,7 +290,8 @@ static __init int tcpprobe_init(void) if (ret) goto err1; - pr_info("probe registered (port=%d) bufsize=%u\n", port, bufsize); + pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n", + port, fwmark, bufsize); return 0; err1: remove_proc_entry(procname, init_net.proc_net); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ad12f7c43f25..2d6d1793bbfe 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -99,9 +99,9 @@ #define ACONF_DEBUG 2 #if ACONF_DEBUG >= 3 -#define ADBG(x) printk x +#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__) #else -#define ADBG(x) +#define ADBG(fmt, ...) 
do { if (0) printk(fmt, ##__VA_ARGS__); } while (0) #endif #define INFINITY_LIFE_TIME 0xFFFFFFFF @@ -374,9 +374,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) dev_hold(dev); if (snmp6_alloc_dev(ndev) < 0) { - ADBG((KERN_WARNING + ADBG(KERN_WARNING "%s: cannot allocate memory for statistics; dev=%s.\n", - __func__, dev->name)); + __func__, dev->name); neigh_parms_release(&nd_tbl, ndev->nd_parms); dev_put(dev); kfree(ndev); @@ -384,9 +384,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) } if (snmp6_register_dev(ndev) < 0) { - ADBG((KERN_WARNING + ADBG(KERN_WARNING "%s: cannot create /proc/net/dev_snmp6/%s\n", - __func__, dev->name)); + __func__, dev->name); neigh_parms_release(&nd_tbl, ndev->nd_parms); ndev->dead = 1; in6_dev_finish_destroy(ndev); @@ -849,7 +849,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, /* Ignore adding duplicate addresses on an interface */ if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) { - ADBG(("ipv6_add_addr: already assigned\n")); + ADBG("ipv6_add_addr: already assigned\n"); err = -EEXIST; goto out; } @@ -857,7 +857,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); if (ifa == NULL) { - ADBG(("ipv6_add_addr: malloc failed\n")); + ADBG("ipv6_add_addr: malloc failed\n"); err = -ENOBUFS; goto out; } @@ -1131,12 +1131,10 @@ retry: if (ifp->flags & IFA_F_OPTIMISTIC) addr_flags |= IFA_F_OPTIMISTIC; - ift = !max_addresses || - ipv6_count_addresses(idev) < max_addresses ? - ipv6_add_addr(idev, &addr, NULL, tmp_plen, - ipv6_addr_scope(&addr), addr_flags, - tmp_valid_lft, tmp_prefered_lft) : NULL; - if (IS_ERR_OR_NULL(ift)) { + ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen, + ipv6_addr_scope(&addr), addr_flags, + tmp_valid_lft, tmp_prefered_lft); + if (IS_ERR(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); pr_info("%s: retry temporary address regeneration\n", __func__); @@ -1814,6 +1812,16 @@ static int addrconf_ifid_gre(u8 *eui, struct net_device *dev) return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr); } +static int addrconf_ifid_ip6tnl(u8 *eui, struct net_device *dev) +{ + memcpy(eui, dev->perm_addr, 3); + memcpy(eui + 5, dev->perm_addr + 3, 3); + eui[3] = 0xFF; + eui[4] = 0xFE; + eui[0] ^= 2; + return 0; +} + static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) { switch (dev->type) { @@ -1832,6 +1840,8 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) return addrconf_ifid_eui64(eui, dev); case ARPHRD_IEEE1394: return addrconf_ifid_ieee1394(eui, dev); + case ARPHRD_TUNNEL6: + return addrconf_ifid_ip6tnl(eui, dev); } return -1; } @@ -2057,7 +2067,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) pinfo = (struct prefix_info *) opt; if (len < sizeof(struct prefix_info)) { - ADBG(("addrconf: prefix option too short\n")); + ADBG("addrconf: prefix option too short\n"); return; } @@ -2709,7 +2719,8 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_ARCNET) && (dev->type != ARPHRD_INFINIBAND) && (dev->type != ARPHRD_IEEE802154) && - (dev->type != ARPHRD_IEEE1394)) { + (dev->type != ARPHRD_IEEE1394) && + (dev->type != ARPHRD_TUNNEL6)) { /* Alas, we support only Ethernet autoconfiguration. 
*/ return; } @@ -2795,44 +2806,6 @@ ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) return -1; } -static void ip6_tnl_add_linklocal(struct inet6_dev *idev) -{ - struct net_device *link_dev; - struct net *net = dev_net(idev->dev); - - /* first try to inherit the link-local address from the link device */ - if (idev->dev->iflink && - (link_dev = __dev_get_by_index(net, idev->dev->iflink))) { - if (!ipv6_inherit_linklocal(idev, link_dev)) - return; - } - /* then try to inherit it from any device */ - for_each_netdev(net, link_dev) { - if (!ipv6_inherit_linklocal(idev, link_dev)) - return; - } - pr_debug("init ip6-ip6: add_linklocal failed\n"); -} - -/* - * Autoconfigure tunnel with a link-local address so routing protocols, - * DHCPv6, MLD etc. can be run over the virtual link - */ - -static void addrconf_ip6_tnl_config(struct net_device *dev) -{ - struct inet6_dev *idev; - - ASSERT_RTNL(); - - idev = addrconf_add_dev(dev); - if (IS_ERR(idev)) { - pr_debug("init ip6-ip6: add_dev failed\n"); - return; - } - ip6_tnl_add_linklocal(idev); -} - static int addrconf_notify(struct notifier_block *this, unsigned long event, void *ptr) { @@ -2900,9 +2873,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, addrconf_gre_config(dev); break; #endif - case ARPHRD_TUNNEL6: - addrconf_ip6_tnl_config(dev); - break; case ARPHRD_LOOPBACK: init_loopback(dev); break; @@ -3637,8 +3607,8 @@ restart: if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX)) next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX; - ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", - now, next, next_sec, next_sched)); + ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", + now, next, next_sec, next_sched); addr_chk_timer.expires = next_sched; add_timer(&addr_chk_timer); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index cc3bb201b8b0..d6e00a39274c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -41,6 +41,7 @@ #include <linux/netfilter_ipv6.h> #include <linux/slab.h> #include <linux/hash.h> +#include <linux/etherdevice.h> #include <asm/uaccess.h> #include <linux/atomic.h> @@ -1471,6 +1472,9 @@ static void ip6_tnl_dev_setup(struct net_device *dev) dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + /* This perm addr will be used as interface identifier by IPv6 */ + dev->addr_assign_type = NET_ADDR_RANDOM; + eth_random_addr(dev->perm_addr); } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 6c76df9909bf..98ead2b1a669 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -107,9 +107,12 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, struct inet6_dev *idev); - #define MLD_QRV_DEFAULT 2 +/* RFC3810, 8.1 Query Version Distinctions */ +#define MLD_V1_QUERY_LEN 24 +#define MLD_V2_QUERY_LEN_MIN 28 + #define MLD_V1_SEEN(idev) (dev_net((idev)->dev)->ipv6.devconf_all->force_mld_version == 1 || \ (idev)->cnf.force_mld_version == 1 || \ ((idev)->mc_v1_seen && \ @@ -996,24 +999,24 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, static void mld_gq_start_timer(struct inet6_dev *idev) { - int tv = net_random() % idev->mc_maxdelay; + unsigned long tv = net_random() % idev->mc_maxdelay; idev->mc_gq_running = 1; if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2)) in6_dev_hold(idev); } -static void 
mld_ifc_start_timer(struct inet6_dev *idev, int delay) +static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay) { - int tv = net_random() % delay; + unsigned long tv = net_random() % delay; if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2)) in6_dev_hold(idev); } -static void mld_dad_start_timer(struct inet6_dev *idev, int delay) +static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay) { - int tv = net_random() % delay; + unsigned long tv = net_random() % delay; if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2)) in6_dev_hold(idev); @@ -1146,13 +1149,11 @@ int igmp6_event_query(struct sk_buff *skb) !(group_type&IPV6_ADDR_MULTICAST)) return -EINVAL; - if (len == 24) { + if (len == MLD_V1_QUERY_LEN) { int switchback; /* MLDv1 router present */ - /* Translate milliseconds to jiffies */ - max_delay = (ntohs(mld->mld_maxdelay)*HZ)/1000; - + max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay)); switchback = (idev->mc_qrv + 1) * max_delay; idev->mc_v1_seen = jiffies + switchback; @@ -1162,17 +1163,18 @@ int igmp6_event_query(struct sk_buff *skb) __in6_dev_put(idev); /* clear deleted report items */ mld_clear_delrec(idev); - } else if (len >= 28) { + } else if (len >= MLD_V2_QUERY_LEN_MIN) { int srcs_offset = sizeof(struct mld2_query) - sizeof(struct icmp6hdr); if (!pskb_may_pull(skb, srcs_offset)) return -EINVAL; mlh2 = (struct mld2_query *)skb_transport_header(skb); - max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000; - if (!max_delay) - max_delay = 1; + + max_delay = max(msecs_to_jiffies(MLDV2_MRC(ntohs(mlh2->mld2q_mrc))), 1UL); + idev->mc_maxdelay = max_delay; + if (mlh2->mld2q_qrv) idev->mc_qrv = mlh2->mld2q_qrv; if (group_type == IPV6_ADDR_ANY) { /* general query */ diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 79aa9652ed86..04d31c2fbef1 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1369,8 +1369,10 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) return; - if (!ndopts.nd_opts_rh) + if (!ndopts.nd_opts_rh) { + ip6_redirect_no_header(skb, dev_net(skb->dev), 0, 0); return; + } hdr = (u8 *)ndopts.nd_opts_rh; hdr += 8; diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index 47bff6107519..3e4e92d5e157 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -76,7 +76,7 @@ static int masq_device_event(struct notifier_block *this, if (event == NETDEV_DOWN) nf_ct_iterate_cleanup(net, device_cmp, - (void *)(long)dev->ifindex); + (void *)(long)dev->ifindex, 0, 0); return NOTIFY_DONE; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 790d9f4b8b0b..1aeb473b2cc6 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -490,6 +490,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, ipv6_hdr(head)->payload_len = htons(payload_len); ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->nhoff = nhoff; + IP6CB(head)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 8) */ if (head->ip_summed == CHECKSUM_COMPLETE) @@ -524,6 +525,9 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct net *net = dev_net(skb_dst(skb)->dev); int evicted; + if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) + goto fail_hdr; + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. 
header */ @@ -544,6 +548,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS); IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); + IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; return 1; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e22c4db8d07a..55236a84c748 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1177,6 +1177,27 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) } EXPORT_SYMBOL_GPL(ip6_redirect); +void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, + u32 mark) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb); + struct dst_entry *dst; + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = oif; + fl6.flowi6_mark = mark; + fl6.flowi6_flags = 0; + fl6.daddr = msg->dest; + fl6.saddr = iph->daddr; + + dst = ip6_route_output(net, NULL, &fl6); + if (!dst->error) + rt6_do_redirect(dst, NULL, skb); + dst_release(dst); +} + void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) { ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); diff --git a/net/key/af_key.c b/net/key/af_key.c index ab8bd2cabfa0..9d585370c5b4 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -45,7 +45,7 @@ struct netns_pfkey { static DEFINE_MUTEX(pfkey_mutex); #define DUMMY_MARK 0 -static struct xfrm_mark dummy_mark = {0, 0}; +static const struct xfrm_mark dummy_mark = {0, 0}; struct pfkey_sock { /* struct sock must be the first member of struct pfkey_sock */ struct sock sk; @@ -338,7 +338,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk) return 0; } -static u8 sadb_ext_min_len[] = { +static const u8 sadb_ext_min_len[] = { [SADB_EXT_RESERVED] = (u8) 0, [SADB_EXT_SA] = (u8) sizeof(struct sadb_sa), [SADB_EXT_LIFETIME_CURRENT] = (u8) sizeof(struct sadb_lifetime), @@ -1196,10 +1196,6 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, x->props.family = pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_SRC-1], &x->props.saddr); - if (!x->props.family) { - err = -EAFNOSUPPORT; - goto out; - } pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1], &x->id.daddr); @@ -2205,10 +2201,6 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr); - if (!xp->family) { - err = -EINVAL; - goto out; - } xp->selector.family = xp->family; xp->selector.prefixlen_s = sa->sadb_address_prefixlen; xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); @@ -2737,7 +2729,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs); -static pfkey_handler pfkey_funcs[SADB_MAX + 1] = { +static const pfkey_handler pfkey_funcs[SADB_MAX + 1] = { [SADB_RESERVED] = pfkey_reserved, [SADB_GETSPI] = pfkey_getspi, [SADB_UPDATE] = pfkey_add, diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 56d22cae5906..c45fc1a60e0d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -410,20 +410,6 @@ config NF_NAT_TFTP endif # NF_CONNTRACK -# transparent proxy support -config NETFILTER_TPROXY - tristate "Transparent proxying support" - depends on IP_NF_MANGLE - depends on NETFILTER_ADVANCED - help - This option enables transparent proxying support, 
that is, - support for handling non-locally bound IPv4 TCP and UDP sockets. - For it to work you will have to configure certain iptables rules - and use policy routing. For more information on how to set it up - see Documentation/networking/tproxy.txt. - - To compile it as a module, choose M here. If unsure, say N. - config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n @@ -720,10 +706,10 @@ config NETFILTER_XT_TARGET_TEE this clone be rerouted to another nexthop. config NETFILTER_XT_TARGET_TPROXY - tristate '"TPROXY" target support' - depends on NETFILTER_TPROXY + tristate '"TPROXY" target transparent proxying support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED + depends on IP_NF_MANGLE select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES help @@ -731,6 +717,9 @@ config NETFILTER_XT_TARGET_TPROXY REDIRECT. It can only be used in the mangle table and is useful to redirect traffic to a transparent proxy. It does _not_ depend on Netfilter connection tracking and NAT, unlike REDIRECT. + For it to work you will have to configure certain iptables rules + and use policy routing. For more information on how to set it up + see Documentation/networking/tproxy.txt. To compile it as a module, choose M here. If unsure, say N. @@ -1180,7 +1169,6 @@ config NETFILTER_XT_MATCH_SCTP config NETFILTER_XT_MATCH_SOCKET tristate '"socket" match support' - depends on NETFILTER_TPROXY depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED depends on !NF_CONNTRACK || NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index a1abf87d43bf..ebfa7dc747cd 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -61,9 +61,6 @@ obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o -# transparent proxy support -obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o - # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 2217363ab422..593b16ea45e0 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -234,12 +234,13 @@ EXPORT_SYMBOL(skb_make_writable); /* This does not belong here, but locally generated errors need it if connection tracking in use: without this, connection may not be in hash table, and hence manufactured ICMP or RST packets will not be associated with it. 
*/ -void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly; +void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) + __rcu __read_mostly; EXPORT_SYMBOL(ip_ct_attach); -void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) +void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb) { - void (*attach)(struct sk_buff *, struct sk_buff *); + void (*attach)(struct sk_buff *, const struct sk_buff *); if (skb->nfct) { rcu_read_lock(); diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 3cd85b2fc67c..5199448697f6 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -414,7 +414,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc) spin_lock_bh(&svc->sched_lock); tbl->dead = 1; - for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { + for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) { hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { ip_vs_lblcr_free(en); } @@ -440,7 +440,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) struct ip_vs_lblcr_entry *en; struct hlist_node *next; - for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { + for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLCR_TAB_MASK; spin_lock(&svc->sched_lock); @@ -495,7 +495,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) if (goal > tbl->max_size/2) goal = tbl->max_size/2; - for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { + for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLCR_TAB_MASK; spin_lock(&svc->sched_lock); @@ -536,7 +536,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) /* * Initialize the hash buckets */ - for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { + for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) { INIT_HLIST_HEAD(&tbl->bucket[i]); } tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index f16c027df15b..3588faebe529 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -269,14 +269,20 @@ ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph) switch (iph->protocol) { case IPPROTO_TCP: th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); + if (unlikely(th == NULL)) + return 0; port = th->source; break; case IPPROTO_UDP: uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); + if (unlikely(uh == NULL)) + return 0; port = uh->source; break; case IPPROTO_SCTP: sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); + if (unlikely(sh == NULL)) + return 0; port = sh->source; break; default: diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0283baedcdfb..da6f1787a102 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -238,7 +238,7 @@ destroy_conntrack(struct nf_conntrack *nfct) nf_conntrack_free(ct); } -void nf_ct_delete_from_lists(struct nf_conn *ct) +static void nf_ct_delete_from_lists(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); @@ -253,7 +253,6 @@ void nf_ct_delete_from_lists(struct nf_conn *ct) &net->ct.dying); spin_unlock_bh(&nf_conntrack_lock); } -EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists); static void death_by_event(unsigned long ul_conntrack) { @@ -275,7 +274,7 @@ static void death_by_event(unsigned long ul_conntrack) nf_ct_put(ct); } -void nf_ct_dying_timeout(struct nf_conn *ct) +static void nf_ct_dying_timeout(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); struct 
nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); @@ -288,27 +287,33 @@ void nf_ct_dying_timeout(struct nf_conn *ct) (prandom_u32() % net->ct.sysctl_events_retry_timeout); add_timer(&ecache->timeout); } -EXPORT_SYMBOL_GPL(nf_ct_dying_timeout); -static void death_by_timeout(unsigned long ul_conntrack) +bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) { - struct nf_conn *ct = (void *)ul_conntrack; struct nf_conn_tstamp *tstamp; tstamp = nf_conn_tstamp_find(ct); if (tstamp && tstamp->stop == 0) tstamp->stop = ktime_to_ns(ktime_get_real()); - if (!test_bit(IPS_DYING_BIT, &ct->status) && - unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { + if (!nf_ct_is_dying(ct) && + unlikely(nf_conntrack_event_report(IPCT_DESTROY, ct, + portid, report) < 0)) { /* destroy event was not delivered */ nf_ct_delete_from_lists(ct); nf_ct_dying_timeout(ct); - return; + return false; } set_bit(IPS_DYING_BIT, &ct->status); nf_ct_delete_from_lists(ct); nf_ct_put(ct); + return true; +} +EXPORT_SYMBOL_GPL(nf_ct_delete); + +static void death_by_timeout(unsigned long ul_conntrack) +{ + nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0); } /* @@ -643,10 +648,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) return dropped; if (del_timer(&ct->timeout)) { - death_by_timeout((unsigned long)ct); - /* Check if we indeed killed this entry. Reliable event - delivery may have inserted it into the dying list. */ - if (test_bit(IPS_DYING_BIT, &ct->status)) { + if (nf_ct_delete(ct, 0, 0)) { dropped = 1; NF_CT_STAT_INC_ATOMIC(net, early_drop); } @@ -1192,7 +1194,7 @@ EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size); #endif /* Used by ipt_REJECT and ip6t_REJECT. */ -static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) +static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -1244,7 +1246,7 @@ found: void nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), - void *data) + void *data, u32 portid, int report) { struct nf_conn *ct; unsigned int bucket = 0; @@ -1252,7 +1254,8 @@ void nf_ct_iterate_cleanup(struct net *net, while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { /* Time to push up daises... */ if (del_timer(&ct->timeout)) - death_by_timeout((unsigned long)ct); + nf_ct_delete(ct, portid, report); + /* ... else the timer will get him soon. */ nf_ct_put(ct); @@ -1260,30 +1263,6 @@ void nf_ct_iterate_cleanup(struct net *net, } EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); -struct __nf_ct_flush_report { - u32 portid; - int report; -}; - -static int kill_report(struct nf_conn *i, void *data) -{ - struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; - struct nf_conn_tstamp *tstamp; - - tstamp = nf_conn_tstamp_find(i); - if (tstamp && tstamp->stop == 0) - tstamp->stop = ktime_to_ns(ktime_get_real()); - - /* If we fail to deliver the event, death_by_timeout() will retry */ - if (nf_conntrack_event_report(IPCT_DESTROY, i, - fr->portid, fr->report) < 0) - return 1; - - /* Avoid the delivery of the destroy event in death_by_timeout(). 
*/ - set_bit(IPS_DYING_BIT, &i->status); - return 1; -} - static int kill_all(struct nf_conn *i, void *data) { return 1; @@ -1301,11 +1280,7 @@ EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); void nf_conntrack_flush_report(struct net *net, u32 portid, int report) { - struct __nf_ct_flush_report fr = { - .portid = portid, - .report = report, - }; - nf_ct_iterate_cleanup(net, kill_report, &fr); + nf_ct_iterate_cleanup(net, kill_all, NULL, portid, report); } EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); @@ -1386,7 +1361,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) i_see_dead_people: busy = 0; list_for_each_entry(net, net_exit_list, exit_list) { - nf_ct_iterate_cleanup(net, kill_all, NULL); + nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0); nf_ct_release_dying_list(net); if (atomic_read(&net->ct.count) != 0) busy = 1; @@ -1692,7 +1667,7 @@ err_stat: return ret; } -s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, +s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq); EXPORT_SYMBOL_GPL(nf_ct_nat_offset); diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index 355d2ef08094..bb53f120e79c 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -8,12 +8,8 @@ * published by the Free Software Foundation. */ -#include <linux/ctype.h> #include <linux/export.h> -#include <linux/jhash.h> -#include <linux/spinlock.h> #include <linux/types.h> -#include <linux/slab.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_labels.h> diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index edc410e778f7..fa61fea63234 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1038,21 +1038,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, } } - if (del_timer(&ct->timeout)) { - if (nf_conntrack_event_report(IPCT_DESTROY, ct, - NETLINK_CB(skb).portid, - nlmsg_report(nlh)) < 0) { - nf_ct_delete_from_lists(ct); - /* we failed to report the event, try later */ - nf_ct_dying_timeout(ct); - nf_ct_put(ct); - return 0; - } - /* death_by_timeout would report the event again */ - set_bit(IPS_DYING_BIT, &ct->status); - nf_ct_delete_from_lists(ct); - nf_ct_put(ct); - } + if (del_timer(&ct->timeout)) + nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); + nf_ct_put(ct); return 0; @@ -1999,6 +1987,27 @@ out: return err == -EAGAIN ? 
-ENOBUFS : err; } +static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { + [CTA_EXPECT_MASTER] = { .type = NLA_NESTED }, + [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED }, + [CTA_EXPECT_MASK] = { .type = NLA_NESTED }, + [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 }, + [CTA_EXPECT_ID] = { .type = NLA_U32 }, + [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING, + .len = NF_CT_HELPER_NAME_LEN - 1 }, + [CTA_EXPECT_ZONE] = { .type = NLA_U16 }, + [CTA_EXPECT_FLAGS] = { .type = NLA_U32 }, + [CTA_EXPECT_CLASS] = { .type = NLA_U32 }, + [CTA_EXPECT_NAT] = { .type = NLA_NESTED }, + [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING }, +}; + +static struct nf_conntrack_expect * +ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct, + struct nf_conntrack_helper *helper, + struct nf_conntrack_tuple *tuple, + struct nf_conntrack_tuple *mask); + #ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT static size_t ctnetlink_nfqueue_build_size(const struct nf_conn *ct) @@ -2139,10 +2148,69 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) return ret; } +static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda, + const struct nf_conn *ct, + struct nf_conntrack_tuple *tuple, + struct nf_conntrack_tuple *mask) +{ + int err; + + err = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE, + nf_ct_l3num(ct)); + if (err < 0) + return err; + + return ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK, + nf_ct_l3num(ct)); +} + +static int +ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, + u32 portid, u32 report) +{ + struct nlattr *cda[CTA_EXPECT_MAX+1]; + struct nf_conntrack_tuple tuple, mask; + struct nf_conntrack_helper *helper; + struct nf_conntrack_expect *exp; + int err; + + err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy); + if (err < 0) + return err; + + err = ctnetlink_nfqueue_exp_parse((const struct nlattr * const *)cda, + ct, &tuple, &mask); + if (err < 0) + return err; + + if (cda[CTA_EXPECT_HELP_NAME]) { + const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]); + + helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), + nf_ct_protonum(ct)); + if (helper == NULL) + return -EOPNOTSUPP; + } + + exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct, + helper, &tuple, &mask); + if (IS_ERR(exp)) + return PTR_ERR(exp); + + err = nf_ct_expect_related_report(exp, portid, report); + if (err < 0) { + nf_ct_expect_put(exp); + return err; + } + + return 0; +} + static struct nfq_ct_hook ctnetlink_nfqueue_hook = { .build_size = ctnetlink_nfqueue_build_size, .build = ctnetlink_nfqueue_build, .parse = ctnetlink_nfqueue_parse, + .attach_expect = ctnetlink_nfqueue_attach_expect, }; #endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */ @@ -2510,21 +2578,6 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb, return err; } -static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { - [CTA_EXPECT_MASTER] = { .type = NLA_NESTED }, - [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED }, - [CTA_EXPECT_MASK] = { .type = NLA_NESTED }, - [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 }, - [CTA_EXPECT_ID] = { .type = NLA_U32 }, - [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING, - .len = NF_CT_HELPER_NAME_LEN - 1 }, - [CTA_EXPECT_ZONE] = { .type = NLA_U16 }, - [CTA_EXPECT_FLAGS] = { .type = NLA_U32 }, - [CTA_EXPECT_CLASS] = { .type = NLA_U32 }, - [CTA_EXPECT_NAT] = { .type = NLA_NESTED }, - [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING }, -}; - static int ctnetlink_get_expect(struct sock *ctnl, struct sk_buff 
*skb, const struct nlmsghdr *nlh, @@ -2747,76 +2800,26 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, #endif } -static int -ctnetlink_create_expect(struct net *net, u16 zone, - const struct nlattr * const cda[], - u_int8_t u3, - u32 portid, int report) +static struct nf_conntrack_expect * +ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, + struct nf_conntrack_helper *helper, + struct nf_conntrack_tuple *tuple, + struct nf_conntrack_tuple *mask) { - struct nf_conntrack_tuple tuple, mask, master_tuple; - struct nf_conntrack_tuple_hash *h = NULL; + u_int32_t class = 0; struct nf_conntrack_expect *exp; - struct nf_conn *ct; struct nf_conn_help *help; - struct nf_conntrack_helper *helper = NULL; - u_int32_t class = 0; - int err = 0; - - /* caller guarantees that those three CTA_EXPECT_* exist */ - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); - if (err < 0) - return err; - err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3); - if (err < 0) - return err; - err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3); - if (err < 0) - return err; - - /* Look for master conntrack of this expectation */ - h = nf_conntrack_find_get(net, zone, &master_tuple); - if (!h) - return -ENOENT; - ct = nf_ct_tuplehash_to_ctrack(h); - - /* Look for helper of this expectation */ - if (cda[CTA_EXPECT_HELP_NAME]) { - const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]); - - helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), - nf_ct_protonum(ct)); - if (helper == NULL) { -#ifdef CONFIG_MODULES - if (request_module("nfct-helper-%s", helpname) < 0) { - err = -EOPNOTSUPP; - goto out; - } - - helper = __nf_conntrack_helper_find(helpname, - nf_ct_l3num(ct), - nf_ct_protonum(ct)); - if (helper) { - err = -EAGAIN; - goto out; - } -#endif - err = -EOPNOTSUPP; - goto out; - } - } + int err; if (cda[CTA_EXPECT_CLASS] && helper) { class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS])); - if (class > helper->expect_class_max) { - err = -EINVAL; - goto out; - } + if (class > helper->expect_class_max) + return ERR_PTR(-EINVAL); } exp = nf_ct_expect_alloc(ct); - if (!exp) { - err = -ENOMEM; - goto out; - } + if (!exp) + return ERR_PTR(-ENOMEM); + help = nfct_help(ct); if (!help) { if (!cda[CTA_EXPECT_TIMEOUT]) { @@ -2854,21 +2857,89 @@ ctnetlink_create_expect(struct net *net, u16 zone, exp->class = class; exp->master = ct; exp->helper = helper; - memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple)); - memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3)); - exp->mask.src.u.all = mask.src.u.all; + exp->tuple = *tuple; + exp->mask.src.u3 = mask->src.u3; + exp->mask.src.u.all = mask->src.u.all; if (cda[CTA_EXPECT_NAT]) { err = ctnetlink_parse_expect_nat(cda[CTA_EXPECT_NAT], - exp, u3); + exp, nf_ct_l3num(ct)); if (err < 0) goto err_out; } - err = nf_ct_expect_related_report(exp, portid, report); + return exp; err_out: nf_ct_expect_put(exp); -out: - nf_ct_put(nf_ct_tuplehash_to_ctrack(h)); + return ERR_PTR(err); +} + +static int +ctnetlink_create_expect(struct net *net, u16 zone, + const struct nlattr * const cda[], + u_int8_t u3, u32 portid, int report) +{ + struct nf_conntrack_tuple tuple, mask, master_tuple; + struct nf_conntrack_tuple_hash *h = NULL; + struct nf_conntrack_helper *helper = NULL; + struct nf_conntrack_expect *exp; + struct nf_conn *ct; + int err; + + /* caller guarantees that those three CTA_EXPECT_* exist */ + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + if (err < 0) + return err; + 
err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3); + if (err < 0) + return err; + err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3); + if (err < 0) + return err; + + /* Look for master conntrack of this expectation */ + h = nf_conntrack_find_get(net, zone, &master_tuple); + if (!h) + return -ENOENT; + ct = nf_ct_tuplehash_to_ctrack(h); + + if (cda[CTA_EXPECT_HELP_NAME]) { + const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]); + + helper = __nf_conntrack_helper_find(helpname, u3, + nf_ct_protonum(ct)); + if (helper == NULL) { +#ifdef CONFIG_MODULES + if (request_module("nfct-helper-%s", helpname) < 0) { + err = -EOPNOTSUPP; + goto err_ct; + } + helper = __nf_conntrack_helper_find(helpname, u3, + nf_ct_protonum(ct)); + if (helper) { + err = -EAGAIN; + goto err_ct; + } +#endif + err = -EOPNOTSUPP; + goto err_ct; + } + } + + exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask); + if (IS_ERR(exp)) { + err = PTR_ERR(exp); + goto err_ct; + } + + err = nf_ct_expect_related_report(exp, portid, report); + if (err < 0) + goto err_exp; + + return 0; +err_exp: + nf_ct_expect_put(exp); +err_ct: + nf_ct_put(ct); return err; } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 0ab9636ac57e..ce3004156eeb 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -281,7 +281,7 @@ void nf_ct_l3proto_pernet_unregister(struct net *net, nf_ct_l3proto_unregister_sysctl(net, proto); /* Remove all contrack entries for this protocol */ - nf_ct_iterate_cleanup(net, kill_l3proto, proto); + nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0); } EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister); @@ -476,7 +476,7 @@ void nf_ct_l4proto_pernet_unregister(struct net *net, nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); /* Remove all contrack entries for this protocol */ - nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); + nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0); } EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 2f8010707d01..d224e001f14f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -496,7 +496,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, } #ifdef CONFIG_NF_NAT_NEEDED -static inline s16 nat_offset(const struct nf_conn *ct, +static inline s32 nat_offset(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq) { @@ -525,7 +525,7 @@ static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; - s16 receiver_offset; + s32 receiver_offset; bool res, in_recv_win; /* diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 038eee5c8f85..6ff808375b5e 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -497,7 +497,7 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) rtnl_lock(); for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); + nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0); rtnl_unlock(); } @@ -511,7 +511,7 @@ static void nf_nat_l3proto_clean(u8 l3proto) rtnl_lock(); for_each_net(net) - nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean); + nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0); rtnl_unlock(); } @@ -749,7 +749,7 @@ static void 
__net_exit nf_nat_net_exit(struct net *net) { struct nf_nat_proto_clean clean = {}; - nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean); + nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean, 0, 0); synchronize_rcu(); nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); } diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 85e20a919081..46b9baa845a6 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -30,8 +30,6 @@ pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \ x->offset_before, x->offset_after, x->correction_pos); -static DEFINE_SPINLOCK(nf_nat_seqofs_lock); - /* Setup TCP sequence correction given this change at this sequence */ static inline void adjust_tcp_sequence(u32 seq, @@ -49,7 +47,7 @@ adjust_tcp_sequence(u32 seq, pr_debug("adjust_tcp_sequence: Seq_offset before: "); DUMP_OFFSET(this_way); - spin_lock_bh(&nf_nat_seqofs_lock); + spin_lock_bh(&ct->lock); /* SYN adjust. If it's uninitialized, or this is after last * correction, record it: we don't handle more than one @@ -61,31 +59,26 @@ adjust_tcp_sequence(u32 seq, this_way->offset_before = this_way->offset_after; this_way->offset_after += sizediff; } - spin_unlock_bh(&nf_nat_seqofs_lock); + spin_unlock_bh(&ct->lock); pr_debug("adjust_tcp_sequence: Seq_offset after: "); DUMP_OFFSET(this_way); } -/* Get the offset value, for conntrack */ -s16 nf_nat_get_offset(const struct nf_conn *ct, +/* Get the offset value, for conntrack. Caller must have the conntrack locked */ +s32 nf_nat_get_offset(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq) { struct nf_conn_nat *nat = nfct_nat(ct); struct nf_nat_seq *this_way; - s16 offset; if (!nat) return 0; this_way = &nat->seq[dir]; - spin_lock_bh(&nf_nat_seqofs_lock); - offset = after(seq, this_way->correction_pos) + return after(seq, this_way->correction_pos) ? this_way->offset_after : this_way->offset_before; - spin_unlock_bh(&nf_nat_seqofs_lock); - - return offset; } /* Frobs data inside this packet, which is linear. */ @@ -143,7 +136,7 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) } void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - __be32 seq, s16 off) + __be32 seq, s32 off) { if (!off) return; @@ -370,9 +363,10 @@ nf_nat_seq_adjust(struct sk_buff *skb, struct tcphdr *tcph; int dir; __be32 newseq, newack; - s16 seqoff, ackoff; + s32 seqoff, ackoff; struct nf_conn_nat *nat = nfct_nat(ct); struct nf_nat_seq *this_way, *other_way; + int res; dir = CTINFO2DIR(ctinfo); @@ -383,6 +377,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, return 0; tcph = (void *)skb->data + protoff; + spin_lock_bh(&ct->lock); if (after(ntohl(tcph->seq), this_way->correction_pos)) seqoff = this_way->offset_after; else @@ -407,7 +402,10 @@ nf_nat_seq_adjust(struct sk_buff *skb, tcph->seq = newseq; tcph->ack_seq = newack; - return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo); + res = nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo); + spin_unlock_bh(&ct->lock); + + return res; } /* Setup NAT on this expected conntrack so it follows master. */ diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c deleted file mode 100644 index 474d621cbc2e..000000000000 --- a/net/netfilter/nf_tproxy_core.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Transparent proxy support for Linux/iptables - * - * Copyright (c) 2006-2007 BalaBit IT Ltd. 
- * Author: Balazs Scheidler, Krisztian Kovacs - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include <linux/module.h> - -#include <linux/net.h> -#include <linux/if.h> -#include <linux/netdevice.h> -#include <net/udp.h> -#include <net/netfilter/nf_tproxy_core.h> - - -static void -nf_tproxy_destructor(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - - skb->sk = NULL; - skb->destructor = NULL; - - if (sk) - sock_put(sk); -} - -/* consumes sk */ -void -nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) -{ - /* assigning tw sockets complicates things; most - * skb->sk->X checks would have to test sk->sk_state first */ - if (sk->sk_state == TCP_TIME_WAIT) { - inet_twsk_put(inet_twsk(sk)); - return; - } - - skb_orphan(skb); - skb->sk = sk; - skb->destructor = nf_tproxy_destructor; -} -EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock); - -static int __init nf_tproxy_init(void) -{ - pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n"); - pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n"); - return 0; -} - -module_init(nf_tproxy_init); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Krisztian Kovacs"); -MODULE_DESCRIPTION("Transparent proxy support core routines"); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 8a703c3dd318..95a98c8c1da6 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -862,6 +862,7 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { [NFQA_MARK] = { .type = NLA_U32 }, [NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, [NFQA_CT] = { .type = NLA_UNSPEC }, + [NFQA_EXP] = { .type = NLA_UNSPEC }, }; static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { @@ -990,9 +991,14 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, if (entry == NULL) return -ENOENT; - rcu_read_lock(); - if (nfqa[NFQA_CT] && (queue->flags & NFQA_CFG_F_CONNTRACK)) + if (nfqa[NFQA_CT]) { ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo); + if (ct && nfqa[NFQA_EXP]) { + nfqnl_attach_expect(ct, nfqa[NFQA_EXP], + NETLINK_CB(skb).portid, + nlmsg_report(nlh)); + } + } if (nfqa[NFQA_PAYLOAD]) { u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]); @@ -1005,7 +1011,6 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, if (ct) nfqnl_ct_seq_adjust(skb, ct, ctinfo, diff); } - rcu_read_unlock(); if (nfqa[NFQA_MARK]) entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c index ab61d66bc0b9..be893039966d 100644 --- a/net/netfilter/nfnetlink_queue_ct.c +++ b/net/netfilter/nfnetlink_queue_ct.c @@ -96,3 +96,18 @@ void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, if ((ct->status & IPS_NAT_MASK) && diff) nfq_nat_ct->seq_adjust(skb, ct, ctinfo, diff); } + +int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr, + u32 portid, u32 report) +{ + struct nfq_ct_hook *nfq_ct; + + if (nf_ct_is_untracked(ct)) + return 0; + + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct == NULL) + return -EOPNOTSUPP; + + return nfq_ct->attach_expect(attr, ct, portid, report); +} diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index d7f195388f66..5d8a3a3cd5a7 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -15,7 +15,9 @@ #include <linux/ip.h> #include 
<net/checksum.h> #include <net/udp.h> +#include <net/tcp.h> #include <net/inet_sock.h> +#include <net/inet_hashtables.h> #include <linux/inetdevice.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> @@ -26,13 +28,18 @@ #define XT_TPROXY_HAVE_IPV6 1 #include <net/if_inet6.h> #include <net/addrconf.h> +#include <net/inet6_hashtables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #endif -#include <net/netfilter/nf_tproxy_core.h> #include <linux/netfilter/xt_TPROXY.h> +enum nf_tproxy_lookup_t { + NFT_LOOKUP_LISTENER, + NFT_LOOKUP_ESTABLISHED, +}; + static bool tproxy_sk_is_transparent(struct sock *sk) { if (sk->sk_state != TCP_TIME_WAIT) { @@ -68,6 +75,157 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) return laddr ? laddr : daddr; } +/* + * This is used when the user wants to intercept a connection matching + * an explicit iptables rule. In this case the sockets are assumed + * matching in preference order: + * + * - match: if there's a fully established connection matching the + * _packet_ tuple, it is returned, assuming the redirection + * already took place and we process a packet belonging to an + * established connection + * + * - match: if there's a listening socket matching the redirection + * (e.g. on-port & on-ip of the connection), it is returned, + * regardless if it was bound to 0.0.0.0 or an explicit + * address. The reasoning is that if there's an explicit rule, it + * does not really matter if the listener is bound to an interface + * or to 0. The user already stated that he wants redirection + * (since he added the rule). + * + * Please note that there's an overlap between what a TPROXY target + * and a socket match will match. Normally if you have both rules the + * "socket" match will be the first one, effectively all packets + * belonging to established connections going through that one. 
+ */ +static inline struct sock * +nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, + const enum nf_tproxy_lookup_t lookup_type) +{ + struct sock *sk; + + switch (protocol) { + case IPPROTO_TCP: + switch (lookup_type) { + case NFT_LOOKUP_LISTENER: + sk = inet_lookup_listener(net, &tcp_hashinfo, + saddr, sport, + daddr, dport, + in->ifindex); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too + */ + break; + case NFT_LOOKUP_ESTABLISHED: + sk = inet_lookup_established(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + default: + BUG(); + } + break; + case IPPROTO_UDP: + sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + if (sk) { + int connected = (sk->sk_state == TCP_ESTABLISHED); + int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too + */ + if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) || + (lookup_type == NFT_LOOKUP_LISTENER && connected)) { + sock_put(sk); + sk = NULL; + } + } + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n", + protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk); + + return sk; +} + +#ifdef XT_TPROXY_HAVE_IPV6 +static inline struct sock * +nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, + const struct in6_addr *saddr, const struct in6_addr *daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, + const enum nf_tproxy_lookup_t lookup_type) +{ + struct sock *sk; + + switch (protocol) { + case IPPROTO_TCP: + switch (lookup_type) { + case NFT_LOOKUP_LISTENER: + sk = inet6_lookup_listener(net, &tcp_hashinfo, + saddr, sport, + daddr, ntohs(dport), + in->ifindex); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too + */ + break; + case NFT_LOOKUP_ESTABLISHED: + sk = __inet6_lookup_established(net, &tcp_hashinfo, + saddr, sport, daddr, ntohs(dport), + in->ifindex); + break; + default: + BUG(); + } + break; + case IPPROTO_UDP: + sk = udp6_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + if (sk) { + int connected = (sk->sk_state == TCP_ESTABLISHED); + int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too + */ + if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) || + (lookup_type == NFT_LOOKUP_LISTENER && connected)) { + sock_put(sk); + sk = NULL; + } + } + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n", + protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk); + + return sk; +} +#endif + /** * tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections * @skb: The skb being processed. 
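The two helpers above are brought into xt_TPROXY.c with only the LISTENER and ESTABLISHED modes; the old NFT_LOOKUP_ANY behaviour moves into xt_socket's own lookups further down. As a minimal sketch — not taken from this patch; example_tproxy_lookup(), iph and hp are invented names — a TPROXY-style target would typically chain the two modes, trying the packet tuple against established sockets first and only then looking for a listener on the redirect address (with tproxy_handle_time_wait4(), documented just above, covering TIME_WAIT reopens in between):

static struct sock *
example_tproxy_lookup(struct sk_buff *skb, const struct iphdr *iph,
		      const struct tcphdr *hp, __be32 laddr, __be16 lport)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;

	/* 1. Does the packet tuple already belong to a redirected,
	 *    fully established connection?
	 */
	sk = nf_tproxy_get_sock_v4(net, iph->protocol,
				   iph->saddr, iph->daddr,
				   hp->source, hp->dest,
				   skb->dev, NFT_LOOKUP_ESTABLISHED);
	if (sk)
		return sk;

	/* 2. Otherwise look for a listener on the redirect address/port;
	 *    0.0.0.0-bound listeners are acceptable here, as the NOTEs
	 *    inside nf_tproxy_get_sock_v4() explain.
	 */
	return nf_tproxy_get_sock_v4(net, iph->protocol,
				     iph->saddr,
				     laddr ? laddr : iph->daddr,
				     hp->source,
				     lport ? lport : hp->dest,
				     skb->dev, NFT_LOOKUP_LISTENER);
}
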
@@ -117,6 +275,15 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport, return sk; } +/* assign a socket to the skb -- consumes sk */ +static void +nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) +{ + skb_orphan(skb); + skb->sk = sk; + skb->destructor = sock_edemux; +} + static unsigned int tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, u_int32_t mark_mask, u_int32_t mark_value) diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 68ff29f60867..fab6eea1bf38 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -202,7 +202,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) return -EINVAL; } if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) { - pr_err("ipv6 PROHIBT (THROW, NAT ..) matching not supported\n"); + pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n"); return -EINVAL; } if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) { diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 20b15916f403..06df2b9110f5 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -19,12 +19,12 @@ #include <net/icmp.h> #include <net/sock.h> #include <net/inet_sock.h> -#include <net/netfilter/nf_tproxy_core.h> #include <net/netfilter/ipv4/nf_defrag_ipv4.h> #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) #define XT_SOCKET_HAVE_IPV6 1 #include <linux/netfilter_ipv6/ip6_tables.h> +#include <net/inet6_hashtables.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #endif @@ -101,6 +101,43 @@ extract_icmp4_fields(const struct sk_buff *skb, return 0; } +/* "socket" match based redirection (no specific rule) + * =================================================== + * + * There are connections with dynamic endpoints (e.g. FTP data + * connection) that the user is unable to add explicit rules + * for. These are taken care of by a generic "socket" rule. It is + * assumed that the proxy application is trusted to open such + * connections without explicit iptables rule (except of course the + * generic 'socket' rule). In this case the following sockets are + * matched in preference order: + * + * - match: if there's a fully established connection matching the + * _packet_ tuple + * + * - match: if there's a non-zero bound listener (possibly with a + * non-local address) We don't accept zero-bound listeners, since + * then local services could intercept traffic going through the + * box. 
+ */ +static struct sock * +xt_socket_get_sock_v4(struct net *net, const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in) +{ + switch (protocol) { + case IPPROTO_TCP: + return __inet_lookup(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + case IPPROTO_UDP: + return udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + } + return NULL; +} + static bool socket_match(const struct sk_buff *skb, struct xt_action_param *par, const struct xt_socket_mtinfo1 *info) @@ -156,9 +193,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, #endif if (!sk) - sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, + sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr, sport, dport, - par->in, NFT_LOOKUP_ANY); + par->in); if (sk) { bool wildcard; bool transparent = true; @@ -265,6 +302,25 @@ extract_icmp6_fields(const struct sk_buff *skb, return 0; } +static struct sock * +xt_socket_get_sock_v6(struct net *net, const u8 protocol, + const struct in6_addr *saddr, const struct in6_addr *daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in) +{ + switch (protocol) { + case IPPROTO_TCP: + return inet6_lookup(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + case IPPROTO_UDP: + return udp6_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + } + + return NULL; +} + static bool socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) { @@ -302,9 +358,9 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) } if (!sk) - sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr, sport, dport, - par->in, NFT_LOOKUP_ANY); + par->in); if (sk) { bool wildcard; bool transparent = true; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f85f8a2ad6cf..512718adb0d5 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -789,10 +789,6 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) struct net *net = sock_net(skb->sk); int chains_to_skip = cb->args[0]; int fams_to_skip = cb->args[1]; - bool need_locking = chains_to_skip || fams_to_skip; - - if (need_locking) - genl_lock(); for (i = chains_to_skip; i < GENL_FAM_TAB_SIZE; i++) { n = 0; @@ -814,9 +810,6 @@ errout: cb->args[0] = i; cb->args[1] = n; - if (need_locking) - genl_unlock(); - return skb->len; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6c53dd9f5ccc..1fdf9ab91c3f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3215,9 +3215,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, if (po->tp_version == TPACKET_V3) { lv = sizeof(struct tpacket_stats_v3); + st.stats3.tp_packets += st.stats3.tp_drops; data = &st.stats3; } else { lv = sizeof(struct tpacket_stats); + st.stats1.tp_packets += st.stats1.tp_drops; data = &st.stats1; } diff --git a/net/sctp/probe.c b/net/sctp/probe.c index e62c22535be4..cd72ae57aff1 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -155,13 +155,8 @@ static sctp_disposition_t jsctp_sf_eat_sack(struct net *net, if (sp == asoc->peer.primary_path) printl("*"); - if (sp->ipaddr.sa.sa_family == AF_INET) - printl("%pI4 ", &sp->ipaddr.v4.sin_addr); - else - printl("%pI6 ", &sp->ipaddr.v6.sin6_addr); - - printl("%2u %8u %8u %8u %8u %8u ", - sp->state, sp->cwnd, sp->ssthresh, + printl("%pISc %2u %8u %8u %8u %8u %8u ", + 
&sp->ipaddr, sp->state, sp->cwnd, sp->ssthresh, sp->flight_size, sp->partial_bytes_acked, sp->pathmtu); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index adf1e98f4c3e..170c0abd2a01 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2664,8 +2664,8 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_NEW_KEY); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -ENOBUFS; cookie.msg = msg; cookie.idx = key_idx; @@ -6670,6 +6670,9 @@ static int nl80211_testmode_dump(struct sk_buff *skb, NL80211_CMD_TESTMODE); struct nlattr *tmdata; + if (!hdr) + break; + if (nla_put_u32(skb, NL80211_ATTR_WIPHY, phy_idx)) { genlmsg_cancel(skb, hdr); break; @@ -7114,9 +7117,8 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_REMAIN_ON_CHANNEL); - - if (IS_ERR(hdr)) { - err = PTR_ERR(hdr); + if (!hdr) { + err = -ENOBUFS; goto free_msg; } @@ -7414,9 +7416,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_FRAME); - - if (IS_ERR(hdr)) { - err = PTR_ERR(hdr); + if (!hdr) { + err = -ENOBUFS; goto free_msg; } } @@ -8551,9 +8552,8 @@ static int nl80211_probe_client(struct sk_buff *skb, hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_PROBE_CLIENT); - - if (IS_ERR(hdr)) { - err = PTR_ERR(hdr); + if (!hdr) { + err = -ENOBUFS; goto free_msg; } diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 81c8a10d743c..20e86a95dc4e 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -976,21 +976,19 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason, bool wextev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; + int err = 0; ASSERT_WDEV_LOCK(wdev); kfree(wdev->connect_keys); wdev->connect_keys = NULL; - if (wdev->conn) { + if (wdev->conn) err = cfg80211_sme_disconnect(wdev, reason); - } else if (!rdev->ops->disconnect) { + else if (!rdev->ops->disconnect) cfg80211_mlme_down(rdev, dev); - err = 0; - } else { + else if (wdev->current_bss) err = rdev_disconnect(rdev, dev, reason); - } return err; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d8da6b8c6ba8..ad8cc7bcf065 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -308,7 +308,7 @@ void xfrm_policy_destroy(struct xfrm_policy *policy) { BUG_ON(!policy->walk.dead); - if (del_timer(&policy->timer)) + if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer)) BUG(); security_xfrm_policy_free(policy->security); @@ -2132,8 +2132,6 @@ restart: * have the xfrm_state's. We need to wait for KM to * negotiate new SA's or bail out with error.*/ if (net->xfrm.sysctl_larval_drop) { - /* EREMOTE tells the caller to generate - * a one-shot blackhole route. 
*/ dst_release(dst); xfrm_pols_put(pols, drop_pols); XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 78f66fa92449..4f8ace855864 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -499,7 +499,8 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) INIT_HLIST_NODE(&x->bydst); INIT_HLIST_NODE(&x->bysrc); INIT_HLIST_NODE(&x->byspi); - tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, CLOCK_REALTIME, HRTIMER_MODE_ABS); + tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, + CLOCK_BOOTTIME, HRTIMER_MODE_ABS); setup_timer(&x->rtimer, xfrm_replay_timer_handler, (unsigned long)x); x->curlft.add_time = get_seconds(); @@ -990,11 +991,13 @@ void xfrm_state_insert(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_insert); /* xfrm_state_lock is held */ -static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, +static struct xfrm_state *__find_acq_core(struct net *net, + const struct xfrm_mark *m, unsigned short family, u8 mode, u32 reqid, u8 proto, const xfrm_address_t *daddr, - const xfrm_address_t *saddr, int create) + const xfrm_address_t *saddr, + int create) { unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); struct xfrm_state *x; @@ -1399,9 +1402,9 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark, EXPORT_SYMBOL(xfrm_state_lookup_byaddr); struct xfrm_state * -xfrm_find_acq(struct net *net, struct xfrm_mark *mark, u8 mode, u32 reqid, u8 proto, - const xfrm_address_t *daddr, const xfrm_address_t *saddr, - int create, unsigned short family) +xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid, + u8 proto, const xfrm_address_t *daddr, + const xfrm_address_t *saddr, int create, unsigned short family) { struct xfrm_state *x; |
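
The closing xfrm_state.c hunk switches the per-state lifetime timer from CLOCK_REALTIME to CLOCK_BOOTTIME; the usual motivation is that absolute expiry times then survive settimeofday()/NTP steps while still advancing across suspend. A minimal sketch of that timer pattern with a bare hrtimer — hypothetical, not from the patch; the example_* names are invented (cf. xfrm_timer_handler in the hunk above):

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer example_timer;	/* illustrative only */

static enum hrtimer_restart example_expire(struct hrtimer *t)
{
	/* lifetime bookkeeping would run here */
	return HRTIMER_NORESTART;
}

static void example_arm(u64 lifetime_secs)
{
	/* CLOCK_BOOTTIME is not disturbed by wall-clock changes but,
	 * unlike CLOCK_MONOTONIC, keeps counting while suspended.
	 */
	hrtimer_init(&example_timer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
	example_timer.function = example_expire;
	hrtimer_start(&example_timer,
		      ktime_add(ktime_get_boottime(),
				ktime_set(lifetime_secs, 0)),
		      HRTIMER_MODE_ABS);
}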