diff options
author | David S. Miller <davem@davemloft.net> | 2019-07-08 12:13:38 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-07-08 12:13:38 -0700 |
commit | 47cfb90406e1d8de4007e781155884ff10bb40d3 (patch) | |
tree | bff43ffd983233d08edc09243df7759e3ff15d61 /net | |
parent | bfb204129a5adeb8eb62d693e97047ae6c8f37fd (diff) | |
parent | 0ef1efd1354d732d040f29b2005420f83fcdd8f4 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:
====================
Netfilter/IPVS updates for net-next
The following patchset contains Netfilter/IPVS updates for net-next:
1) Move bridge keys in nft_meta to nft_meta_bridge, from wenxu.
2) Support for bridge pvid matching, from wenxu.
3) Support for bridge vlan protocol matching, also from wenxu.
4) Add br_vlan_get_pvid_rcu(), to fetch the bridge port pvid
from packet path.
5) Prefer specific family extension in nf_tables.
6) Autoload specific family extension in case it is missing.
7) Add synproxy support to nf_tables, from Fernando Fernandez Mancera.
8) Support for GRE encapsulation in IPVS, from Vadim Fedorenko.
9) ICMP handling for GRE encapsulation, from Julian Anastasov.
10) Remove unused parameter in nf_queue, from Florian Westphal.
11) Replace seq_printf() by seq_puts() in nf_log, from Markus Elfring.
12) Rename nf_SYNPROXY.h => nf_synproxy.h before this header becomes
public.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/bridge/br_input.c | 2 | ||||
-rw-r--r-- | net/bridge/br_vlan.c | 29 | ||||
-rw-r--r-- | net/bridge/netfilter/Kconfig | 6 | ||||
-rw-r--r-- | net/bridge/netfilter/Makefile | 1 | ||||
-rw-r--r-- | net/bridge/netfilter/nft_meta_bridge.c | 163 | ||||
-rw-r--r-- | net/netfilter/Kconfig | 11 | ||||
-rw-r--r-- | net/netfilter/Makefile | 1 | ||||
-rw-r--r-- | net/netfilter/core.c | 2 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c | 46 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c | 1 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_xmit.c | 66 | ||||
-rw-r--r-- | net/netfilter/nf_log.c | 2 | ||||
-rw-r--r-- | net/netfilter/nf_queue.c | 8 | ||||
-rw-r--r-- | net/netfilter/nf_synproxy_core.c | 2 | ||||
-rw-r--r-- | net/netfilter/nf_tables_api.c | 36 | ||||
-rw-r--r-- | net/netfilter/nf_tables_core.c | 1 | ||||
-rw-r--r-- | net/netfilter/nft_meta.c | 85 | ||||
-rw-r--r-- | net/netfilter/nft_synproxy.c | 287 |
18 files changed, 668 insertions, 81 deletions
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 21b74e7a7b2f..512383d5e53f 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -234,7 +234,7 @@ static int nf_hook_bridge_pre(struct sk_buff *skb, struct sk_buff **pskb) kfree_skb(skb); return RX_HANDLER_CONSUMED; case NF_QUEUE: - ret = nf_queue(skb, &state, e, i, verdict); + ret = nf_queue(skb, &state, i, verdict); if (ret == 1) continue; return RX_HANDLER_CONSUMED; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index f47f526b4f19..021cc9f66804 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -797,6 +797,16 @@ bool br_vlan_enabled(const struct net_device *dev) } EXPORT_SYMBOL_GPL(br_vlan_enabled); +int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto) +{ + struct net_bridge *br = netdev_priv(dev); + + *p_proto = ntohs(br->vlan_proto); + + return 0; +} +EXPORT_SYMBOL_GPL(br_vlan_get_proto); + int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) { int err = 0; @@ -1227,13 +1237,11 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v, } } -int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid) +static int __br_vlan_get_pvid(const struct net_device *dev, + struct net_bridge_port *p, u16 *p_pvid) { struct net_bridge_vlan_group *vg; - struct net_bridge_port *p; - ASSERT_RTNL(); - p = br_port_get_check_rtnl(dev); if (p) vg = nbp_vlan_group(p); else if (netif_is_bridge_master(dev)) @@ -1244,8 +1252,21 @@ int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid) *p_pvid = br_get_pvid(vg); return 0; } + +int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid) +{ + ASSERT_RTNL(); + + return __br_vlan_get_pvid(dev, br_port_get_check_rtnl(dev), p_pvid); +} EXPORT_SYMBOL_GPL(br_vlan_get_pvid); +int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid) +{ + return __br_vlan_get_pvid(dev, br_port_get_check_rcu(dev), p_pvid); +} +EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu); + int br_vlan_get_info(const struct net_device *dev, u16 vid, struct bridge_vlan_info *p_vinfo) { diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index f4fb0b9b927d..fbc708508360 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -9,6 +9,12 @@ menuconfig NF_TABLES_BRIDGE bool "Ethernet Bridge nf_tables support" if NF_TABLES_BRIDGE + +config NFT_BRIDGE_META + tristate "Netfilter nf_table bridge meta support" + help + Add support for bridge dedicated meta key. + config NFT_BRIDGE_REJECT tristate "Netfilter nf_tables bridge reject support" depends on NFT_REJECT && NFT_REJECT_IPV4 && NFT_REJECT_IPV6 diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index 9d7767322a64..8e2c5759d964 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -3,6 +3,7 @@ # Makefile for the netfilter modules for Link Layer filtering on a bridge. # +obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o # connection tracking diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c new file mode 100644 index 000000000000..bed66f536b34 --- /dev/null +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_meta.h> +#include <linux/if_bridge.h> + +static const struct net_device * +nft_meta_get_bridge(const struct net_device *dev) +{ + if (dev && netif_is_bridge_port(dev)) + return netdev_master_upper_dev_get_rcu((struct net_device *)dev); + + return NULL; +} + +static void nft_meta_bridge_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + const struct net_device *in = nft_in(pkt), *out = nft_out(pkt); + u32 *dest = ®s->data[priv->dreg]; + const struct net_device *br_dev; + + switch (priv->key) { + case NFT_META_BRI_IIFNAME: + br_dev = nft_meta_get_bridge(in); + if (!br_dev) + goto err; + break; + case NFT_META_BRI_OIFNAME: + br_dev = nft_meta_get_bridge(out); + if (!br_dev) + goto err; + break; + case NFT_META_BRI_IIFPVID: { + u16 p_pvid; + + br_dev = nft_meta_get_bridge(in); + if (!br_dev || !br_vlan_enabled(br_dev)) + goto err; + + br_vlan_get_pvid_rcu(in, &p_pvid); + nft_reg_store16(dest, p_pvid); + return; + } + case NFT_META_BRI_IIFVPROTO: { + u16 p_proto; + + br_dev = nft_meta_get_bridge(in); + if (!br_dev || !br_vlan_enabled(br_dev)) + goto err; + + br_vlan_get_proto(br_dev, &p_proto); + nft_reg_store16(dest, p_proto); + return; + } + default: + goto out; + } + + strncpy((char *)dest, br_dev->name, IFNAMSIZ); + return; +out: + return nft_meta_get_eval(expr, regs, pkt); +err: + regs->verdict.code = NFT_BREAK; +} + +static int nft_meta_bridge_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + unsigned int len; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_BRI_IIFNAME: + case NFT_META_BRI_OIFNAME: + len = IFNAMSIZ; + break; + case NFT_META_BRI_IIFPVID: + case NFT_META_BRI_IIFVPROTO: + len = sizeof(u16); + break; + default: + return nft_meta_get_init(ctx, expr, tb); + } + + priv->dreg = nft_parse_register(tb[NFTA_META_DREG]); + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, len); +} + +static struct nft_expr_type nft_meta_bridge_type; +static const struct nft_expr_ops nft_meta_bridge_get_ops = { + .type = &nft_meta_bridge_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_bridge_get_eval, + .init = nft_meta_bridge_get_init, + .dump = nft_meta_get_dump, +}; + +static const struct nft_expr_ops nft_meta_bridge_set_ops = { + .type = &nft_meta_bridge_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_set_eval, + .init = nft_meta_set_init, + .destroy = nft_meta_set_destroy, + .dump = nft_meta_set_dump, + .validate = nft_meta_set_validate, +}; + +static const struct nft_expr_ops * +nft_meta_bridge_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_META_KEY] == NULL) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG]) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG]) + return &nft_meta_bridge_get_ops; + + if (tb[NFTA_META_SREG]) + return &nft_meta_bridge_set_ops; + + return ERR_PTR(-EINVAL); +} + +static struct nft_expr_type nft_meta_bridge_type __read_mostly = { + .family = NFPROTO_BRIDGE, + .name = "meta", + .select_ops = nft_meta_bridge_select_ops, + .policy = nft_meta_policy, + .maxattr = NFTA_META_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_meta_bridge_module_init(void) +{ + return nft_register_expr(&nft_meta_bridge_type); +} + +static void __exit nft_meta_bridge_module_exit(void) +{ + nft_unregister_expr(&nft_meta_bridge_type); +} + +module_init(nft_meta_bridge_module_init); +module_exit(nft_meta_bridge_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("wenxu <wenxu@ucloud.cn>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 21025c2c605b..d59742408d9b 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -651,6 +651,17 @@ config NFT_TPROXY help This makes transparent proxy support available in nftables. +config NFT_SYNPROXY + tristate "Netfilter nf_tables SYNPROXY expression support" + depends on NF_CONNTRACK && NETFILTER_ADVANCED + select NETFILTER_SYNPROXY + select SYN_COOKIES + help + The SYNPROXY expression allows you to intercept TCP connections and + establish them using syncookies before they are passed on to the + server. This allows to avoid conntrack and server resource usage + during SYN-flood attacks. + if NF_TABLES_NETDEV config NF_DUP_NETDEV diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 72cca6b48960..deada20975ff 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -110,6 +110,7 @@ obj-$(CONFIG_NFT_SOCKET) += nft_socket.o obj-$(CONFIG_NFT_OSF) += nft_osf.o obj-$(CONFIG_NFT_TPROXY) += nft_tproxy.o obj-$(CONFIG_NFT_XFRM) += nft_xfrm.o +obj-$(CONFIG_NFT_SYNPROXY) += nft_synproxy.o obj-$(CONFIG_NFT_NAT) += nft_chain_nat.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 817a9e5d16e4..5d5bdf450091 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -520,7 +520,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, ret = -EPERM; return ret; case NF_QUEUE: - ret = nf_queue(skb, state, e, s, verdict); + ret = nf_queue(skb, state, s, verdict); if (ret == 1) continue; return ret; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e8651fd621ef..dd4727a5d6ec 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -35,6 +35,7 @@ #include <net/udp.h> #include <net/icmp.h> /* for icmp_send */ #include <net/gue.h> +#include <net/gre.h> #include <net/route.h> #include <net/ip6_checksum.h> #include <net/netns/generic.h> /* net_generic() */ @@ -1610,6 +1611,38 @@ unk: return 0; } +/* Check the GRE tunnel and return its header length */ +static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb, + unsigned int offset, __u16 af, + const union nf_inet_addr *daddr, __u8 *proto) +{ + struct gre_base_hdr _greh, *greh; + struct ip_vs_dest *dest; + + greh = skb_header_pointer(skb, offset, sizeof(_greh), &_greh); + if (!greh) + goto unk; + dest = ip_vs_find_tunnel(ipvs, af, daddr, 0); + if (!dest) + goto unk; + if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + __be16 type; + + /* Only support version 0 and C (csum) */ + if ((greh->flags & ~GRE_CSUM) != 0) + goto unk; + type = greh->protocol; + /* Later we can support also IPPROTO_IPV6 */ + if (type != htons(ETH_P_IP)) + goto unk; + *proto = IPPROTO_IPIP; + return gre_calc_hlen(gre_flags_to_tnl_flags(greh->flags)); + } + +unk: + return 0; +} + /* * Handle ICMP messages in the outside-to-inside direction (incoming). * Find any that might be relevant, check against existing connections, @@ -1689,7 +1722,8 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ ipip = true; - } else if (cih->protocol == IPPROTO_UDP && /* Can be UDP encap */ + } else if ((cih->protocol == IPPROTO_UDP || /* Can be UDP encap */ + cih->protocol == IPPROTO_GRE) && /* Can be GRE encap */ /* Error for our tunnel must arrive at LOCAL_IN */ (skb_rtable(skb)->rt_flags & RTCF_LOCAL)) { __u8 iproto; @@ -1699,10 +1733,14 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, if (unlikely(cih->frag_off & htons(IP_OFFSET))) return NF_ACCEPT; offset2 = offset + cih->ihl * 4; - ulen = ipvs_udp_decap(ipvs, skb, offset2, AF_INET, raddr, - &iproto); + if (cih->protocol == IPPROTO_UDP) + ulen = ipvs_udp_decap(ipvs, skb, offset2, AF_INET, + raddr, &iproto); + else + ulen = ipvs_gre_decap(ipvs, skb, offset2, AF_INET, + raddr, &iproto); if (ulen > 0) { - /* Skip IP and UDP tunnel headers */ + /* Skip IP and UDP/GRE tunnel headers */ offset = offset2 + ulen; /* Now we should be at the original IP header */ cih = skb_header_pointer(skb, offset, sizeof(_ciph), diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 84384d896e29..998353bec74f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -525,6 +525,7 @@ static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) port = dest->tun_port; break; case IP_VS_CONN_F_TUNNEL_TYPE_IPIP: + case IP_VS_CONN_F_TUNNEL_TYPE_GRE: port = 0; break; default: diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 71fc6d63a67f..9c464d24beec 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -29,6 +29,7 @@ #include <linux/tcp.h> /* for tcphdr */ #include <net/ip.h> #include <net/gue.h> +#include <net/gre.h> #include <net/tcp.h> /* for csum_tcpudp_magic */ #include <net/udp.h> #include <net/icmp.h> /* for icmp_send */ @@ -388,6 +389,12 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) && skb->ip_summed == CHECKSUM_PARTIAL) mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV; + } else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + __be16 tflags = 0; + + if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) + tflags |= TUNNEL_CSUM; + mtu -= gre_calc_hlen(tflags); } if (mtu < 68) { IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); @@ -548,6 +555,12 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) && skb->ip_summed == CHECKSUM_PARTIAL) mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV; + } else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + __be16 tflags = 0; + + if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) + tflags |= TUNNEL_CSUM; + mtu -= gre_calc_hlen(tflags); } if (mtu < IPV6_MIN_MTU) { IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, @@ -1079,6 +1092,24 @@ ipvs_gue_encap(struct net *net, struct sk_buff *skb, return 0; } +static void +ipvs_gre_encap(struct net *net, struct sk_buff *skb, + struct ip_vs_conn *cp, __u8 *next_protocol) +{ + __be16 proto = *next_protocol == IPPROTO_IPIP ? + htons(ETH_P_IP) : htons(ETH_P_IPV6); + __be16 tflags = 0; + size_t hdrlen; + + if (cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) + tflags |= TUNNEL_CSUM; + + hdrlen = gre_calc_hlen(tflags); + gre_build_header(skb, hdrlen, tflags, proto, 0, 0); + + *next_protocol = IPPROTO_GRE; +} + /* * IP Tunneling transmitter * @@ -1151,6 +1182,15 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, gue_hdrlen = sizeof(struct guehdr) + gue_optlen; max_headroom += sizeof(struct udphdr) + gue_hdrlen; + } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + size_t gre_hdrlen; + __be16 tflags = 0; + + if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) + tflags |= TUNNEL_CSUM; + gre_hdrlen = gre_calc_hlen(tflags); + + max_headroom += gre_hdrlen; } /* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */ @@ -1172,6 +1212,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->ip_summed == CHECKSUM_PARTIAL) { gso_type |= SKB_GSO_TUNNEL_REMCSUM; } + } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) + gso_type |= SKB_GSO_GRE_CSUM; + else + gso_type |= SKB_GSO_GRE; } if (iptunnel_handle_offloads(skb, gso_type)) @@ -1192,8 +1237,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, check = true; udp_set_csum(!check, skb, saddr, cp->daddr.ip, skb->len); - } - + } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) + ipvs_gre_encap(net, skb, cp, &next_protocol); skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); @@ -1287,6 +1332,15 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, gue_hdrlen = sizeof(struct guehdr) + gue_optlen; max_headroom += sizeof(struct udphdr) + gue_hdrlen; + } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + size_t gre_hdrlen; + __be16 tflags = 0; + + if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) + tflags |= TUNNEL_CSUM; + gre_hdrlen = gre_calc_hlen(tflags); + + max_headroom += gre_hdrlen; } skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom, @@ -1306,6 +1360,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->ip_summed == CHECKSUM_PARTIAL) { gso_type |= SKB_GSO_TUNNEL_REMCSUM; } + } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) { + if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) + gso_type |= SKB_GSO_GRE_CSUM; + else + gso_type |= SKB_GSO_GRE; } if (iptunnel_handle_offloads(skb, gso_type)) @@ -1326,7 +1385,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, check = true; udp6_set_csum(!check, skb, &saddr, &cp->daddr.in6, skb->len); - } + } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) + ipvs_gre_encap(net, skb, cp, &next_protocol); skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 3574a212bdc2..bb25d4c794c7 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -374,7 +374,7 @@ static int seq_show(struct seq_file *s, void *v) continue; logger = nft_log_dereference(loggers[*pos][i]); - seq_printf(s, "%s", logger->name); + seq_puts(s, logger->name); if (i == 0 && loggers[*pos][i + 1] != NULL) seq_puts(s, ","); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index b5b2be55ca82..c72a5bdd123f 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -156,7 +156,6 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, } static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, - const struct nf_hook_entries *entries, unsigned int index, unsigned int queuenum) { int status = -ENOENT; @@ -225,12 +224,11 @@ err: /* Packets leaving via this function must come back through nf_reinject(). */ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, - const struct nf_hook_entries *entries, unsigned int index, - unsigned int verdict) + unsigned int index, unsigned int verdict) { int ret; - ret = __nf_queue(skb, state, entries, index, verdict >> NF_VERDICT_QBITS); + ret = __nf_queue(skb, state, index, verdict >> NF_VERDICT_QBITS); if (ret < 0) { if (ret == -ESRCH && (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) @@ -336,7 +334,7 @@ next_hook: local_bh_enable(); break; case NF_QUEUE: - err = nf_queue(skb, &entry->state, hooks, i, verdict); + err = nf_queue(skb, &entry->state, i, verdict); if (err == 1) goto next_hook; break; diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 409722d23302..b101f187eda8 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -11,7 +11,7 @@ #include <linux/proc_fs.h> #include <linux/netfilter_ipv6.h> -#include <linux/netfilter/nf_SYNPROXY.h> +#include <linux/netfilter/nf_synproxy.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_ecache.h> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index cae5c46e2dd4..d22d00ca78c1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2009,15 +2009,31 @@ EXPORT_SYMBOL_GPL(nft_unregister_expr); static const struct nft_expr_type *__nft_expr_type_get(u8 family, struct nlattr *nla) { - const struct nft_expr_type *type; + const struct nft_expr_type *type, *candidate = NULL; list_for_each_entry(type, &nf_tables_expressions, list) { - if (!nla_strcmp(nla, type->name) && - (!type->family || type->family == family)) - return type; + if (!nla_strcmp(nla, type->name)) { + if (!type->family && !candidate) + candidate = type; + else if (type->family == family) + candidate = type; + } } - return NULL; + return candidate; +} + +#ifdef CONFIG_MODULES +static int nft_expr_type_request_module(struct net *net, u8 family, + struct nlattr *nla) +{ + nft_request_module(net, "nft-expr-%u-%.*s", family, + nla_len(nla), (char *)nla_data(nla)); + if (__nft_expr_type_get(family, nla)) + return -EAGAIN; + + return 0; } +#endif static const struct nft_expr_type *nft_expr_type_get(struct net *net, u8 family, @@ -2035,9 +2051,7 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net, lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES if (type == NULL) { - nft_request_module(net, "nft-expr-%u-%.*s", family, - nla_len(nla), (char *)nla_data(nla)); - if (__nft_expr_type_get(family, nla)) + if (nft_expr_type_request_module(net, family, nla) == -EAGAIN) return ERR_PTR(-EAGAIN); nft_request_module(net, "nft-expr-%.*s", @@ -2130,6 +2144,12 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, (const struct nlattr * const *)info->tb); if (IS_ERR(ops)) { err = PTR_ERR(ops); +#ifdef CONFIG_MODULES + if (err == -EAGAIN) + nft_expr_type_request_module(ctx->net, + ctx->family, + tb[NFTA_EXPR_NAME]); +#endif goto err1; } } else diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index b950cd31348b..96c74c4c7176 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -19,6 +19,7 @@ #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_log.h> +#include <net/netfilter/nft_meta.h> static noinline void __nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index a54329b8634a..417f8d32e9a3 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -21,23 +21,12 @@ #include <net/tcp_states.h> /* for TCP_TIME_WAIT */ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nft_meta.h> #include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */ -struct nft_meta { - enum nft_meta_keys key:8; - union { - enum nft_registers dreg:8; - enum nft_registers sreg:8; - }; -}; - static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state); -#ifdef CONFIG_NF_TABLES_BRIDGE -#include "../bridge/br_private.h" -#endif - void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -47,9 +36,6 @@ void nft_meta_get_eval(const struct nft_expr *expr, const struct net_device *in = nft_in(pkt), *out = nft_out(pkt); struct sock *sk; u32 *dest = ®s->data[priv->dreg]; -#ifdef CONFIG_NF_TABLES_BRIDGE - const struct net_bridge_port *p; -#endif switch (priv->key) { case NFT_META_LEN: @@ -229,18 +215,6 @@ void nft_meta_get_eval(const struct nft_expr *expr, nft_reg_store8(dest, secpath_exists(skb)); break; #endif -#ifdef CONFIG_NF_TABLES_BRIDGE - case NFT_META_BRI_IIFNAME: - if (in == NULL || (p = br_port_get_rcu(in)) == NULL) - goto err; - strncpy((char *)dest, p->br->dev->name, IFNAMSIZ); - return; - case NFT_META_BRI_OIFNAME: - if (out == NULL || (p = br_port_get_rcu(out)) == NULL) - goto err; - strncpy((char *)dest, p->br->dev->name, IFNAMSIZ); - return; -#endif case NFT_META_IIFKIND: if (in == NULL || in->rtnl_link_ops == NULL) goto err; @@ -260,10 +234,11 @@ void nft_meta_get_eval(const struct nft_expr *expr, err: regs->verdict.code = NFT_BREAK; } +EXPORT_SYMBOL_GPL(nft_meta_get_eval); -static void nft_meta_set_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_meta_set_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { const struct nft_meta *meta = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; @@ -300,16 +275,18 @@ static void nft_meta_set_eval(const struct nft_expr *expr, WARN_ON(1); } } +EXPORT_SYMBOL_GPL(nft_meta_set_eval); -static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { +const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { [NFTA_META_DREG] = { .type = NLA_U32 }, [NFTA_META_KEY] = { .type = NLA_U32 }, [NFTA_META_SREG] = { .type = NLA_U32 }, }; +EXPORT_SYMBOL_GPL(nft_meta_policy); -static int nft_meta_get_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +int nft_meta_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_meta *priv = nft_expr_priv(expr); unsigned int len; @@ -360,14 +337,6 @@ static int nft_meta_get_init(const struct nft_ctx *ctx, len = sizeof(u8); break; #endif -#ifdef CONFIG_NF_TABLES_BRIDGE - case NFT_META_BRI_IIFNAME: - case NFT_META_BRI_OIFNAME: - if (ctx->family != NFPROTO_BRIDGE) - return -EOPNOTSUPP; - len = IFNAMSIZ; - break; -#endif default: return -EOPNOTSUPP; } @@ -376,6 +345,7 @@ static int nft_meta_get_init(const struct nft_ctx *ctx, return nft_validate_register_store(ctx, priv->dreg, NULL, NFT_DATA_VALUE, len); } +EXPORT_SYMBOL_GPL(nft_meta_get_init); static int nft_meta_get_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, @@ -409,9 +379,9 @@ static int nft_meta_get_validate(const struct nft_ctx *ctx, #endif } -static int nft_meta_set_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) +int nft_meta_set_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) { struct nft_meta *priv = nft_expr_priv(expr); unsigned int hooks; @@ -437,10 +407,11 @@ static int nft_meta_set_validate(const struct nft_ctx *ctx, return nft_chain_validate_hooks(ctx->chain, hooks); } +EXPORT_SYMBOL_GPL(nft_meta_set_validate); -static int nft_meta_set_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +int nft_meta_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_meta *priv = nft_expr_priv(expr); unsigned int len; @@ -475,9 +446,10 @@ static int nft_meta_set_init(const struct nft_ctx *ctx, return 0; } +EXPORT_SYMBOL_GPL(nft_meta_set_init); -static int nft_meta_get_dump(struct sk_buff *skb, - const struct nft_expr *expr) +int nft_meta_get_dump(struct sk_buff *skb, + const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); @@ -490,8 +462,9 @@ static int nft_meta_get_dump(struct sk_buff *skb, nla_put_failure: return -1; } +EXPORT_SYMBOL_GPL(nft_meta_get_dump); -static int nft_meta_set_dump(struct sk_buff *skb, const struct nft_expr *expr) +int nft_meta_set_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); @@ -505,15 +478,17 @@ static int nft_meta_set_dump(struct sk_buff *skb, const struct nft_expr *expr) nla_put_failure: return -1; } +EXPORT_SYMBOL_GPL(nft_meta_set_dump); -static void nft_meta_set_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +void nft_meta_set_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); if (priv->key == NFT_META_NFTRACE) static_branch_dec(&nft_trace_enabled); } +EXPORT_SYMBOL_GPL(nft_meta_set_destroy); static const struct nft_expr_ops nft_meta_get_ops = { .type = &nft_meta_type, @@ -544,6 +519,10 @@ nft_meta_select_ops(const struct nft_ctx *ctx, if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG]) return ERR_PTR(-EINVAL); +#ifdef CONFIG_NF_TABLES_BRIDGE + if (ctx->family == NFPROTO_BRIDGE) + return ERR_PTR(-EAGAIN); +#endif if (tb[NFTA_META_DREG]) return &nft_meta_get_ops; diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c new file mode 100644 index 000000000000..80060ade8a5b --- /dev/null +++ b/net/netfilter/nft_synproxy.c @@ -0,0 +1,287 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/types.h> +#include <net/ip.h> +#include <net/tcp.h> +#include <net/netlink.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_synproxy.h> +#include <net/netfilter/nf_synproxy.h> +#include <linux/netfilter/nf_tables.h> +#include <linux/netfilter/nf_synproxy.h> + +struct nft_synproxy { + struct nf_synproxy_info info; +}; + +static const struct nla_policy nft_synproxy_policy[NFTA_SYNPROXY_MAX + 1] = { + [NFTA_SYNPROXY_MSS] = { .type = NLA_U16 }, + [NFTA_SYNPROXY_WSCALE] = { .type = NLA_U8 }, + [NFTA_SYNPROXY_FLAGS] = { .type = NLA_U32 }, +}; + +static void nft_synproxy_tcp_options(struct synproxy_options *opts, + const struct tcphdr *tcp, + struct synproxy_net *snet, + struct nf_synproxy_info *info, + struct nft_synproxy *priv) +{ + this_cpu_inc(snet->stats->syn_received); + if (tcp->ece && tcp->cwr) + opts->options |= NF_SYNPROXY_OPT_ECN; + + opts->options &= priv->info.options; + if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP) + synproxy_init_timestamp_cookie(info, opts); + else + opts->options &= ~(NF_SYNPROXY_OPT_WSCALE | + NF_SYNPROXY_OPT_SACK_PERM | + NF_SYNPROXY_OPT_ECN); +} + +static void nft_synproxy_eval_v4(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt, + const struct tcphdr *tcp, + struct tcphdr *_tcph, + struct synproxy_options *opts) +{ + struct nft_synproxy *priv = nft_expr_priv(expr); + struct nf_synproxy_info info = priv->info; + struct net *net = nft_net(pkt); + struct synproxy_net *snet = synproxy_pernet(net); + struct sk_buff *skb = pkt->skb; + + if (tcp->syn) { + /* Initial SYN from client */ + nft_synproxy_tcp_options(opts, tcp, snet, &info, priv); + synproxy_send_client_synack(net, skb, tcp, opts); + consume_skb(skb); + regs->verdict.code = NF_STOLEN; + } else if (tcp->ack) { + /* ACK from client */ + if (synproxy_recv_client_ack(net, skb, tcp, opts, + ntohl(tcp->seq))) { + consume_skb(skb); + regs->verdict.code = NF_STOLEN; + } else { + regs->verdict.code = NF_DROP; + } + } +} + +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) +static void nft_synproxy_eval_v6(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt, + const struct tcphdr *tcp, + struct tcphdr *_tcph, + struct synproxy_options *opts) +{ + struct nft_synproxy *priv = nft_expr_priv(expr); + struct nf_synproxy_info info = priv->info; + struct net *net = nft_net(pkt); + struct synproxy_net *snet = synproxy_pernet(net); + struct sk_buff *skb = pkt->skb; + + if (tcp->syn) { + /* Initial SYN from client */ + nft_synproxy_tcp_options(opts, tcp, snet, &info, priv); + synproxy_send_client_synack_ipv6(net, skb, tcp, opts); + consume_skb(skb); + regs->verdict.code = NF_STOLEN; + } else if (tcp->ack) { + /* ACK from client */ + if (synproxy_recv_client_ack_ipv6(net, skb, tcp, opts, + ntohl(tcp->seq))) { + consume_skb(skb); + regs->verdict.code = NF_STOLEN; + } else { + regs->verdict.code = NF_DROP; + } + } +} +#endif /* CONFIG_NF_TABLES_IPV6*/ + +static void nft_synproxy_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct synproxy_options opts = {}; + struct sk_buff *skb = pkt->skb; + int thoff = pkt->xt.thoff; + const struct tcphdr *tcp; + struct tcphdr _tcph; + + if (pkt->tprot != IPPROTO_TCP) { + regs->verdict.code = NFT_BREAK; + return; + } + + if (nf_ip_checksum(skb, nft_hook(pkt), thoff, IPPROTO_TCP)) { + regs->verdict.code = NF_DROP; + return; + } + + tcp = skb_header_pointer(skb, pkt->xt.thoff, + sizeof(struct tcphdr), + &_tcph); + if (!tcp) { + regs->verdict.code = NF_DROP; + return; + } + + if (!synproxy_parse_options(skb, thoff, tcp, &opts)) { + regs->verdict.code = NF_DROP; + return; + } + + switch (skb->protocol) { + case htons(ETH_P_IP): + nft_synproxy_eval_v4(expr, regs, pkt, tcp, &_tcph, &opts); + return; +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + case htons(ETH_P_IPV6): + nft_synproxy_eval_v6(expr, regs, pkt, tcp, &_tcph, &opts); + return; +#endif + } + regs->verdict.code = NFT_BREAK; +} + +static int nft_synproxy_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct synproxy_net *snet = synproxy_pernet(ctx->net); + struct nft_synproxy *priv = nft_expr_priv(expr); + u32 flags; + int err; + + if (tb[NFTA_SYNPROXY_MSS]) + priv->info.mss = ntohs(nla_get_be16(tb[NFTA_SYNPROXY_MSS])); + if (tb[NFTA_SYNPROXY_WSCALE]) + priv->info.wscale = nla_get_u8(tb[NFTA_SYNPROXY_WSCALE]); + if (tb[NFTA_SYNPROXY_FLAGS]) { + flags = ntohl(nla_get_be32(tb[NFTA_SYNPROXY_FLAGS])); + if (flags & ~NF_SYNPROXY_OPT_MASK) + return -EOPNOTSUPP; + priv->info.options = flags; + } + + err = nf_ct_netns_get(ctx->net, ctx->family); + if (err) + return err; + + switch (ctx->family) { + case NFPROTO_IPV4: + err = nf_synproxy_ipv4_init(snet, ctx->net); + if (err) + goto nf_ct_failure; + break; +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + case NFPROTO_IPV6: + err = nf_synproxy_ipv6_init(snet, ctx->net); + if (err) + goto nf_ct_failure; + break; +#endif + case NFPROTO_INET: + case NFPROTO_BRIDGE: + err = nf_synproxy_ipv4_init(snet, ctx->net); + if (err) + goto nf_ct_failure; + err = nf_synproxy_ipv6_init(snet, ctx->net); + if (err) + goto nf_ct_failure; + break; + } + + return 0; + +nf_ct_failure: + nf_ct_netns_put(ctx->net, ctx->family); + return err; +} + +static void nft_synproxy_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct synproxy_net *snet = synproxy_pernet(ctx->net); + + switch (ctx->family) { + case NFPROTO_IPV4: + nf_synproxy_ipv4_fini(snet, ctx->net); + break; +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + case NFPROTO_IPV6: + nf_synproxy_ipv6_fini(snet, ctx->net); + break; +#endif + case NFPROTO_INET: + case NFPROTO_BRIDGE: + nf_synproxy_ipv4_fini(snet, ctx->net); + nf_synproxy_ipv6_fini(snet, ctx->net); + break; + } + nf_ct_netns_put(ctx->net, ctx->family); +} + +static int nft_synproxy_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_synproxy *priv = nft_expr_priv(expr); + + if (nla_put_be16(skb, NFTA_SYNPROXY_MSS, htons(priv->info.mss)) || + nla_put_u8(skb, NFTA_SYNPROXY_WSCALE, priv->info.wscale) || + nla_put_be32(skb, NFTA_SYNPROXY_FLAGS, htonl(priv->info.options))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static int nft_synproxy_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD)); +} + +static struct nft_expr_type nft_synproxy_type; +static const struct nft_expr_ops nft_synproxy_ops = { + .eval = nft_synproxy_eval, + .size = NFT_EXPR_SIZE(sizeof(struct nft_synproxy)), + .init = nft_synproxy_init, + .destroy = nft_synproxy_destroy, + .dump = nft_synproxy_dump, + .type = &nft_synproxy_type, + .validate = nft_synproxy_validate, +}; + +static struct nft_expr_type nft_synproxy_type __read_mostly = { + .ops = &nft_synproxy_ops, + .name = "synproxy", + .owner = THIS_MODULE, + .policy = nft_synproxy_policy, + .maxattr = NFTA_SYNPROXY_MAX, +}; + +static int __init nft_synproxy_module_init(void) +{ + return nft_register_expr(&nft_synproxy_type); +} + +static void __exit nft_synproxy_module_exit(void) +{ + return nft_unregister_expr(&nft_synproxy_type); +} + +module_init(nft_synproxy_module_init); +module_exit(nft_synproxy_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>"); +MODULE_ALIAS_NFT_EXPR("synproxy"); |