From cd9ceafc0a761a15b4cbfe6c0024edf88f861d66 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 7 Nov 2017 07:20:08 +0100 Subject: netfilter: conntrack: constify list of builtin trackers Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 89af9d88ca21..bb2c868a5621 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -368,7 +368,7 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); MODULE_ALIAS("ip_conntrack"); MODULE_LICENSE("GPL"); -static struct nf_conntrack_l4proto *builtin_l4proto4[] = { +static const struct nf_conntrack_l4proto * const builtin_l4proto4[] = { &nf_conntrack_l4proto_tcp4, &nf_conntrack_l4proto_udp4, &nf_conntrack_l4proto_icmp, -- cgit v1.2.3 From 9dae47aba0a055f761176d9297371d5bb24289ec Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 7 Nov 2017 07:20:09 +0100 Subject: netfilter: conntrack: l4 protocol trackers can be const previous patches removed all writes to these structs so we can now mark them as const. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 1849fedd9b81..669e586b6b8f 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -351,7 +351,7 @@ static struct nf_proto_net *icmp_get_net_proto(struct net *net) return &net->ct.nf_ct_proto.icmp.pn; } -struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = { .l3proto = PF_INET, .l4proto = IPPROTO_ICMP, -- cgit v1.2.3 From 2c9e8637ea15cf7f060b48f73b79e5055ffa93ad Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 12 Nov 2017 20:42:01 +0100 Subject: netfilter: conntrack: timeouts can be const Nowadays this is just the default template that is used when setting up the net namespace, so nothing writes to these locations. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 669e586b6b8f..5c15beafa711 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -22,7 +22,7 @@ #include #include -static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; +static const unsigned int nf_ct_icmp_timeout = 30*HZ; static inline struct nf_icmp_net *icmp_pernet(struct net *net) { -- cgit v1.2.3 From 2a95183a5e0375df756efb2ca37602d71e8455f9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 7 Dec 2017 16:28:26 +0100 Subject: netfilter: don't allocate space for arp/bridge hooks unless needed no need to define hook points if the family isn't supported. Because we need these hooks for either nftables, arp/ebtables or the 'call-iptables' hack we have in the bridge layer add two new dependencies, NETFILTER_FAMILY_{ARP,BRIDGE}, and have the users select them. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index c11eb1744ab1..cee51045e2f7 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -72,6 +72,7 @@ endif # NF_TABLES_IPV4 config NF_TABLES_ARP tristate "ARP nf_tables support" + select NETFILTER_FAMILY_ARP help This option enables the ARP support for nf_tables. @@ -392,6 +393,7 @@ endif # IP_NF_IPTABLES config IP_NF_ARPTABLES tristate "ARP tables support" select NETFILTER_XTABLES + select NETFILTER_FAMILY_ARP depends on NETFILTER_ADVANCED help arptables is a general, extensible packet identification framework. -- cgit v1.2.3 From 03d13b6868a261f24fbc82b6a2d5823df8d075d3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 8 Dec 2017 17:01:53 +0100 Subject: netfilter: xtables: add and use xt_request_find_table_lock currently we always return -ENOENT to userspace if we can't find a particular table, or if the table initialization fails. Followup patch will make nat table init fail in case nftables already registered a nat hook so this change makes xt_find_table_lock return an ERR_PTR to return the errno value reported from the table init function. Add xt_request_find_table_lock as try_then_request_module replacement and use it where needed. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 26 ++++++++++++-------------- net/ipv4/netfilter/ip_tables.c | 26 ++++++++++++-------------- 2 files changed, 24 insertions(+), 28 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 0c3c944a7b72..bf8a5340f15e 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -810,9 +810,8 @@ static int get_info(struct net *net, void __user *user, if (compat) xt_compat_lock(NFPROTO_ARP); #endif - t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), - "arptable_%s", name); - if (t) { + t = xt_request_find_table_lock(net, NFPROTO_ARP, name); + if (!IS_ERR(t)) { struct arpt_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -841,7 +840,7 @@ static int get_info(struct net *net, void __user *user, xt_table_unlock(t); module_put(t->me); } else - ret = -ENOENT; + ret = PTR_ERR(t); #ifdef CONFIG_COMPAT if (compat) xt_compat_unlock(NFPROTO_ARP); @@ -866,7 +865,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, NFPROTO_ARP, get.name); - if (t) { + if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; if (get.size == private->size) @@ -878,7 +877,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = -ENOENT; + ret = PTR_ERR(t); return ret; } @@ -903,10 +902,9 @@ static int __do_replace(struct net *net, const char *name, goto out; } - t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), - "arptable_%s", name); - if (!t) { - ret = -ENOENT; + t = xt_request_find_table_lock(net, NFPROTO_ARP, name); + if (IS_ERR(t)) { + ret = PTR_ERR(t); goto free_newinfo_counters_untrans; } @@ -1020,8 +1018,8 @@ static int do_add_counters(struct net *net, const void __user *user, return PTR_ERR(paddc); t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name); - if (!t) { - ret = -ENOENT; + if (IS_ERR(t)) { + ret = PTR_ERR(t); goto free; } @@ -1408,7 +1406,7 @@ static int compat_get_entries(struct net *net, xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); - if (t) { + if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; @@ -1423,7 +1421,7 @@ static int compat_get_entries(struct net *net, module_put(t->me); xt_table_unlock(t); } else - ret = -ENOENT; + ret = PTR_ERR(t); xt_compat_unlock(NFPROTO_ARP); return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 2e0d339028bb..0b975aa2d363 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -973,9 +973,8 @@ static int get_info(struct net *net, void __user *user, if (compat) xt_compat_lock(AF_INET); #endif - t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), - "iptable_%s", name); - if (t) { + t = xt_request_find_table_lock(net, AF_INET, name); + if (!IS_ERR(t)) { struct ipt_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -1005,7 +1004,7 @@ static int get_info(struct net *net, void __user *user, xt_table_unlock(t); module_put(t->me); } else - ret = -ENOENT; + ret = PTR_ERR(t); #ifdef CONFIG_COMPAT if (compat) xt_compat_unlock(AF_INET); @@ -1030,7 +1029,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET, get.name); - if (t) { + if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1041,7 +1040,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = -ENOENT; + ret = PTR_ERR(t); return ret; } @@ -1064,10 +1063,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, goto out; } - t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), - "iptable_%s", name); - if (!t) { - ret = -ENOENT; + t = xt_request_find_table_lock(net, AF_INET, name); + if (IS_ERR(t)) { + ret = PTR_ERR(t); goto free_newinfo_counters_untrans; } @@ -1181,8 +1179,8 @@ do_add_counters(struct net *net, const void __user *user, return PTR_ERR(paddc); t = xt_find_table_lock(net, AF_INET, tmp.name); - if (!t) { - ret = -ENOENT; + if (IS_ERR(t)) { + ret = PTR_ERR(t); goto free; } @@ -1625,7 +1623,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); - if (t) { + if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); @@ -1639,7 +1637,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = -ENOENT; + ret = PTR_ERR(t); xt_compat_unlock(AF_INET); return ret; -- cgit v1.2.3 From f92b40a8b2645af38bd6814651c59c1e690db53d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 8 Dec 2017 17:01:54 +0100 Subject: netfilter: core: only allow one nat hook per hook point The netfilter NAT core cannot deal with more than one NAT hook per hook location (prerouting, input ...), because the NAT hooks install a NAT null binding in case the iptables nat table (iptable_nat hooks) or the corresponding nftables chain (nft nat hooks) doesn't specify a nat transformation. Null bindings are needed to detect port collsisions between NAT-ed and non-NAT-ed connections. This causes nftables NAT rules to not work when iptable_nat module is loaded, and vice versa because nat binding has already been attached when the second nat hook is consulted. The netfilter core is not really the correct location to handle this (hooks are just hooks, the core has no notion of what kinds of side effects a hook implements), but its the only place where we can check for conflicts between both iptables hooks and nftables hooks without adding dependencies. So add nat annotation to hook_ops to describe those hooks that will add NAT bindings and then make core reject if such a hook already exists. The annotation fills a padding hole, in case further restrictions appar we might change this to a 'u8 type' instead of bool. iptables error if nft nat hook active: iptables -t nat -A POSTROUTING -j MASQUERADE iptables v1.4.21: can't initialize iptables table `nat': File exists Perhaps iptables or your kernel needs to be upgraded. nftables error if iptables nat table present: nft -f /etc/nftables/ipv4-nat /usr/etc/nftables/ipv4-nat:3:1-2: Error: Could not process rule: File exists table nat { ^^ Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/iptable_nat.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index a1a07b338ccf..0f7255cc65ee 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -72,6 +72,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = { { .hook = iptable_nat_ipv4_in, .pf = NFPROTO_IPV4, + .nat_hook = true, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_NAT_DST, }, @@ -79,6 +80,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = { { .hook = iptable_nat_ipv4_out, .pf = NFPROTO_IPV4, + .nat_hook = true, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_NAT_SRC, }, @@ -86,6 +88,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = { { .hook = iptable_nat_ipv4_local_fn, .pf = NFPROTO_IPV4, + .nat_hook = true, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST, }, @@ -93,6 +96,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = { { .hook = iptable_nat_ipv4_fn, .pf = NFPROTO_IPV4, + .nat_hook = true, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC, }, -- cgit v1.2.3 From fa45a7602166e9a0998b2228b7398b18b58c5579 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 10 Dec 2017 01:42:58 +0100 Subject: netfilter: nf_tables_arp: don't set forward chain 46928a0b49f3 ("netfilter: nf_tables: remove multihook chains and families") already removed this, this is a leftover. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_tables_arp.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 4bbc273b45e8..ec47c12cd137 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -34,7 +34,6 @@ static struct nft_af_info nft_af_arp __read_mostly = { .hooks = { [NF_ARP_IN] = nft_do_chain_arp, [NF_ARP_OUT] = nft_do_chain_arp, - [NF_ARP_FORWARD] = nft_do_chain_arp, }, }; -- cgit v1.2.3 From 7a4473a31a6974c0fbf9afe80ef16ac5bc67cf79 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 10 Dec 2017 01:43:14 +0100 Subject: netfilter: nf_tables: explicit nft_set_pktinfo() call from hook path Instead of calling this function from the family specific variant, this reduces the code size in the fast path for the netdev, bridge and inet families. After this change, we must call nft_set_pktinfo() upfront from the chain hook indirection. Before: text data bss dec hex filename 2145 208 0 2353 931 net/netfilter/nf_tables_netdev.o After: text data bss dec hex filename 2125 208 0 2333 91d net/netfilter/nf_tables_netdev.o Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_tables_arp.c | 3 ++- net/ipv4/netfilter/nf_tables_ipv4.c | 3 ++- net/ipv4/netfilter/nft_chain_nat_ipv4.c | 3 ++- net/ipv4/netfilter/nft_chain_route_ipv4.c | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index ec47c12cd137..3fa7e1b22bdd 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -21,7 +21,8 @@ nft_do_chain_arp(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo_unspec(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb); return nft_do_chain(&pkt, priv); } diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 2840a29b2e04..35fa265d1ce3 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -24,7 +24,8 @@ static unsigned int nft_do_chain_ipv4(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb); return nft_do_chain(&pkt, priv); } diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index f5c66a7a4bf2..f2a490981594 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -33,7 +33,8 @@ static unsigned int nft_nat_do_chain(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb); return nft_do_chain(&pkt, priv); } diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 30493beb611a..fb3d49fb62fe 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -38,7 +38,8 @@ static unsigned int nf_route_table_hook(void *priv, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - nft_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb); mark = skb->mark; iph = ip_hdr(skb); -- cgit v1.2.3 From 12355d3670dac0dde5aae3deefb59f8cc0a9ed2a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 9 Dec 2017 15:36:24 +0100 Subject: netfilter: nf_tables_inet: don't use multihook infrastructure anymore Use new native NFPROTO_INET support in netfilter core, this gets rid of ad-hoc code in the nf_tables API codebase. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_tables_ipv4.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 35fa265d1ce3..b6223f4b1315 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -45,7 +45,7 @@ static unsigned int nft_ipv4_output(void *priv, return nft_do_chain_ipv4(priv, skb, state); } -struct nft_af_info nft_af_ipv4 __read_mostly = { +static struct nft_af_info nft_af_ipv4 __read_mostly = { .family = NFPROTO_IPV4, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, @@ -58,7 +58,6 @@ struct nft_af_info nft_af_ipv4 __read_mostly = { [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, }, }; -EXPORT_SYMBOL_GPL(nft_af_ipv4); static int nf_tables_ipv4_init_net(struct net *net) { -- cgit v1.2.3 From c974a3a36468d1947c96f0c694c8a1b2e7810043 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 9 Dec 2017 15:40:25 +0100 Subject: netfilter: nf_tables: remove multihook chains and families Since NFPROTO_INET is handled from the core, we don't need to maintain extra infrastructure in nf_tables to handle the double hook registration, one for IPv4 and another for IPv6. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_tables_arp.c | 1 - net/ipv4/netfilter/nf_tables_ipv4.c | 1 - 2 files changed, 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 3fa7e1b22bdd..3090f639bd89 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -31,7 +31,6 @@ static struct nft_af_info nft_af_arp __read_mostly = { .family = NFPROTO_ARP, .nhooks = NF_ARP_NUMHOOKS, .owner = THIS_MODULE, - .nops = 1, .hooks = { [NF_ARP_IN] = nft_do_chain_arp, [NF_ARP_OUT] = nft_do_chain_arp, diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index b6223f4b1315..51b363abd541 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -49,7 +49,6 @@ static struct nft_af_info nft_af_ipv4 __read_mostly = { .family = NFPROTO_IPV4, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, - .nops = 1, .hooks = { [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, [NF_INET_LOCAL_OUT] = nft_ipv4_output, -- cgit v1.2.3 From c2f9eafee9aaeedaad9eadbf47913f4681d723df Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 9 Dec 2017 15:43:17 +0100 Subject: netfilter: nf_tables: remove hooks from family definition They don't belong to the family definition, move them to the filter chain type definition instead. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_tables_arp.c | 8 ++++---- net/ipv4/netfilter/nf_tables_ipv4.c | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 3090f639bd89..f84c17763f6f 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -31,10 +31,6 @@ static struct nft_af_info nft_af_arp __read_mostly = { .family = NFPROTO_ARP, .nhooks = NF_ARP_NUMHOOKS, .owner = THIS_MODULE, - .hooks = { - [NF_ARP_IN] = nft_do_chain_arp, - [NF_ARP_OUT] = nft_do_chain_arp, - }, }; static int nf_tables_arp_init_net(struct net *net) @@ -72,6 +68,10 @@ static const struct nf_chain_type filter_arp = { .owner = THIS_MODULE, .hook_mask = (1 << NF_ARP_IN) | (1 << NF_ARP_OUT), + .hooks = { + [NF_ARP_IN] = nft_do_chain_arp, + [NF_ARP_OUT] = nft_do_chain_arp, + }, }; static int __init nf_tables_arp_init(void) diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 51b363abd541..8aeb15c2b9b2 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -49,13 +49,6 @@ static struct nft_af_info nft_af_ipv4 __read_mostly = { .family = NFPROTO_IPV4, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, - .hooks = { - [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, - [NF_INET_LOCAL_OUT] = nft_ipv4_output, - [NF_INET_FORWARD] = nft_do_chain_ipv4, - [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, - [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, - }, }; static int nf_tables_ipv4_init_net(struct net *net) @@ -96,6 +89,13 @@ static const struct nf_chain_type filter_ipv4 = { (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), + .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, + [NF_INET_LOCAL_OUT] = nft_ipv4_output, + [NF_INET_FORWARD] = nft_do_chain_ipv4, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, + }, }; static int __init nf_tables_ipv4_init(void) -- cgit v1.2.3 From ef71fe27ec2f1607e38af160ab261a8d8ef8e121 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 27 Nov 2017 21:55:14 +0100 Subject: netfilter: move checksum indirection to struct nf_ipv6_ops We cannot make a direct call to nf_ip6_checksum() because that would result in autoloading the 'ipv6' module because of symbol dependencies. Therefore, define checksum indirection in nf_ipv6_ops where this really belongs to. For IPv4, we can indeed make a direct function call, which is faster, given IPv4 is built-in in the networking code by default. Still, CONFIG_INET=n and CONFIG_NETFILTER=y is possible, so define empty inline stub for IPv4 in such case. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index c0cc6aa8cfaa..2f7ffefd2732 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -188,7 +188,6 @@ static int nf_ip_route(struct net *net, struct dst_entry **dst, static const struct nf_afinfo nf_ip_afinfo = { .family = AF_INET, - .checksum = nf_ip_checksum, .checksum_partial = nf_ip_checksum_partial, .route = nf_ip_route, .saveroute = nf_ip_saveroute, -- cgit v1.2.3 From f7dcbe2f36a660140ecb286e15f502028d96ffdf Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 20 Dec 2017 16:04:18 +0100 Subject: netfilter: move checksum_partial indirection to struct nf_ipv6_ops We cannot make a direct call to nf_ip6_checksum_partial() because that would result in autoloading the 'ipv6' module because of symbol dependencies. Therefore, define checksum_partial indirection in nf_ipv6_ops where this really belongs to. For IPv4, we can indeed make a direct function call, which is faster, given IPv4 is built-in in the networking code by default. Still, CONFIG_INET=n and CONFIG_NETFILTER=y is possible, so define empty inline stub for IPv4 in such case. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 2f7ffefd2732..010c75fddf7e 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -155,9 +155,9 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, } EXPORT_SYMBOL(nf_ip_checksum); -static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, - unsigned int dataoff, unsigned int len, - u_int8_t protocol) +__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, unsigned int len, + u_int8_t protocol) { const struct iphdr *iph = ip_hdr(skb); __sum16 csum = 0; @@ -175,6 +175,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, } return csum; } +EXPORT_SYMBOL_GPL(nf_ip_checksum_partial); static int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict __always_unused) @@ -188,7 +189,6 @@ static int nf_ip_route(struct net *net, struct dst_entry **dst, static const struct nf_afinfo nf_ip_afinfo = { .family = AF_INET, - .checksum_partial = nf_ip_checksum_partial, .route = nf_ip_route, .saveroute = nf_ip_saveroute, .reroute = nf_ip_reroute, -- cgit v1.2.3 From 7db9a51e0f9931446ed4231feb1040ed5134fc60 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 20 Dec 2017 16:12:55 +0100 Subject: netfilter: remove saveroute indirection in struct nf_afinfo This is only used by nf_queue.c and this function comes with no symbol dependencies with IPv6, it just refers to structure layouts. Therefore, we can replace it by a direct function call from where it belongs. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter.c | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 010c75fddf7e..7878ae6c35b2 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -80,33 +80,6 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t } EXPORT_SYMBOL(ip_route_me_harder); -/* - * Extra routing may needed on local out, as the QUEUE target never - * returns control to the table. - */ - -struct ip_rt_info { - __be32 daddr; - __be32 saddr; - u_int8_t tos; - u_int32_t mark; -}; - -static void nf_ip_saveroute(const struct sk_buff *skb, - struct nf_queue_entry *entry) -{ - struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); - - if (entry->state.hook == NF_INET_LOCAL_OUT) { - const struct iphdr *iph = ip_hdr(skb); - - rt_info->tos = iph->tos; - rt_info->daddr = iph->daddr; - rt_info->saddr = iph->saddr; - rt_info->mark = skb->mark; - } -} - static int nf_ip_reroute(struct net *net, struct sk_buff *skb, const struct nf_queue_entry *entry) { @@ -190,7 +163,6 @@ static int nf_ip_route(struct net *net, struct dst_entry **dst, static const struct nf_afinfo nf_ip_afinfo = { .family = AF_INET, .route = nf_ip_route, - .saveroute = nf_ip_saveroute, .reroute = nf_ip_reroute, .route_key_size = sizeof(struct ip_rt_info), }; -- cgit v1.2.3 From 3f87c08c615f567799b426aff0341ea8010a0ebb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 27 Nov 2017 22:29:52 +0100 Subject: netfilter: move route indirection to struct nf_ipv6_ops We cannot make a direct call to nf_ip6_route() because that would result in autoloading the 'ipv6' module because of symbol dependencies. Therefore, define route indirection in nf_ipv6_ops where this really belongs to. For IPv4, we can indeed make a direct function call, which is faster, given IPv4 is built-in in the networking code by default. Still, CONFIG_INET=n and CONFIG_NETFILTER=y is possible, so define empty inline stub for IPv4 in such case. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 7878ae6c35b2..e9d47e4ec182 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -150,8 +150,8 @@ __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, } EXPORT_SYMBOL_GPL(nf_ip_checksum_partial); -static int nf_ip_route(struct net *net, struct dst_entry **dst, - struct flowi *fl, bool strict __always_unused) +int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl, + bool strict __always_unused) { struct rtable *rt = ip_route_output_key(net, &fl->u.ip4); if (IS_ERR(rt)) @@ -159,10 +159,10 @@ static int nf_ip_route(struct net *net, struct dst_entry **dst, *dst = &rt->dst; return 0; } +EXPORT_SYMBOL_GPL(nf_ip_route); static const struct nf_afinfo nf_ip_afinfo = { .family = AF_INET, - .route = nf_ip_route, .reroute = nf_ip_reroute, .route_key_size = sizeof(struct ip_rt_info), }; -- cgit v1.2.3 From ce388f452f0af2013c657dd24be4415d94e7704f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 27 Nov 2017 22:50:26 +0100 Subject: netfilter: move reroute indirection to struct nf_ipv6_ops We cannot make a direct call to nf_ip6_reroute() because that would result in autoloading the 'ipv6' module because of symbol dependencies. Therefore, define reroute indirection in nf_ipv6_ops where this really belongs to. For IPv4, we can indeed make a direct function call, which is faster, given IPv4 is built-in in the networking code by default. Still, CONFIG_INET=n and CONFIG_NETFILTER=y is possible, so define empty inline stub for IPv4 in such case. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index e9d47e4ec182..ec73be3b2f14 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -80,8 +80,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t } EXPORT_SYMBOL(ip_route_me_harder); -static int nf_ip_reroute(struct net *net, struct sk_buff *skb, - const struct nf_queue_entry *entry) +int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) { const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); @@ -92,10 +91,12 @@ static int nf_ip_reroute(struct net *net, struct sk_buff *skb, skb->mark == rt_info->mark && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(net, skb, RTN_UNSPEC); + return ip_route_me_harder(entry->state.net, skb, + RTN_UNSPEC); } return 0; } +EXPORT_SYMBOL_GPL(nf_ip_reroute); __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol) @@ -163,7 +164,6 @@ EXPORT_SYMBOL_GPL(nf_ip_route); static const struct nf_afinfo nf_ip_afinfo = { .family = AF_INET, - .reroute = nf_ip_reroute, .route_key_size = sizeof(struct ip_rt_info), }; -- cgit v1.2.3 From 464356234f88518f7d0678b979013e78607e8266 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 27 Nov 2017 22:58:37 +0100 Subject: netfilter: remove route_key_size field in struct nf_afinfo This is only needed by nf_queue, place this code where it belongs. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index ec73be3b2f14..bd8e161c2f95 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -164,7 +164,6 @@ EXPORT_SYMBOL_GPL(nf_ip_route); static const struct nf_afinfo nf_ip_afinfo = { .family = AF_INET, - .route_key_size = sizeof(struct ip_rt_info), }; static int __init ipv4_netfilter_init(void) -- cgit v1.2.3 From b3a61254d83d577f8a44b86a5e68bc124011336a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 9 Dec 2017 17:05:53 +0100 Subject: netfilter: remove struct nf_afinfo and its helper functions This abstraction has no clients anymore, remove it. This is what remains from previous authors, so correct copyright statement after recent modifications and code removal. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index bd8e161c2f95..e6774ccb7731 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -161,13 +161,3 @@ int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl, return 0; } EXPORT_SYMBOL_GPL(nf_ip_route); - -static const struct nf_afinfo nf_ip_afinfo = { - .family = AF_INET, -}; - -static int __init ipv4_netfilter_init(void) -{ - return nf_register_afinfo(&nf_ip_afinfo); -} -subsys_initcall(ipv4_netfilter_init); -- cgit v1.2.3 From a7f87b47e67e4341f6175cdb80e5c2eaadf30dcb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 30 Dec 2017 22:41:46 +0100 Subject: netfilter: remove defensive check on malformed packets from raw sockets Users cannot forge malformed IPv4/IPv6 headers via raw sockets that they can inject into the stack. Specifically, not for IPv4 since 55888dfb6ba7 ("AF_RAW: Augment raw_send_hdrinc to expand skb to fit iphdr->ihl (v2)"). IPv6 raw sockets also ensure that packets have a well-formed IPv6 header available in the skbuff. At quick glance, br_netfilter also validates layer 3 headers and it drops malformed both IPv4 and IPv6 packets. Therefore, let's remove this defensive check all over the place. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/iptable_filter.c | 6 ------ net/ipv4/netfilter/iptable_mangle.c | 5 ----- net/ipv4/netfilter/iptable_raw.c | 6 ------ net/ipv4/netfilter/iptable_security.c | 6 ------ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 5 ----- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 10 ---------- net/ipv4/netfilter/nf_tables_ipv4.c | 17 +---------------- net/ipv4/netfilter/nft_chain_route_ipv4.c | 5 ----- 8 files changed, 1 insertion(+), 59 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 7667f223d7f8..9ac92ea7b93c 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -38,12 +38,6 @@ static unsigned int iptable_filter_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (state->hook == NF_INET_LOCAL_OUT && - (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr))) - /* root is playing with raw sockets. */ - return NF_ACCEPT; - return ipt_do_table(skb, state, state->net->ipv4.iptable_filter); } diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index aebdb337fd7e..dea138ca8925 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -49,11 +49,6 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) u_int32_t mark; int err; - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; - /* Save things which could affect route */ mark = skb->mark; iph = ip_hdr(skb); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 2642ecd2645c..a869d1fea7d9 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -26,12 +26,6 @@ static unsigned int iptable_raw_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (state->hook == NF_INET_LOCAL_OUT && - (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr))) - /* root is playing with raw sockets. */ - return NF_ACCEPT; - return ipt_do_table(skb, state, state->net->ipv4.iptable_raw); } diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index ff226596e4b5..e5379fe57b64 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -43,12 +43,6 @@ static unsigned int iptable_security_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (state->hook == NF_INET_LOCAL_OUT && - (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr))) - /* Somebody is playing with raw sockets. */ - return NF_ACCEPT; - return ipt_do_table(skb, state, state->net->ipv4.iptable_security); } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index bb2c868a5621..de213a397ea8 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -154,11 +154,6 @@ static unsigned int ipv4_conntrack_local(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; - if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */ return NF_ACCEPT; diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 0443ca4120b0..f7ff6a364d7b 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -356,11 +356,6 @@ nf_nat_ipv4_out(void *priv, struct sk_buff *skb, #endif unsigned int ret; - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; - ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && @@ -396,11 +391,6 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, unsigned int ret; int err; - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; - ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 8aeb15c2b9b2..f4675253f1e6 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -30,21 +30,6 @@ static unsigned int nft_do_chain_ipv4(void *priv, return nft_do_chain(&pkt, priv); } -static unsigned int nft_ipv4_output(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - if (unlikely(skb->len < sizeof(struct iphdr) || - ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) { - if (net_ratelimit()) - pr_info("nf_tables_ipv4: ignoring short SOCK_RAW " - "packet\n"); - return NF_ACCEPT; - } - - return nft_do_chain_ipv4(priv, skb, state); -} - static struct nft_af_info nft_af_ipv4 __read_mostly = { .family = NFPROTO_IPV4, .nhooks = NF_INET_NUMHOOKS, @@ -91,7 +76,7 @@ static const struct nf_chain_type filter_ipv4 = { (1 << NF_INET_POST_ROUTING), .hooks = { [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, - [NF_INET_LOCAL_OUT] = nft_ipv4_output, + [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4, [NF_INET_FORWARD] = nft_do_chain_ipv4, [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index fb3d49fb62fe..d965c225b9f6 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -33,11 +33,6 @@ static unsigned int nf_route_table_hook(void *priv, const struct iphdr *iph; int err; - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; - nft_set_pktinfo(&pkt, skb, state); nft_set_pktinfo_ipv4(&pkt, skb); -- cgit v1.2.3 From 97add9f0d66da9898da325f84e80533db9cc0ced Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 7 Jan 2018 01:04:15 +0100 Subject: netfilter: flow table support for IPv4 This patch adds the IPv4 flow table type, that implements the datapath flow table to forward IPv4 traffic. Rationale is: 1) Look up for the packet in the flow table, from the ingress hook. 2) If there's a hit, decrement ttl and pass it on to the neighbour layer for transmission. 3) If there's a miss, packet is passed up to the classic forwarding path. This patch also supports layer 3 source and destination NAT. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 8 + net/ipv4/netfilter/Makefile | 3 + net/ipv4/netfilter/nf_flow_table_ipv4.c | 283 ++++++++++++++++++++++++++++++++ 3 files changed, 294 insertions(+) create mode 100644 net/ipv4/netfilter/nf_flow_table_ipv4.c (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index cee51045e2f7..7d5d444964aa 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -78,6 +78,14 @@ config NF_TABLES_ARP endif # NF_TABLES +config NF_FLOW_TABLE_IPV4 + select NF_FLOW_TABLE + tristate "Netfilter flow table IPv4 module" + help + This option adds the flow table IPv4 support. + + To compile it as a module, choose M here. + config NF_DUP_IPV4 tristate "Netfilter IPv4 packet duplication to alternate destination" depends on !NF_CONNTRACK || NF_CONNTRACK diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index adcdae358365..8bb1f0c7a375 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -43,6 +43,9 @@ obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o +# flow table support +obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o + # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c new file mode 100644 index 000000000000..ac56c0f0492a --- /dev/null +++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c @@ -0,0 +1,283 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/* For layer 4 checksum field offset. */ +#include +#include + +static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff, + __be32 addr, __be32 new_addr) +{ + struct tcphdr *tcph; + + if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || + skb_try_make_writable(skb, thoff + sizeof(*tcph))) + return -1; + + tcph = (void *)(skb_network_header(skb) + thoff); + inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true); + + return 0; +} + +static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, + __be32 addr, __be32 new_addr) +{ + struct udphdr *udph; + + if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || + skb_try_make_writable(skb, thoff + sizeof(*udph))) + return -1; + + udph = (void *)(skb_network_header(skb) + thoff); + if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { + inet_proto_csum_replace4(&udph->check, skb, addr, + new_addr, true); + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } + + return 0; +} + +static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph, + unsigned int thoff, __be32 addr, + __be32 new_addr) +{ + switch (iph->protocol) { + case IPPROTO_TCP: + if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0) + return NF_DROP; + break; + case IPPROTO_UDP: + if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0) + return NF_DROP; + break; + } + + return 0; +} + +static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb, + struct iphdr *iph, unsigned int thoff, + enum flow_offload_tuple_dir dir) +{ + __be32 addr, new_addr; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = iph->saddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; + iph->saddr = new_addr; + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = iph->daddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr; + iph->daddr = new_addr; + break; + default: + return -1; + } + csum_replace4(&iph->check, addr, new_addr); + + return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); +} + +static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, + struct iphdr *iph, unsigned int thoff, + enum flow_offload_tuple_dir dir) +{ + __be32 addr, new_addr; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = iph->daddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr; + iph->daddr = new_addr; + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = iph->saddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr; + iph->saddr = new_addr; + break; + default: + return -1; + } + + return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); +} + +static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb, + enum flow_offload_tuple_dir dir) +{ + struct iphdr *iph = ip_hdr(skb); + unsigned int thoff = iph->ihl * 4; + + if (flow->flags & FLOW_OFFLOAD_SNAT && + (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 || + nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0)) + return -1; + if (flow->flags & FLOW_OFFLOAD_DNAT && + (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 || + nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0)) + return -1; + + return 0; +} + +static bool ip_has_options(unsigned int thoff) +{ + return thoff != sizeof(struct iphdr); +} + +static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, + struct flow_offload_tuple *tuple) +{ + struct flow_ports *ports; + unsigned int thoff; + struct iphdr *iph; + + if (!pskb_may_pull(skb, sizeof(*iph))) + return -1; + + iph = ip_hdr(skb); + thoff = iph->ihl * 4; + + if (ip_is_fragment(iph) || + unlikely(ip_has_options(thoff))) + return -1; + + if (iph->protocol != IPPROTO_TCP && + iph->protocol != IPPROTO_UDP) + return -1; + + thoff = iph->ihl * 4; + if (!pskb_may_pull(skb, thoff + sizeof(*ports))) + return -1; + + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + + tuple->src_v4.s_addr = iph->saddr; + tuple->dst_v4.s_addr = iph->daddr; + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + tuple->l3proto = AF_INET; + tuple->l4proto = iph->protocol; + tuple->iifidx = dev->ifindex; + + return 0; +} + +/* Based on ip_exceeds_mtu(). */ +static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu) + return false; + + if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) + return false; + + if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + return false; + + return true; +} + +static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt) +{ + u32 mtu; + + mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); + if (__nf_flow_exceeds_mtu(skb, mtu)) + return true; + + return false; +} + +static unsigned int +nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct nf_flowtable *flow_table = priv; + struct flow_offload_tuple tuple = {}; + enum flow_offload_tuple_dir dir; + struct flow_offload *flow; + struct net_device *outdev; + const struct rtable *rt; + struct iphdr *iph; + __be32 nexthop; + + if (skb->protocol != htons(ETH_P_IP)) + return NF_ACCEPT; + + if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0) + return NF_ACCEPT; + + tuplehash = flow_offload_lookup(flow_table, &tuple); + if (tuplehash == NULL) + return NF_ACCEPT; + + outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); + if (!outdev) + return NF_ACCEPT; + + dir = tuplehash->tuple.dir; + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); + + rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache; + if (unlikely(nf_flow_exceeds_mtu(skb, rt))) + return NF_ACCEPT; + + if (skb_try_make_writable(skb, sizeof(*iph))) + return NF_DROP; + + if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) && + nf_flow_nat_ip(flow, skb, dir) < 0) + return NF_DROP; + + flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; + iph = ip_hdr(skb); + ip_decrease_ttl(iph); + + skb->dev = outdev; + nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); + neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb); + + return NF_STOLEN; +} + +static struct nf_flowtable_type flowtable_ipv4 = { + .family = NFPROTO_IPV4, + .params = &nf_flow_offload_rhash_params, + .gc = nf_flow_offload_work_gc, + .hook = nf_flow_offload_ip_hook, + .owner = THIS_MODULE, +}; + +static int __init nf_flow_ipv4_module_init(void) +{ + nft_register_flowtable_type(&flowtable_ipv4); + + return 0; +} + +static void __exit nf_flow_ipv4_module_exit(void) +{ + nft_unregister_flowtable_type(&flowtable_ipv4); +} + +module_init(nf_flow_ipv4_module_init); +module_exit(nf_flow_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NF_FLOWTABLE(AF_INET); -- cgit v1.2.3 From 7c23b629a8085b11daccd68c62b5116ff498f84a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 7 Jan 2018 01:04:22 +0100 Subject: netfilter: flow table support for the mixed IPv4/IPv6 family This patch adds the IPv6 flow table type, that implements the datapath flow table to forward IPv6 traffic. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_flow_table_ipv4.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c index ac56c0f0492a..b2d01eb25f2c 100644 --- a/net/ipv4/netfilter/nf_flow_table_ipv4.c +++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c @@ -202,7 +202,7 @@ static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt) return false; } -static unsigned int +unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -254,6 +254,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, return NF_STOLEN; } +EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook); static struct nf_flowtable_type flowtable_ipv4 = { .family = NFPROTO_IPV4, -- cgit v1.2.3