summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-01-08 20:40:42 -0500
committerDavid S. Miller <davem@davemloft.net>2018-01-08 20:40:42 -0500
commit9f0e896f35020d1e0e513b365a62fb5e06066764 (patch)
tree612b09a6f772149958ae12244c5da8b37337817a /net/ipv4
parent0c3b34d804947fef7fb9e74912b7b7563729231e (diff)
parentf998b6b10144cd9809da6af02758615f789e8aa1 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter/IPVS updates for net-next The following patchset contains Netfilter/IPVS updates for your net-next tree: 1) Free hooks via call_rcu to speed up netns release path, from Florian Westphal. 2) Reduce memory footprint of hook arrays, skip allocation if family is not present - useful in case decnet support is not compiled built-in. Patches from Florian Westphal. 3) Remove defensive check for malformed IPv4 - including ihl field - and IPv6 headers in x_tables and nf_tables. 4) Add generic flow table offload infrastructure for nf_tables, this includes the netlink control plane and support for IPv4, IPv6 and mixed IPv4/IPv6 dataplanes. This comes with NAT support too. This patchset adds the IPS_OFFLOAD conntrack status bit to indicate that this flow has been offloaded. 5) Add secpath matching support for nf_tables, from Florian. 6) Save some code bytes in the fast path for the nf_tables netdev, bridge and inet families. 7) Allow one single NAT hook per point and do not allow to register NAT hooks in nf_tables before the conntrack hook, patches from Florian. 8) Seven patches to remove the struct nf_af_info abstraction, instead we perform direct calls for IPv4 which is faster. IPv6 indirections are still needed to avoid dependencies with the 'ipv6' module, but these now reside in struct nf_ipv6_ops. 9) Seven patches to handle NFPROTO_INET from the Netfilter core, hence we can remove specific code in nf_tables to handle this pseudofamily. 10) No need for synchronize_net() call for nf_queue after conversion to hook arrays. Also from Florian. 11) Call cond_resched_rcu() when dumping large sets in ipset to avoid softlockup. Again from Florian. 12) Pass lockdep_nfnl_is_held() to rcu_dereference_protected(), patch from Florian Westphal. 13) Fix matching of counters in ipset, from Jozsef Kadlecsik. 14) Missing nfnl lock protection in the ip_set_net_exit path, also from Jozsef. 15) Move connlimit code that we can reuse from nf_tables into nf_conncount, from Florian Westhal. And asorted cleanups: 16) Get rid of nft_dereference(), it only has one single caller. 17) Add nft_set_is_anonymous() helper function. 18) Remove NF_ARP_FORWARD leftover chain definition in nf_tables_arp. 19) Remove unnecessary comments in nf_conntrack_h323_asn1.c From Varsha Rao. 20) Remove useless parameters in frag_safe_skb_hp(), from Gao Feng. 21) Constify layer 4 conntrack protocol definitions, function parameters to register/unregister these protocol trackers, and timeouts. Patches from Florian Westphal. 22) Remove nlattr_size indirection, from Florian Westphal. 23) Add fall-through comments as -Wimplicit-fallthrough needs this, from Gustavo A. R. Silva. 24) Use swap() macro to exchange values in ipset, patch from Gustavo A. R. Silva. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/netfilter.c62
-rw-r--r--net/ipv4/netfilter/Kconfig10
-rw-r--r--net/ipv4/netfilter/Makefile3
-rw-r--r--net/ipv4/netfilter/arp_tables.c26
-rw-r--r--net/ipv4/netfilter/ip_tables.c26
-rw-r--r--net/ipv4/netfilter/iptable_filter.c6
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c5
-rw-r--r--net/ipv4/netfilter/iptable_nat.c4
-rw-r--r--net/ipv4/netfilter/iptable_raw.c6
-rw-r--r--net/ipv4/netfilter/iptable_security.c6
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c7
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c4
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c284
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c10
-rw-r--r--net/ipv4/netfilter/nf_tables_arp.c13
-rw-r--r--net/ipv4/netfilter/nf_tables_ipv4.c36
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c3
-rw-r--r--net/ipv4/netfilter/nft_chain_route_ipv4.c8
18 files changed, 359 insertions, 160 deletions
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index c0cc6aa8cfaa..e6774ccb7731 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -80,35 +80,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
}
EXPORT_SYMBOL(ip_route_me_harder);
-/*
- * Extra routing may needed on local out, as the QUEUE target never
- * returns control to the table.
- */
-
-struct ip_rt_info {
- __be32 daddr;
- __be32 saddr;
- u_int8_t tos;
- u_int32_t mark;
-};
-
-static void nf_ip_saveroute(const struct sk_buff *skb,
- struct nf_queue_entry *entry)
-{
- struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
-
- if (entry->state.hook == NF_INET_LOCAL_OUT) {
- const struct iphdr *iph = ip_hdr(skb);
-
- rt_info->tos = iph->tos;
- rt_info->daddr = iph->daddr;
- rt_info->saddr = iph->saddr;
- rt_info->mark = skb->mark;
- }
-}
-
-static int nf_ip_reroute(struct net *net, struct sk_buff *skb,
- const struct nf_queue_entry *entry)
+int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
{
const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
@@ -119,10 +91,12 @@ static int nf_ip_reroute(struct net *net, struct sk_buff *skb,
skb->mark == rt_info->mark &&
iph->daddr == rt_info->daddr &&
iph->saddr == rt_info->saddr))
- return ip_route_me_harder(net, skb, RTN_UNSPEC);
+ return ip_route_me_harder(entry->state.net, skb,
+ RTN_UNSPEC);
}
return 0;
}
+EXPORT_SYMBOL_GPL(nf_ip_reroute);
__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol)
@@ -155,9 +129,9 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
}
EXPORT_SYMBOL(nf_ip_checksum);
-static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
- unsigned int dataoff, unsigned int len,
- u_int8_t protocol)
+__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol)
{
const struct iphdr *iph = ip_hdr(skb);
__sum16 csum = 0;
@@ -175,9 +149,10 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
}
return csum;
}
+EXPORT_SYMBOL_GPL(nf_ip_checksum_partial);
-static int nf_ip_route(struct net *net, struct dst_entry **dst,
- struct flowi *fl, bool strict __always_unused)
+int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
+ bool strict __always_unused)
{
struct rtable *rt = ip_route_output_key(net, &fl->u.ip4);
if (IS_ERR(rt))
@@ -185,19 +160,4 @@ static int nf_ip_route(struct net *net, struct dst_entry **dst,
*dst = &rt->dst;
return 0;
}
-
-static const struct nf_afinfo nf_ip_afinfo = {
- .family = AF_INET,
- .checksum = nf_ip_checksum,
- .checksum_partial = nf_ip_checksum_partial,
- .route = nf_ip_route,
- .saveroute = nf_ip_saveroute,
- .reroute = nf_ip_reroute,
- .route_key_size = sizeof(struct ip_rt_info),
-};
-
-static int __init ipv4_netfilter_init(void)
-{
- return nf_register_afinfo(&nf_ip_afinfo);
-}
-subsys_initcall(ipv4_netfilter_init);
+EXPORT_SYMBOL_GPL(nf_ip_route);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index c11eb1744ab1..7d5d444964aa 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -72,11 +72,20 @@ endif # NF_TABLES_IPV4
config NF_TABLES_ARP
tristate "ARP nf_tables support"
+ select NETFILTER_FAMILY_ARP
help
This option enables the ARP support for nf_tables.
endif # NF_TABLES
+config NF_FLOW_TABLE_IPV4
+ select NF_FLOW_TABLE
+ tristate "Netfilter flow table IPv4 module"
+ help
+ This option adds the flow table IPv4 support.
+
+ To compile it as a module, choose M here.
+
config NF_DUP_IPV4
tristate "Netfilter IPv4 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
@@ -392,6 +401,7 @@ endif # IP_NF_IPTABLES
config IP_NF_ARPTABLES
tristate "ARP tables support"
select NETFILTER_XTABLES
+ select NETFILTER_FAMILY_ARP
depends on NETFILTER_ADVANCED
help
arptables is a general, extensible packet identification framework.
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index adcdae358365..8bb1f0c7a375 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -43,6 +43,9 @@ obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
+# flow table support
+obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o
+
# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 0c3c944a7b72..bf8a5340f15e 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -810,9 +810,8 @@ static int get_info(struct net *net, void __user *user,
if (compat)
xt_compat_lock(NFPROTO_ARP);
#endif
- t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
- "arptable_%s", name);
- if (t) {
+ t = xt_request_find_table_lock(net, NFPROTO_ARP, name);
+ if (!IS_ERR(t)) {
struct arpt_getinfo info;
const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
@@ -841,7 +840,7 @@ static int get_info(struct net *net, void __user *user,
xt_table_unlock(t);
module_put(t->me);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
#ifdef CONFIG_COMPAT
if (compat)
xt_compat_unlock(NFPROTO_ARP);
@@ -866,7 +865,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
const struct xt_table_info *private = t->private;
if (get.size == private->size)
@@ -878,7 +877,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
return ret;
}
@@ -903,10 +902,9 @@ static int __do_replace(struct net *net, const char *name,
goto out;
}
- t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
- "arptable_%s", name);
- if (!t) {
- ret = -ENOENT;
+ t = xt_request_find_table_lock(net, NFPROTO_ARP, name);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free_newinfo_counters_untrans;
}
@@ -1020,8 +1018,8 @@ static int do_add_counters(struct net *net, const void __user *user,
return PTR_ERR(paddc);
t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name);
- if (!t) {
- ret = -ENOENT;
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free;
}
@@ -1408,7 +1406,7 @@ static int compat_get_entries(struct net *net,
xt_compat_lock(NFPROTO_ARP);
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
@@ -1423,7 +1421,7 @@ static int compat_get_entries(struct net *net,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
xt_compat_unlock(NFPROTO_ARP);
return ret;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2e0d339028bb..0b975aa2d363 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -973,9 +973,8 @@ static int get_info(struct net *net, void __user *user,
if (compat)
xt_compat_lock(AF_INET);
#endif
- t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
- "iptable_%s", name);
- if (t) {
+ t = xt_request_find_table_lock(net, AF_INET, name);
+ if (!IS_ERR(t)) {
struct ipt_getinfo info;
const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
@@ -1005,7 +1004,7 @@ static int get_info(struct net *net, void __user *user,
xt_table_unlock(t);
module_put(t->me);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
#ifdef CONFIG_COMPAT
if (compat)
xt_compat_unlock(AF_INET);
@@ -1030,7 +1029,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, AF_INET, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
const struct xt_table_info *private = t->private;
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
@@ -1041,7 +1040,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
return ret;
}
@@ -1064,10 +1063,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
goto out;
}
- t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
- "iptable_%s", name);
- if (!t) {
- ret = -ENOENT;
+ t = xt_request_find_table_lock(net, AF_INET, name);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free_newinfo_counters_untrans;
}
@@ -1181,8 +1179,8 @@ do_add_counters(struct net *net, const void __user *user,
return PTR_ERR(paddc);
t = xt_find_table_lock(net, AF_INET, tmp.name);
- if (!t) {
- ret = -ENOENT;
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free;
}
@@ -1625,7 +1623,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
xt_compat_lock(AF_INET);
t = xt_find_table_lock(net, AF_INET, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
ret = compat_table_info(private, &info);
@@ -1639,7 +1637,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
xt_compat_unlock(AF_INET);
return ret;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 7667f223d7f8..9ac92ea7b93c 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -38,12 +38,6 @@ static unsigned int
iptable_filter_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (state->hook == NF_INET_LOCAL_OUT &&
- (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr)))
- /* root is playing with raw sockets. */
- return NF_ACCEPT;
-
return ipt_do_table(skb, state, state->net->ipv4.iptable_filter);
}
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index aebdb337fd7e..dea138ca8925 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -49,11 +49,6 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
u_int32_t mark;
int err;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
/* Save things which could affect route */
mark = skb->mark;
iph = ip_hdr(skb);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index a1a07b338ccf..0f7255cc65ee 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -72,6 +72,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
{
.hook = iptable_nat_ipv4_in,
.pf = NFPROTO_IPV4,
+ .nat_hook = true,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_NAT_DST,
},
@@ -79,6 +80,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
{
.hook = iptable_nat_ipv4_out,
.pf = NFPROTO_IPV4,
+ .nat_hook = true,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_NAT_SRC,
},
@@ -86,6 +88,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
{
.hook = iptable_nat_ipv4_local_fn,
.pf = NFPROTO_IPV4,
+ .nat_hook = true,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST,
},
@@ -93,6 +96,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
{
.hook = iptable_nat_ipv4_fn,
.pf = NFPROTO_IPV4,
+ .nat_hook = true,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC,
},
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 2642ecd2645c..a869d1fea7d9 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -26,12 +26,6 @@ static unsigned int
iptable_raw_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (state->hook == NF_INET_LOCAL_OUT &&
- (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr)))
- /* root is playing with raw sockets. */
- return NF_ACCEPT;
-
return ipt_do_table(skb, state, state->net->ipv4.iptable_raw);
}
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index ff226596e4b5..e5379fe57b64 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -43,12 +43,6 @@ static unsigned int
iptable_security_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (state->hook == NF_INET_LOCAL_OUT &&
- (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr)))
- /* Somebody is playing with raw sockets. */
- return NF_ACCEPT;
-
return ipt_do_table(skb, state, state->net->ipv4.iptable_security);
}
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 89af9d88ca21..de213a397ea8 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -154,11 +154,6 @@ static unsigned int ipv4_conntrack_local(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */
return NF_ACCEPT;
@@ -368,7 +363,7 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
MODULE_ALIAS("ip_conntrack");
MODULE_LICENSE("GPL");
-static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
+static const struct nf_conntrack_l4proto * const builtin_l4proto4[] = {
&nf_conntrack_l4proto_tcp4,
&nf_conntrack_l4proto_udp4,
&nf_conntrack_l4proto_icmp,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 1849fedd9b81..5c15beafa711 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -22,7 +22,7 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_log.h>
-static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ;
+static const unsigned int nf_ct_icmp_timeout = 30*HZ;
static inline struct nf_icmp_net *icmp_pernet(struct net *net)
{
@@ -351,7 +351,7 @@ static struct nf_proto_net *icmp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.icmp.pn;
}
-struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_ICMP,
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
new file mode 100644
index 000000000000..b2d01eb25f2c
--- /dev/null
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -0,0 +1,284 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/neighbour.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
+ __be32 addr, __be32 new_addr)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
+
+ return 0;
+}
+
+static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+ __be32 addr, __be32 new_addr)
+{
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
+ return -1;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace4(&udph->check, skb, addr,
+ new_addr, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+
+ return 0;
+}
+
+static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
+ unsigned int thoff, __be32 addr,
+ __be32 new_addr)
+{
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ case IPPROTO_UDP:
+ if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ }
+
+ return 0;
+}
+
+static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ struct iphdr *iph, unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ __be32 addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = iph->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
+ iph->saddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = iph->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
+ iph->daddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+ csum_replace4(&iph->check, addr, new_addr);
+
+ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+}
+
+static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ struct iphdr *iph, unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ __be32 addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = iph->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
+ iph->daddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = iph->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
+ iph->saddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+}
+
+static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ enum flow_offload_tuple_dir dir)
+{
+ struct iphdr *iph = ip_hdr(skb);
+ unsigned int thoff = iph->ihl * 4;
+
+ if (flow->flags & FLOW_OFFLOAD_SNAT &&
+ (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+ nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
+ return -1;
+ if (flow->flags & FLOW_OFFLOAD_DNAT &&
+ (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
+ return -1;
+
+ return 0;
+}
+
+static bool ip_has_options(unsigned int thoff)
+{
+ return thoff != sizeof(struct iphdr);
+}
+
+static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
+ struct flow_offload_tuple *tuple)
+{
+ struct flow_ports *ports;
+ unsigned int thoff;
+ struct iphdr *iph;
+
+ if (!pskb_may_pull(skb, sizeof(*iph)))
+ return -1;
+
+ iph = ip_hdr(skb);
+ thoff = iph->ihl * 4;
+
+ if (ip_is_fragment(iph) ||
+ unlikely(ip_has_options(thoff)))
+ return -1;
+
+ if (iph->protocol != IPPROTO_TCP &&
+ iph->protocol != IPPROTO_UDP)
+ return -1;
+
+ thoff = iph->ihl * 4;
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+ return -1;
+
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+ tuple->src_v4.s_addr = iph->saddr;
+ tuple->dst_v4.s_addr = iph->daddr;
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ tuple->l3proto = AF_INET;
+ tuple->l4proto = iph->protocol;
+ tuple->iifidx = dev->ifindex;
+
+ return 0;
+}
+
+/* Based on ip_exceeds_mtu(). */
+static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu)
+ return false;
+
+ if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ return false;
+
+ return true;
+}
+
+static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt)
+{
+ u32 mtu;
+
+ mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
+ if (__nf_flow_exceeds_mtu(skb, mtu))
+ return true;
+
+ return false;
+}
+
+unsigned int
+nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ struct flow_offload_tuple tuple = {};
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ const struct rtable *rt;
+ struct iphdr *iph;
+ __be32 nexthop;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ return NF_ACCEPT;
+
+ if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
+ return NF_ACCEPT;
+
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+ if (!outdev)
+ return NF_ACCEPT;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
+ rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+ if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
+ return NF_ACCEPT;
+
+ if (skb_try_make_writable(skb, sizeof(*iph)))
+ return NF_DROP;
+
+ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+ nf_flow_nat_ip(flow, skb, dir) < 0)
+ return NF_DROP;
+
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ iph = ip_hdr(skb);
+ ip_decrease_ttl(iph);
+
+ skb->dev = outdev;
+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+
+ return NF_STOLEN;
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
+
+static struct nf_flowtable_type flowtable_ipv4 = {
+ .family = NFPROTO_IPV4,
+ .params = &nf_flow_offload_rhash_params,
+ .gc = nf_flow_offload_work_gc,
+ .hook = nf_flow_offload_ip_hook,
+ .owner = THIS_MODULE,
+};
+
+static int __init nf_flow_ipv4_module_init(void)
+{
+ nft_register_flowtable_type(&flowtable_ipv4);
+
+ return 0;
+}
+
+static void __exit nf_flow_ipv4_module_exit(void)
+{
+ nft_unregister_flowtable_type(&flowtable_ipv4);
+}
+
+module_init(nf_flow_ipv4_module_init);
+module_exit(nf_flow_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET);
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 0443ca4120b0..f7ff6a364d7b 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -356,11 +356,6 @@ nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
#endif
unsigned int ret;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
@@ -396,11 +391,6 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
unsigned int ret;
int err;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index 4bbc273b45e8..f84c17763f6f 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -21,7 +21,8 @@ nft_do_chain_arp(void *priv,
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_unspec(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_unspec(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
@@ -30,12 +31,6 @@ static struct nft_af_info nft_af_arp __read_mostly = {
.family = NFPROTO_ARP,
.nhooks = NF_ARP_NUMHOOKS,
.owner = THIS_MODULE,
- .nops = 1,
- .hooks = {
- [NF_ARP_IN] = nft_do_chain_arp,
- [NF_ARP_OUT] = nft_do_chain_arp,
- [NF_ARP_FORWARD] = nft_do_chain_arp,
- },
};
static int nf_tables_arp_init_net(struct net *net)
@@ -73,6 +68,10 @@ static const struct nf_chain_type filter_arp = {
.owner = THIS_MODULE,
.hook_mask = (1 << NF_ARP_IN) |
(1 << NF_ARP_OUT),
+ .hooks = {
+ [NF_ARP_IN] = nft_do_chain_arp,
+ [NF_ARP_OUT] = nft_do_chain_arp,
+ },
};
static int __init nf_tables_arp_init(void)
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
index 2840a29b2e04..f4675253f1e6 100644
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -24,40 +24,17 @@ static unsigned int nft_do_chain_ipv4(void *priv,
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv4(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
-static unsigned int nft_ipv4_output(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- if (unlikely(skb->len < sizeof(struct iphdr) ||
- ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
- if (net_ratelimit())
- pr_info("nf_tables_ipv4: ignoring short SOCK_RAW "
- "packet\n");
- return NF_ACCEPT;
- }
-
- return nft_do_chain_ipv4(priv, skb, state);
-}
-
-struct nft_af_info nft_af_ipv4 __read_mostly = {
+static struct nft_af_info nft_af_ipv4 __read_mostly = {
.family = NFPROTO_IPV4,
.nhooks = NF_INET_NUMHOOKS,
.owner = THIS_MODULE,
- .nops = 1,
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
- [NF_INET_LOCAL_OUT] = nft_ipv4_output,
- [NF_INET_FORWARD] = nft_do_chain_ipv4,
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
- },
};
-EXPORT_SYMBOL_GPL(nft_af_ipv4);
static int nf_tables_ipv4_init_net(struct net *net)
{
@@ -97,6 +74,13 @@ static const struct nf_chain_type filter_ipv4 = {
(1 << NF_INET_FORWARD) |
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4,
+ [NF_INET_FORWARD] = nft_do_chain_ipv4,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
+ },
};
static int __init nf_tables_ipv4_init(void)
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index f5c66a7a4bf2..f2a490981594 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -33,7 +33,8 @@ static unsigned int nft_nat_do_chain(void *priv,
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv4(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index 30493beb611a..d965c225b9f6 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -33,12 +33,8 @@ static unsigned int nf_route_table_hook(void *priv,
const struct iphdr *iph;
int err;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
- nft_set_pktinfo_ipv4(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb);
mark = skb->mark;
iph = ip_hdr(skb);