diff options
author | David S. Miller <davem@davemloft.net> | 2018-05-23 16:37:11 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-05-23 16:37:11 -0400 |
commit | fb83eb93c6aa74e2a2a210a110069738b2648132 (patch) | |
tree | db27654a3c364ef89e5b8f472a7c2200ce635fb0 /net/netfilter | |
parent | 7c08c41f779eac856f3c8a03e178ee6f506bdcb3 (diff) | |
parent | 0c6bca747111dee19aa48c8f73d77fc85fcb8dd0 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:
====================
Netfilter updates for net-next
The following patchset contains Netfilter updates for your net-next
tree, they are:
1) Remove obsolete nf_log tracing from nf_tables, from Florian Westphal.
2) Add support for map lookups to numgen, random and hash expressions,
from Laura Garcia.
3) Allow to register nat hooks for iptables and nftables at the same
time. Patchset from Florian Westphal.
4) Timeout support for rbtree sets.
5) ip6_rpfilter needs interface for link-local addresses, from
Vincent Bernat.
6) Add nf_ct_hook and nf_nat_hook structures and use them.
7) Do not drop packets that are racing to insert conntrack entries
into hashes, this is particularly a problem in nfqueue setups.
8) Address fallout from xt_osf separation to nf_osf, patches
from Florian Westphal and Fernando Mancera.
9) Remove reference to struct nft_af_info, which doesn't exist anymore.
From Taehee Yoo.
This batch comes with a conflict between 25fd386e0bc0 ("netfilter:
core: add missing __rcu annotation") in your tree and 2c205dd3981f
("netfilter: add struct nf_nat_hook and use it") coming in this batch.
This conflict can be solved by leaving the __rcu tag on
__netfilter_net_init() - added by 25fd386e0bc0 - and removing all code
related to nf_nat_decode_session_hook - which is gone after
2c205dd3981f, as described by:
diff --cc net/netfilter/core.c
index e0ae4aae96f5,206fb2c4c319..168af54db975
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@@ -611,7 -580,13 +611,8 @@@ const struct nf_conntrack_zone nf_ct_zo
EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
#endif /* CONFIG_NF_CONNTRACK */
- static void __net_init __netfilter_net_init(struct nf_hook_entries **e, int max)
-#ifdef CONFIG_NF_NAT_NEEDED
-void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
-EXPORT_SYMBOL(nf_nat_decode_session_hook);
-#endif
-
+ static void __net_init
+ __netfilter_net_init(struct nf_hook_entries __rcu **e, int max)
{
int h;
I can also merge your net-next tree into nf-next, solve the conflict and
resend the pull request if you prefer so.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netfilter')
-rw-r--r-- | net/netfilter/Kconfig | 2 | ||||
-rw-r--r-- | net/netfilter/core.c | 102 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_core.c | 91 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_netlink.c | 10 | ||||
-rw-r--r-- | net/netfilter/nf_internals.h | 5 | ||||
-rw-r--r-- | net/netfilter/nf_nat_core.c | 294 | ||||
-rw-r--r-- | net/netfilter/nf_tables_api.c | 87 | ||||
-rw-r--r-- | net/netfilter/nf_tables_core.c | 29 | ||||
-rw-r--r-- | net/netfilter/nfnetlink_queue.c | 28 | ||||
-rw-r--r-- | net/netfilter/nft_hash.c | 131 | ||||
-rw-r--r-- | net/netfilter/nft_numgen.c | 76 | ||||
-rw-r--r-- | net/netfilter/nft_set_rbtree.c | 75 |
12 files changed, 749 insertions, 181 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e57c9d479503..a5b60e6a983e 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -445,7 +445,7 @@ config NETFILTER_SYNPROXY endif # NF_CONNTRACK config NF_OSF - tristate 'Passive OS fingerprint infrastructure' + tristate config NF_TABLES select NETFILTER_NETLINK diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 206fb2c4c319..168af54db975 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -138,11 +138,6 @@ nf_hook_entries_grow(const struct nf_hook_entries *old, continue; } - if (reg->nat_hook && orig_ops[i]->nat_hook) { - kvfree(new); - return ERR_PTR(-EBUSY); - } - if (inserted || reg->priority > orig_ops[i]->priority) { new_ops[nhooks] = (void *)orig_ops[i]; new->hooks[nhooks] = old->hooks[i]; @@ -186,9 +181,31 @@ static void hooks_validate(const struct nf_hook_entries *hooks) #endif } +int nf_hook_entries_insert_raw(struct nf_hook_entries __rcu **pp, + const struct nf_hook_ops *reg) +{ + struct nf_hook_entries *new_hooks; + struct nf_hook_entries *p; + + p = rcu_dereference_raw(*pp); + new_hooks = nf_hook_entries_grow(p, reg); + if (IS_ERR(new_hooks)) + return PTR_ERR(new_hooks); + + hooks_validate(new_hooks); + + rcu_assign_pointer(*pp, new_hooks); + + BUG_ON(p == new_hooks); + nf_hook_entries_free(p); + return 0; +} +EXPORT_SYMBOL_GPL(nf_hook_entries_insert_raw); + /* * __nf_hook_entries_try_shrink - try to shrink hook array * + * @old -- current hook blob at @pp * @pp -- location of hook blob * * Hook unregistration must always succeed, so to-be-removed hooks @@ -201,14 +218,14 @@ static void hooks_validate(const struct nf_hook_entries *hooks) * * Returns address to free, or NULL. 
*/ -static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp) +static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old, + struct nf_hook_entries __rcu **pp) { - struct nf_hook_entries *old, *new = NULL; unsigned int i, j, skip = 0, hook_entries; + struct nf_hook_entries *new = NULL; struct nf_hook_ops **orig_ops; struct nf_hook_ops **new_ops; - old = nf_entry_dereference(*pp); if (WARN_ON_ONCE(!old)) return NULL; @@ -347,11 +364,10 @@ static int __nf_register_net_hook(struct net *net, int pf, * This cannot fail, hook unregistration must always succeed. * Therefore replace the to-be-removed hook with a dummy hook. */ -static void nf_remove_net_hook(struct nf_hook_entries *old, - const struct nf_hook_ops *unreg, int pf) +static bool nf_remove_net_hook(struct nf_hook_entries *old, + const struct nf_hook_ops *unreg) { struct nf_hook_ops **orig_ops; - bool found = false; unsigned int i; orig_ops = nf_hook_entries_get_hook_ops(old); @@ -360,21 +376,10 @@ static void nf_remove_net_hook(struct nf_hook_entries *old, continue; WRITE_ONCE(old->hooks[i].hook, accept_all); WRITE_ONCE(orig_ops[i], &dummy_ops); - found = true; - break; + return true; } - if (found) { -#ifdef CONFIG_NETFILTER_INGRESS - if (pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS) - net_dec_ingress_queue(); -#endif -#ifdef HAVE_JUMP_LABEL - static_key_slow_dec(&nf_hooks_needed[pf][unreg->hooknum]); -#endif - } else { - WARN_ONCE(1, "hook not found, pf %d num %d", pf, unreg->hooknum); - } + return false; } static void __nf_unregister_net_hook(struct net *net, int pf, @@ -395,9 +400,19 @@ static void __nf_unregister_net_hook(struct net *net, int pf, return; } - nf_remove_net_hook(p, reg, pf); + if (nf_remove_net_hook(p, reg)) { +#ifdef CONFIG_NETFILTER_INGRESS + if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) + net_dec_ingress_queue(); +#endif +#ifdef HAVE_JUMP_LABEL + static_key_slow_dec(&nf_hooks_needed[pf][reg->hooknum]); +#endif + } else { + 
WARN_ONCE(1, "hook not found, pf %d num %d", pf, reg->hooknum); + } - p = __nf_hook_entries_try_shrink(pp); + p = __nf_hook_entries_try_shrink(p, pp); mutex_unlock(&nf_hook_mutex); if (!p) return; @@ -417,6 +432,19 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) } EXPORT_SYMBOL(nf_unregister_net_hook); +void nf_hook_entries_delete_raw(struct nf_hook_entries __rcu **pp, + const struct nf_hook_ops *reg) +{ + struct nf_hook_entries *p; + + p = rcu_dereference_raw(*pp); + if (nf_remove_net_hook(p, reg)) { + p = __nf_hook_entries_try_shrink(p, pp); + nf_hook_entries_free(p); + } +} +EXPORT_SYMBOL_GPL(nf_hook_entries_delete_raw); + int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) { int err; @@ -535,6 +563,9 @@ EXPORT_SYMBOL(skb_make_writable); struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly; EXPORT_SYMBOL_GPL(nfnl_ct_hook); +struct nf_ct_hook __rcu *nf_ct_hook __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_hook); + #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* This does not belong here, but locally generated errors need it if connection tracking in use: without this, connection may not be in hash table, and hence @@ -543,6 +574,9 @@ void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu __read_mostly; EXPORT_SYMBOL(ip_ct_attach); +struct nf_nat_hook __rcu *nf_nat_hook __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_hook); + void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb) { void (*attach)(struct sk_buff *, const struct sk_buff *); @@ -557,17 +591,14 @@ void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb) } EXPORT_SYMBOL(nf_ct_attach); -void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly; -EXPORT_SYMBOL(nf_ct_destroy); - void nf_conntrack_destroy(struct nf_conntrack *nfct) { - void (*destroy)(struct nf_conntrack *); + struct nf_ct_hook *ct_hook; rcu_read_lock(); - destroy = rcu_dereference(nf_ct_destroy); - BUG_ON(destroy == NULL); - destroy(nfct); + ct_hook = 
rcu_dereference(nf_ct_hook); + BUG_ON(ct_hook == NULL); + ct_hook->destroy(nfct); rcu_read_unlock(); } EXPORT_SYMBOL(nf_conntrack_destroy); @@ -580,11 +611,6 @@ const struct nf_conntrack_zone nf_ct_zone_dflt = { EXPORT_SYMBOL_GPL(nf_ct_zone_dflt); #endif /* CONFIG_NF_CONNTRACK */ -#ifdef CONFIG_NF_NAT_NEEDED -void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); -EXPORT_SYMBOL(nf_nat_decode_session_hook); -#endif - static void __net_init __netfilter_net_init(struct nf_hook_entries __rcu **e, int max) { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 605441727008..3465da2a98bd 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -58,11 +58,6 @@ #include "nf_internals.h" -int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, - enum nf_nat_manip_type manip, - const struct nlattr *attr) __read_mostly; -EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); - __cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; EXPORT_SYMBOL_GPL(nf_conntrack_locks); @@ -1612,6 +1607,82 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) nf_conntrack_get(skb_nfct(nskb)); } +static int nf_conntrack_update(struct net *net, struct sk_buff *skb) +{ + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; + struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_tuple tuple; + enum ip_conntrack_info ctinfo; + struct nf_nat_hook *nat_hook; + unsigned int dataoff, status; + struct nf_conn *ct; + u16 l3num; + u8 l4num; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct || nf_ct_is_confirmed(ct)) + return 0; + + l3num = nf_ct_l3num(ct); + l3proto = nf_ct_l3proto_find_get(l3num); + + if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, + &l4num) <= 0) + return -1; + + l4proto = nf_ct_l4proto_find_get(l3num, l4num); + + if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, + l4num, net, &tuple, 
l3proto, l4proto)) + return -1; + + if (ct->status & IPS_SRC_NAT) { + memcpy(tuple.src.u3.all, + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.all, + sizeof(tuple.src.u3.all)); + tuple.src.u.all = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all; + } + + if (ct->status & IPS_DST_NAT) { + memcpy(tuple.dst.u3.all, + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.all, + sizeof(tuple.dst.u3.all)); + tuple.dst.u.all = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.all; + } + + h = nf_conntrack_find_get(net, nf_ct_zone(ct), &tuple); + if (!h) + return 0; + + /* Store status bits of the conntrack that is clashing to re-do NAT + * mangling according to what it has been done already to this packet. + */ + status = ct->status; + + nf_ct_put(ct); + ct = nf_ct_tuplehash_to_ctrack(h); + nf_ct_set(skb, ct, ctinfo); + + nat_hook = rcu_dereference(nf_nat_hook); + if (!nat_hook) + return 0; + + if (status & IPS_SRC_NAT && + nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_SRC, + IP_CT_DIR_ORIGINAL) == NF_DROP) + return -1; + + if (status & IPS_DST_NAT && + nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_DST, + IP_CT_DIR_ORIGINAL) == NF_DROP) + return -1; + + return 0; +} + /* Bring out ya dead! 
*/ static struct nf_conn * get_next_corpse(int (*iter)(struct nf_conn *i, void *data), @@ -1813,8 +1884,7 @@ void nf_conntrack_cleanup_start(void) void nf_conntrack_cleanup_end(void) { - RCU_INIT_POINTER(nf_ct_destroy, NULL); - + RCU_INIT_POINTER(nf_ct_hook, NULL); cancel_delayed_work_sync(&conntrack_gc_work.dwork); nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); @@ -2131,11 +2201,16 @@ err_cachep: return ret; } +static struct nf_ct_hook nf_conntrack_hook = { + .update = nf_conntrack_update, + .destroy = destroy_conntrack, +}; + void nf_conntrack_init_end(void) { /* For use by REJECT target */ RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); - RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); + RCU_INIT_POINTER(nf_ct_hook, &nf_conntrack_hook); } /* diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d807b8770be3..39327a42879f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1431,11 +1431,11 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr) { - typeof(nfnetlink_parse_nat_setup_hook) parse_nat_setup; + struct nf_nat_hook *nat_hook; int err; - parse_nat_setup = rcu_dereference(nfnetlink_parse_nat_setup_hook); - if (!parse_nat_setup) { + nat_hook = rcu_dereference(nf_nat_hook); + if (!nat_hook) { #ifdef CONFIG_MODULES rcu_read_unlock(); nfnl_unlock(NFNL_SUBSYS_CTNETLINK); @@ -1446,13 +1446,13 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, } nfnl_lock(NFNL_SUBSYS_CTNETLINK); rcu_read_lock(); - if (nfnetlink_parse_nat_setup_hook) + if (nat_hook->parse_nat_setup) return -EAGAIN; #endif return -EOPNOTSUPP; } - err = parse_nat_setup(ct, manip, attr); + err = nat_hook->parse_nat_setup(ct, manip, attr); if (err == -EAGAIN) { #ifdef CONFIG_MODULES rcu_read_unlock(); diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 18f6d7ae995b..e15779fd58e3 100644 --- 
a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -15,4 +15,9 @@ void nf_queue_nf_hook_drop(struct net *net); /* nf_log.c */ int __init netfilter_log_init(void); +/* core.c */ +void nf_hook_entries_delete_raw(struct nf_hook_entries __rcu **pp, + const struct nf_hook_ops *reg); +int nf_hook_entries_insert_raw(struct nf_hook_entries __rcu **pp, + const struct nf_hook_ops *reg); #endif diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 37b3c9913b08..821f8d835f7a 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -32,6 +32,8 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <linux/netfilter/nf_nat.h> +#include "nf_internals.h" + static spinlock_t nf_nat_locks[CONNTRACK_LOCKS]; static DEFINE_MUTEX(nf_nat_proto_mutex); @@ -39,11 +41,27 @@ static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO] __read_mostly; static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO] __read_mostly; +static unsigned int nat_net_id __read_mostly; static struct hlist_head *nf_nat_bysource __read_mostly; static unsigned int nf_nat_htable_size __read_mostly; static unsigned int nf_nat_hash_rnd __read_mostly; +struct nf_nat_lookup_hook_priv { + struct nf_hook_entries __rcu *entries; + + struct rcu_head rcu_head; +}; + +struct nf_nat_hooks_net { + struct nf_hook_ops *nat_hook_ops; + unsigned int users; +}; + +struct nat_net { + struct nf_nat_hooks_net nat_proto_net[NFPROTO_NUMPROTO]; +}; + inline const struct nf_nat_l3proto * __nf_nat_l3proto_find(u8 family) { @@ -475,17 +493,36 @@ nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) } EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding); +static unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct, + enum nf_nat_manip_type mtype, + enum ip_conntrack_dir dir) +{ + const struct nf_nat_l3proto *l3proto; + const struct nf_nat_l4proto *l4proto; + struct nf_conntrack_tuple target; + + /* We are aiming to look 
like inverse of other direction. */ + nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + + l3proto = __nf_nat_l3proto_find(target.src.l3num); + l4proto = __nf_nat_l4proto_find(target.src.l3num, + target.dst.protonum); + if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype)) + return NF_DROP; + + return NF_ACCEPT; +} + /* Do packet manipulations according to nf_nat_setup_info. */ unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, struct sk_buff *skb) { - const struct nf_nat_l3proto *l3proto; - const struct nf_nat_l4proto *l4proto; + enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + unsigned int verdict = NF_ACCEPT; unsigned long statusbit; - enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); if (mtype == NF_NAT_MANIP_SRC) statusbit = IPS_SRC_NAT; @@ -497,21 +534,87 @@ unsigned int nf_nat_packet(struct nf_conn *ct, statusbit ^= IPS_NAT_MASK; /* Non-atomic: these bits don't change. */ - if (ct->status & statusbit) { - struct nf_conntrack_tuple target; + if (ct->status & statusbit) + verdict = nf_nat_manip_pkt(skb, ct, mtype, dir); + + return verdict; +} +EXPORT_SYMBOL_GPL(nf_nat_packet); + +unsigned int +nf_nat_inet_fn(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + struct nf_conn_nat *nat; + /* maniptype == SRC for postrouting. */ + enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook); + + ct = nf_ct_get(skb, &ctinfo); + /* Can't track? It's not due to stress, or conntrack would + * have dropped it. Hence it's the user's responsibilty to + * packet filter it out, or implement conntrack/NAT for that + * protocol. 8) --RR + */ + if (!ct) + return NF_ACCEPT; - /* We are aiming to look like inverse of other direction. 
*/ - nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + nat = nfct_nat(ct); - l3proto = __nf_nat_l3proto_find(target.src.l3num); - l4proto = __nf_nat_l4proto_find(target.src.l3num, - target.dst.protonum); - if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype)) - return NF_DROP; + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + /* Only ICMPs can be IP_CT_IS_REPLY. Fallthrough */ + case IP_CT_NEW: + /* Seen it before? This can happen for loopback, retrans, + * or local packets. + */ + if (!nf_nat_initialized(ct, maniptype)) { + struct nf_nat_lookup_hook_priv *lpriv = priv; + struct nf_hook_entries *e = rcu_dereference(lpriv->entries); + unsigned int ret; + int i; + + if (!e) + goto null_bind; + + for (i = 0; i < e->num_hook_entries; i++) { + ret = e->hooks[i].hook(e->hooks[i].priv, skb, + state); + if (ret != NF_ACCEPT) + return ret; + if (nf_nat_initialized(ct, maniptype)) + goto do_nat; + } +null_bind: + ret = nf_nat_alloc_null_binding(ct, state->hook); + if (ret != NF_ACCEPT) + return ret; + } else { + pr_debug("Already setup manip %s for ct %p (status bits 0x%lx)\n", + maniptype == NF_NAT_MANIP_SRC ? 
"SRC" : "DST", + ct, ct->status); + if (nf_nat_oif_changed(state->hook, ctinfo, nat, + state->out)) + goto oif_changed; + } + break; + default: + /* ESTABLISHED */ + WARN_ON(ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED_REPLY); + if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out)) + goto oif_changed; } - return NF_ACCEPT; +do_nat: + return nf_nat_packet(ct, ctinfo, state->hook, skb); + +oif_changed: + nf_ct_kill_acct(ct, ctinfo, skb); + return NF_DROP; } -EXPORT_SYMBOL_GPL(nf_nat_packet); +EXPORT_SYMBOL_GPL(nf_nat_inet_fn); struct nf_nat_proto_clean { u8 l3proto; @@ -801,6 +904,146 @@ static struct nf_ct_helper_expectfn follow_master_nat = { .expectfn = nf_nat_follow_master, }; +int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, + const struct nf_hook_ops *orig_nat_ops, unsigned int ops_count) +{ + struct nat_net *nat_net = net_generic(net, nat_net_id); + struct nf_nat_hooks_net *nat_proto_net; + struct nf_nat_lookup_hook_priv *priv; + unsigned int hooknum = ops->hooknum; + struct nf_hook_ops *nat_ops; + int i, ret; + + if (WARN_ON_ONCE(ops->pf >= ARRAY_SIZE(nat_net->nat_proto_net))) + return -EINVAL; + + nat_proto_net = &nat_net->nat_proto_net[ops->pf]; + + for (i = 0; i < ops_count; i++) { + if (WARN_ON(orig_nat_ops[i].pf != ops->pf)) + return -EINVAL; + if (orig_nat_ops[i].hooknum == hooknum) { + hooknum = i; + break; + } + } + + if (WARN_ON_ONCE(i == ops_count)) + return -EINVAL; + + mutex_lock(&nf_nat_proto_mutex); + if (!nat_proto_net->nat_hook_ops) { + WARN_ON(nat_proto_net->users != 0); + + nat_ops = kmemdup(orig_nat_ops, sizeof(*orig_nat_ops) * ops_count, GFP_KERNEL); + if (!nat_ops) { + mutex_unlock(&nf_nat_proto_mutex); + return -ENOMEM; + } + + for (i = 0; i < ops_count; i++) { + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (priv) { + nat_ops[i].priv = priv; + continue; + } + mutex_unlock(&nf_nat_proto_mutex); + while (i) + kfree(nat_ops[--i].priv); + kfree(nat_ops); + return -ENOMEM; + } + + ret = 
nf_register_net_hooks(net, nat_ops, ops_count); + if (ret < 0) { + mutex_unlock(&nf_nat_proto_mutex); + for (i = 0; i < ops_count; i++) + kfree(nat_ops[i].priv); + kfree(nat_ops); + return ret; + } + + nat_proto_net->nat_hook_ops = nat_ops; + } + + nat_ops = nat_proto_net->nat_hook_ops; + priv = nat_ops[hooknum].priv; + if (WARN_ON_ONCE(!priv)) { + mutex_unlock(&nf_nat_proto_mutex); + return -EOPNOTSUPP; + } + + ret = nf_hook_entries_insert_raw(&priv->entries, ops); + if (ret == 0) + nat_proto_net->users++; + + mutex_unlock(&nf_nat_proto_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(nf_nat_register_fn); + +void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops, + unsigned int ops_count) +{ + struct nat_net *nat_net = net_generic(net, nat_net_id); + struct nf_nat_hooks_net *nat_proto_net; + struct nf_nat_lookup_hook_priv *priv; + struct nf_hook_ops *nat_ops; + int hooknum = ops->hooknum; + int i; + + if (ops->pf >= ARRAY_SIZE(nat_net->nat_proto_net)) + return; + + nat_proto_net = &nat_net->nat_proto_net[ops->pf]; + + mutex_lock(&nf_nat_proto_mutex); + if (WARN_ON(nat_proto_net->users == 0)) + goto unlock; + + nat_proto_net->users--; + + nat_ops = nat_proto_net->nat_hook_ops; + for (i = 0; i < ops_count; i++) { + if (nat_ops[i].hooknum == hooknum) { + hooknum = i; + break; + } + } + if (WARN_ON_ONCE(i == ops_count)) + goto unlock; + priv = nat_ops[hooknum].priv; + nf_hook_entries_delete_raw(&priv->entries, ops); + + if (nat_proto_net->users == 0) { + nf_unregister_net_hooks(net, nat_ops, ops_count); + + for (i = 0; i < ops_count; i++) { + priv = nat_ops[i].priv; + kfree_rcu(priv, rcu_head); + } + + nat_proto_net->nat_hook_ops = NULL; + kfree(nat_ops); + } +unlock: + mutex_unlock(&nf_nat_proto_mutex); +} +EXPORT_SYMBOL_GPL(nf_nat_unregister_fn); + +static struct pernet_operations nat_net_ops = { + .id = &nat_net_id, + .size = sizeof(struct nat_net), +}; + +struct nf_nat_hook nat_hook = { + .parse_nat_setup = nfnetlink_parse_nat_setup, +#ifdef 
CONFIG_XFRM + .decode_session = __nf_nat_decode_session, +#endif + .manip_pkt = nf_nat_manip_pkt, +}; + static int __init nf_nat_init(void) { int ret, i; @@ -824,15 +1067,17 @@ static int __init nf_nat_init(void) for (i = 0; i < CONNTRACK_LOCKS; i++) spin_lock_init(&nf_nat_locks[i]); + ret = register_pernet_subsys(&nat_net_ops); + if (ret < 0) { + nf_ct_extend_unregister(&nat_extend); + return ret; + } + nf_ct_helper_expectfn_register(&follow_master_nat); - BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); - RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, - nfnetlink_parse_nat_setup); -#ifdef CONFIG_XFRM - BUG_ON(nf_nat_decode_session_hook != NULL); - RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session); -#endif + WARN_ON(nf_nat_hook != NULL); + RCU_INIT_POINTER(nf_nat_hook, &nat_hook); + return 0; } @@ -845,16 +1090,15 @@ static void __exit nf_nat_cleanup(void) nf_ct_extend_unregister(&nat_extend); nf_ct_helper_expectfn_unregister(&follow_master_nat); - RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); -#ifdef CONFIG_XFRM - RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL); -#endif + RCU_INIT_POINTER(nf_nat_hook, NULL); + synchronize_rcu(); for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); synchronize_net(); nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); + unregister_pernet_subsys(&nat_net_ops); } MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a5f3743fda65..87b2a77add65 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -74,88 +74,43 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } -/* removal requests are queued in the commit_list, but not acted upon - * until after all new rules are in place. - * - * Therefore, nf_register_net_hook(net, &nat_hook) runs before pending - * nf_unregister_net_hook(). 
- * - * nf_register_net_hook thus fails if a nat hook is already in place - * even if the conflicting hook is about to be removed. - * - * If collision is detected, search commit_log for DELCHAIN matching - * the new nat hooknum; if we find one collision is temporary: - * - * Either transaction is aborted (new/colliding hook is removed), or - * transaction is committed (old hook is removed). - */ -static bool nf_tables_allow_nat_conflict(const struct net *net, - const struct nf_hook_ops *ops) -{ - const struct nft_trans *trans; - bool ret = false; - - if (!ops->nat_hook) - return false; - - list_for_each_entry(trans, &net->nft.commit_list, list) { - const struct nf_hook_ops *pending_ops; - const struct nft_chain *pending; - - if (trans->msg_type != NFT_MSG_NEWCHAIN && - trans->msg_type != NFT_MSG_DELCHAIN) - continue; - - pending = trans->ctx.chain; - if (!nft_is_base_chain(pending)) - continue; - - pending_ops = &nft_base_chain(pending)->ops; - if (pending_ops->nat_hook && - pending_ops->pf == ops->pf && - pending_ops->hooknum == ops->hooknum) { - /* other hook registration already pending? 
*/ - if (trans->msg_type == NFT_MSG_NEWCHAIN) - return false; - - ret = true; - } - } - - return ret; -} - static int nf_tables_register_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) { - struct nf_hook_ops *ops; - int ret; + const struct nft_base_chain *basechain; + const struct nf_hook_ops *ops; if (table->flags & NFT_TABLE_F_DORMANT || !nft_is_base_chain(chain)) return 0; - ops = &nft_base_chain(chain)->ops; - ret = nf_register_net_hook(net, ops); - if (ret == -EBUSY && nf_tables_allow_nat_conflict(net, ops)) { - ops->nat_hook = false; - ret = nf_register_net_hook(net, ops); - ops->nat_hook = true; - } + basechain = nft_base_chain(chain); + ops = &basechain->ops; - return ret; + if (basechain->type->ops_register) + return basechain->type->ops_register(net, ops); + + return nf_register_net_hook(net, ops); } static void nf_tables_unregister_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) { + const struct nft_base_chain *basechain; + const struct nf_hook_ops *ops; + if (table->flags & NFT_TABLE_F_DORMANT || !nft_is_base_chain(chain)) return; + basechain = nft_base_chain(chain); + ops = &basechain->ops; - nf_unregister_net_hook(net, &nft_base_chain(chain)->ops); + if (basechain->type->ops_unregister) + return basechain->type->ops_unregister(net, ops); + + nf_unregister_net_hook(net, ops); } static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) @@ -1291,8 +1246,6 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx) if (nft_is_base_chain(chain)) { struct nft_base_chain *basechain = nft_base_chain(chain); - if (basechain->type->free) - basechain->type->free(ctx); module_put(basechain->type->owner); free_percpu(basechain->stats); if (basechain->stats) @@ -1425,9 +1378,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, } basechain->type = hook.type; - if (basechain->type->init) - basechain->type->init(ctx); - chain = &basechain->chain; ops = &basechain->ops; @@ 
-1438,9 +1388,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, ops->hook = hook.type->hooks[ops->hooknum]; ops->dev = hook.dev; - if (basechain->type->type == NFT_CHAIN_T_NAT) - ops->nat_hook = true; - chain->flags |= NFT_BASE_CHAIN; basechain->policy = policy; } else { diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index ebb9799350ed..4f46d2f4e167 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -41,7 +41,7 @@ static const struct nf_loginfo trace_loginfo = { static noinline void __nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, - int rulenum, enum nft_trace_types type) + enum nft_trace_types type) { const struct nft_pktinfo *pkt = info->pkt; @@ -52,22 +52,16 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info, info->type = type; nft_trace_notify(info); - - nf_log_trace(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb, - nft_in(pkt), nft_out(pkt), &trace_loginfo, - "TRACE: %s:%s:%s:%u ", - chain->table->name, chain->name, comments[type], rulenum); } static inline void nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, const struct nft_rule *rule, - int rulenum, enum nft_trace_types type) { if (static_branch_unlikely(&nft_trace_enabled)) { info->rule = rule; - __nft_trace_packet(info, chain, rulenum, type); + __nft_trace_packet(info, chain, type); } } @@ -140,7 +134,6 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain, struct nft_jumpstack { const struct nft_chain *chain; const struct nft_rule *rule; - int rulenum; }; unsigned int @@ -153,7 +146,6 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) struct nft_regs regs; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; - int rulenum; unsigned int gencursor = nft_genmask_cur(net); struct nft_traceinfo info; @@ -161,7 +153,6 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) if 
(static_branch_unlikely(&nft_trace_enabled)) nft_trace_init(&info, pkt, ®s.verdict, basechain); do_chain: - rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); next_rule: regs.verdict.code = NFT_CONTINUE; @@ -171,8 +162,6 @@ next_rule: if (unlikely(rule->genmask & gencursor)) continue; - rulenum++; - nft_rule_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, ®s); @@ -190,7 +179,7 @@ next_rule: continue; case NFT_CONTINUE: nft_trace_packet(&info, chain, rule, - rulenum, NFT_TRACETYPE_RULE); + NFT_TRACETYPE_RULE); continue; } break; @@ -202,7 +191,7 @@ next_rule: case NF_QUEUE: case NF_STOLEN: nft_trace_packet(&info, chain, rule, - rulenum, NFT_TRACETYPE_RULE); + NFT_TRACETYPE_RULE); return regs.verdict.code; } @@ -211,21 +200,19 @@ next_rule: BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); jumpstack[stackptr].chain = chain; jumpstack[stackptr].rule = rule; - jumpstack[stackptr].rulenum = rulenum; stackptr++; /* fall through */ case NFT_GOTO: nft_trace_packet(&info, chain, rule, - rulenum, NFT_TRACETYPE_RULE); + NFT_TRACETYPE_RULE); chain = regs.verdict.chain; goto do_chain; case NFT_CONTINUE: - rulenum++; /* fall through */ case NFT_RETURN: nft_trace_packet(&info, chain, rule, - rulenum, NFT_TRACETYPE_RETURN); + NFT_TRACETYPE_RETURN); break; default: WARN_ON(1); @@ -235,12 +222,10 @@ next_rule: stackptr--; chain = jumpstack[stackptr].chain; rule = jumpstack[stackptr].rule; - rulenum = jumpstack[stackptr].rulenum; goto next_rule; } - nft_trace_packet(&info, basechain, NULL, -1, - NFT_TRACETYPE_POLICY); + nft_trace_packet(&info, basechain, NULL, NFT_TRACETYPE_POLICY); if (static_branch_unlikely(&nft_counters_enabled)) nft_update_chain_stats(basechain, pkt); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 74a04638ef03..2c173042ac0e 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -227,6 +227,25 @@ find_dequeue_entry(struct nfqnl_instance 
*queue, unsigned int id) return entry; } +static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict) +{ + struct nf_ct_hook *ct_hook; + int err; + + if (verdict == NF_ACCEPT || + verdict == NF_STOP) { + rcu_read_lock(); + ct_hook = rcu_dereference(nf_ct_hook); + if (ct_hook) { + err = ct_hook->update(entry->state.net, entry->skb); + if (err < 0) + verdict = NF_DROP; + } + rcu_read_unlock(); + } + nf_reinject(entry, verdict); +} + static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) { @@ -237,7 +256,7 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) if (!cmpfn || cmpfn(entry, data)) { list_del(&entry->list); queue->queue_total--; - nf_reinject(entry, NF_DROP); + nfqnl_reinject(entry, NF_DROP); } } spin_unlock_bh(&queue->lock); @@ -686,7 +705,7 @@ err_out_free_nskb: err_out_unlock: spin_unlock_bh(&queue->lock); if (failopen) - nf_reinject(entry, NF_ACCEPT); + nfqnl_reinject(entry, NF_ACCEPT); err_out: return err; } @@ -1085,7 +1104,8 @@ static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl, list_for_each_entry_safe(entry, tmp, &batch_list, list) { if (nfqa[NFQA_MARK]) entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); - nf_reinject(entry, verdict); + + nfqnl_reinject(entry, verdict); } return 0; } @@ -1208,7 +1228,7 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, if (nfqa[NFQA_MARK]) entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); - nf_reinject(entry, verdict); + nfqnl_reinject(entry, verdict); return 0; } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index e235c17f1b8b..f0fc21f88775 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -25,6 +25,7 @@ struct nft_jhash { u32 modulus; u32 seed; u32 offset; + struct nft_set *map; }; static void nft_jhash_eval(const struct nft_expr *expr, @@ -35,14 +36,39 @@ static void nft_jhash_eval(const struct nft_expr *expr, const void *data = 
&regs->data[priv->sreg]; u32 h; - h = reciprocal_scale(jhash(data, priv->len, priv->seed), priv->modulus); + h = reciprocal_scale(jhash(data, priv->len, priv->seed), + priv->modulus); + regs->data[priv->dreg] = h + priv->offset; } +static void nft_jhash_map_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_jhash *priv = nft_expr_priv(expr); + const void *data = &regs->data[priv->sreg]; + const struct nft_set *map = priv->map; + const struct nft_set_ext *ext; + u32 result; + bool found; + + result = reciprocal_scale(jhash(data, priv->len, priv->seed), + priv->modulus) + priv->offset; + + found = map->ops->lookup(nft_net(pkt), map, &result, &ext); + if (!found) + return; + + nft_data_copy(&regs->data[priv->dreg], + nft_set_ext_data(ext), map->dlen); +} + struct nft_symhash { enum nft_registers dreg:8; u32 modulus; u32 offset; + struct nft_set *map; }; static void nft_symhash_eval(const struct nft_expr *expr, @@ -58,6 +84,28 @@ static void nft_symhash_eval(const struct nft_expr *expr, regs->data[priv->dreg] = h + priv->offset; } +static void nft_symhash_map_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_symhash *priv = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + const struct nft_set *map = priv->map; + const struct nft_set_ext *ext; + u32 result; + bool found; + + result = reciprocal_scale(__skb_get_hash_symmetric(skb), + priv->modulus) + priv->offset; + + found = map->ops->lookup(nft_net(pkt), map, &result, &ext); + if (!found) + return; + + nft_data_copy(&regs->data[priv->dreg], + nft_set_ext_data(ext), map->dlen); +} + static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_SREG] = { .type = NLA_U32 }, [NFTA_HASH_DREG] = { .type = NLA_U32 }, @@ -66,6 +114,9 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_SEED] = { .type = NLA_U32 }, [NFTA_HASH_OFFSET] = { .type = NLA_U32 },
[NFTA_HASH_TYPE] = { .type = NLA_U32 }, + [NFTA_HASH_SET_NAME] = { .type = NLA_STRING, + .len = NFT_SET_MAXNAMELEN - 1 }, + [NFTA_HASH_SET_ID] = { .type = NLA_U32 }, }; static int nft_jhash_init(const struct nft_ctx *ctx, @@ -115,6 +166,23 @@ static int nft_jhash_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, sizeof(u32)); } +static int nft_jhash_map_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_jhash *priv = nft_expr_priv(expr); + u8 genmask = nft_genmask_next(ctx->net); + + nft_jhash_init(ctx, expr, tb); + priv->map = nft_set_lookup_global(ctx->net, ctx->table, + tb[NFTA_HASH_SET_NAME], + tb[NFTA_HASH_SET_ID], genmask); + if (IS_ERR(priv->map)) + return PTR_ERR(priv->map); + + return 0; +} + static int nft_symhash_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -141,6 +209,23 @@ static int nft_symhash_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, sizeof(u32)); } +static int nft_symhash_map_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_jhash *priv = nft_expr_priv(expr); + u8 genmask = nft_genmask_next(ctx->net); + + nft_symhash_init(ctx, expr, tb); + priv->map = nft_set_lookup_global(ctx->net, ctx->table, + tb[NFTA_HASH_SET_NAME], + tb[NFTA_HASH_SET_ID], genmask); + if (IS_ERR(priv->map)) + return PTR_ERR(priv->map); + + return 0; +} + static int nft_jhash_dump(struct sk_buff *skb, const struct nft_expr *expr) { @@ -168,6 +253,18 @@ nla_put_failure: return -1; } +static int nft_jhash_map_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_jhash *priv = nft_expr_priv(expr); + + if (nft_jhash_dump(skb, expr) || + nla_put_string(skb, NFTA_HASH_SET_NAME, priv->map->name)) + return -1; + + return 0; +} + static int nft_symhash_dump(struct sk_buff *skb, const struct nft_expr *expr) { @@ -188,6 +285,18 @@ nla_put_failure: return -1; } +static int 
nft_symhash_map_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_symhash *priv = nft_expr_priv(expr); + + if (nft_symhash_dump(skb, expr) || + nla_put_string(skb, NFTA_HASH_SET_NAME, priv->map->name)) + return -1; + + return 0; +} + static struct nft_expr_type nft_hash_type; static const struct nft_expr_ops nft_jhash_ops = { .type = &nft_hash_type, @@ -197,6 +306,14 @@ static const struct nft_expr_ops nft_jhash_ops = { .dump = nft_jhash_dump, }; +static const struct nft_expr_ops nft_jhash_map_ops = { + .type = &nft_hash_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_jhash)), + .eval = nft_jhash_map_eval, + .init = nft_jhash_map_init, + .dump = nft_jhash_map_dump, +}; + static const struct nft_expr_ops nft_symhash_ops = { .type = &nft_hash_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)), @@ -205,6 +322,14 @@ static const struct nft_expr_ops nft_symhash_ops = { .dump = nft_symhash_dump, }; +static const struct nft_expr_ops nft_symhash_map_ops = { + .type = &nft_hash_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)), + .eval = nft_symhash_map_eval, + .init = nft_symhash_map_init, + .dump = nft_symhash_map_dump, +}; + static const struct nft_expr_ops * nft_hash_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -217,8 +342,12 @@ nft_hash_select_ops(const struct nft_ctx *ctx, type = ntohl(nla_get_be32(tb[NFTA_HASH_TYPE])); switch (type) { case NFT_HASH_SYM: + if (tb[NFTA_HASH_SET_NAME]) + return &nft_symhash_map_ops; return &nft_symhash_ops; case NFT_HASH_JENKINS: + if (tb[NFTA_HASH_SET_NAME]) + return &nft_jhash_map_ops; return &nft_jhash_ops; default: break; diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 8a64db8f2e69..cdbc62a53933 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -166,18 +166,43 @@ struct nft_ng_random { enum nft_registers dreg:8; u32 modulus; u32 offset; + struct nft_set *map; }; +static u32 nft_ng_random_gen(struct 
nft_ng_random *priv) +{ + struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state); + + return reciprocal_scale(prandom_u32_state(state), priv->modulus) + + priv->offset; +} + static void nft_ng_random_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_ng_random *priv = nft_expr_priv(expr); - struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state); - u32 val; - val = reciprocal_scale(prandom_u32_state(state), priv->modulus); - regs->data[priv->dreg] = val + priv->offset; + regs->data[priv->dreg] = nft_ng_random_gen(priv); +} + +static void nft_ng_random_map_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_ng_random *priv = nft_expr_priv(expr); + const struct nft_set *map = priv->map; + const struct nft_set_ext *ext; + u32 result; + bool found; + + result = nft_ng_random_gen(priv); + found = map->ops->lookup(nft_net(pkt), map, &result, &ext); + if (!found) + return; + + nft_data_copy(&regs->data[priv->dreg], + nft_set_ext_data(ext), map->dlen); } static int nft_ng_random_init(const struct nft_ctx *ctx, @@ -204,6 +229,23 @@ static int nft_ng_random_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, sizeof(u32)); } +static int nft_ng_random_map_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_ng_random *priv = nft_expr_priv(expr); + u8 genmask = nft_genmask_next(ctx->net); + + nft_ng_random_init(ctx, expr, tb); + priv->map = nft_set_lookup_global(ctx->net, ctx->table, + tb[NFTA_NG_SET_NAME], + tb[NFTA_NG_SET_ID], genmask); + if (IS_ERR(priv->map)) + return PTR_ERR(priv->map); + + return 0; +} + static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ng_random *priv = nft_expr_priv(expr); @@ -212,6 +254,22 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) priv->offset); } +static int
nft_ng_random_map_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_ng_random *priv = nft_expr_priv(expr); + + if (nft_ng_dump(skb, priv->dreg, priv->modulus, + NFT_NG_RANDOM, priv->offset) || + nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + static struct nft_expr_type nft_ng_type; static const struct nft_expr_ops nft_ng_inc_ops = { .type = &nft_ng_type, @@ -237,6 +295,14 @@ static const struct nft_expr_ops nft_ng_random_ops = { .dump = nft_ng_random_dump, }; +static const struct nft_expr_ops nft_ng_random_map_ops = { + .type = &nft_ng_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)), + .eval = nft_ng_random_map_eval, + .init = nft_ng_random_map_init, + .dump = nft_ng_random_map_dump, +}; + static const struct nft_expr_ops * nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { @@ -255,6 +321,8 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) return &nft_ng_inc_map_ops; return &nft_ng_inc_ops; case NFT_NG_RANDOM: + if (tb[NFTA_NG_SET_NAME]) + return &nft_ng_random_map_ops; return &nft_ng_random_ops; } diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 22c57d7612c4..d260ce2d6671 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -22,6 +22,7 @@ struct nft_rbtree { struct rb_root root; rwlock_t lock; seqcount_t count; + struct delayed_work gc_work; }; struct nft_rbtree_elem { @@ -265,6 +266,7 @@ static void nft_rbtree_activate(const struct net *net, struct nft_rbtree_elem *rbe = elem->priv; nft_set_elem_change_active(net, set, &rbe->ext); + nft_set_elem_clear_busy(&rbe->ext); } static bool nft_rbtree_flush(const struct net *net, @@ -272,8 +274,12 @@ static bool nft_rbtree_flush(const struct net *net, { struct nft_rbtree_elem *rbe = priv; - nft_set_elem_change_active(net, set, &rbe->ext); - return true; + if 
(!nft_set_elem_mark_busy(&rbe->ext) || + !nft_is_active(net, &rbe->ext)) { + nft_set_elem_change_active(net, set, &rbe->ext); + return true; + } + return false; } static void *nft_rbtree_deactivate(const struct net *net, @@ -347,6 +353,62 @@ cont: read_unlock_bh(&priv->lock); } +static void nft_rbtree_gc(struct work_struct *work) +{ + struct nft_set_gc_batch *gcb = NULL; + struct rb_node *node, *prev = NULL; + struct nft_rbtree_elem *rbe; + struct nft_rbtree *priv; + struct nft_set *set; + int i; + + priv = container_of(work, struct nft_rbtree, gc_work.work); + set = nft_set_container_of(priv); + + write_lock_bh(&priv->lock); + write_seqcount_begin(&priv->count); + for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + rbe = rb_entry(node, struct nft_rbtree_elem, node); + + if (nft_rbtree_interval_end(rbe)) { + prev = node; + continue; + } + if (!nft_set_elem_expired(&rbe->ext)) + continue; + if (nft_set_elem_mark_busy(&rbe->ext)) + continue; + + gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); + if (!gcb) + goto out; + + atomic_dec(&set->nelems); + nft_set_gc_batch_add(gcb, rbe); + + if (prev) { + rbe = rb_entry(prev, struct nft_rbtree_elem, node); + atomic_dec(&set->nelems); + nft_set_gc_batch_add(gcb, rbe); + } + node = rb_next(node); + } +out: + if (gcb) { + for (i = 0; i < gcb->head.cnt; i++) { + rbe = gcb->elems[i]; + rb_erase(&rbe->node, &priv->root); + } + } + write_seqcount_end(&priv->count); + write_unlock_bh(&priv->lock); + + nft_set_gc_batch_complete(gcb); + + queue_delayed_work(system_power_efficient_wq, &priv->gc_work, + nft_set_gc_interval(set)); +} + static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[], const struct nft_set_desc *desc) { @@ -362,6 +424,12 @@ static int nft_rbtree_init(const struct nft_set *set, rwlock_init(&priv->lock); seqcount_init(&priv->count); priv->root = RB_ROOT; + + INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc); + if (set->flags & NFT_SET_TIMEOUT) + 
queue_delayed_work(system_power_efficient_wq, &priv->gc_work, + nft_set_gc_interval(set)); + return 0; } @@ -371,6 +439,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) struct nft_rbtree_elem *rbe; struct rb_node *node; + cancel_delayed_work_sync(&priv->gc_work); while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); @@ -395,7 +464,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, static struct nft_set_type nft_rbtree_type __read_mostly = { .owner = THIS_MODULE, - .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT, + .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, .ops = { .privsize = nft_rbtree_privsize, .elemsize = offsetof(struct nft_rbtree_elem, ext), |