From b21613aeb649293946556410ff10dbb8c639f298 Mon Sep 17 00:00:00 2001 From: "holger@eitzenberger.org" Date: Fri, 20 Sep 2013 22:43:04 +0200 Subject: netfilter: nf_ct_sip: extend RCU read lock in set_expected_rtp_rtcp() Currently set_expected_rtp_rtcp() in the SIP helper uses rcu_dereference() two times to access two different NAT hook functions. However, only the first one is protected by the RCU reader lock, but the 2nd isn't. Fix it by extending the RCU protected area. This is more a cosmetic thing since we rely on all netfilter hooks being rcu_read_lock()ed by nf_hook_slow() in many places anyways, as Patrick McHardy clarified. Signed-off-by: Holger Eitzenberger Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index e0c4373b4747..5ed8c441dffd 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -966,7 +966,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, #endif skip_expect = 1; } while (!skip_expect); - rcu_read_unlock(); base_port = ntohs(tuple.dst.u.udp.port) & ~1; rtp_port = htons(base_port); @@ -980,8 +979,10 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, goto err1; } - if (skip_expect) + if (skip_expect) { + rcu_read_unlock(); return NF_ACCEPT; + } rtp_exp = nf_ct_expect_alloc(ct); if (rtp_exp == NULL) @@ -1012,6 +1013,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, err2: nf_ct_expect_put(rtp_exp); err1: + rcu_read_unlock(); return ret; } -- cgit v1.2.3 From de1389b11686f436c81d696b5a33eec2bc975665 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 26 Sep 2013 15:00:30 +0800 Subject: netfilter: xt_TCPMSS: Get mtu only if clamp-mss-to-pmtu is specified This patch refactors the code to skip tcpmss_reverse_mtu if no clamp-mss-to-pmtu is specified. Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_TCPMSS.c | 70 ++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 34 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index cd24290f3b2f..62776de8293c 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -43,10 +43,41 @@ optlen(const u_int8_t *opt, unsigned int offset) return opt[offset+1]; } +static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, + unsigned int family) +{ + struct flowi fl; + const struct nf_afinfo *ai; + struct rtable *rt = NULL; + u_int32_t mtu = ~0U; + + if (family == PF_INET) { + struct flowi4 *fl4 = &fl.u.ip4; + memset(fl4, 0, sizeof(*fl4)); + fl4->daddr = ip_hdr(skb)->saddr; + } else { + struct flowi6 *fl6 = &fl.u.ip6; + + memset(fl6, 0, sizeof(*fl6)); + fl6->daddr = ipv6_hdr(skb)->saddr; + } + rcu_read_lock(); + ai = nf_get_afinfo(family); + if (ai != NULL) + ai->route(&init_net, (struct dst_entry **)&rt, &fl, false); + rcu_read_unlock(); + + if (rt != NULL) { + mtu = dst_mtu(&rt->dst); + dst_release(&rt->dst); + } + return mtu; +} + static int tcpmss_mangle_packet(struct sk_buff *skb, const struct xt_action_param *par, - unsigned int in_mtu, + unsigned int family, unsigned int tcphoff, unsigned int minlen) { @@ -76,6 +107,8 @@ tcpmss_mangle_packet(struct sk_buff *skb, return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { + unsigned int in_mtu = tcpmss_reverse_mtu(skb, family); + if (dst_mtu(skb_dst(skb)) <= minlen) { net_err_ratelimited("unknown or invalid path-MTU (%u)\n", dst_mtu(skb_dst(skb))); @@ -165,37 +198,6 @@ tcpmss_mangle_packet(struct sk_buff *skb, return TCPOLEN_MSS; } -static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, - unsigned int family) -{ - struct flowi fl; - const struct nf_afinfo *ai; - struct rtable *rt = NULL; - u_int32_t mtu = ~0U; - - if (family == PF_INET) { - struct flowi4 *fl4 = &fl.u.ip4; - memset(fl4, 0, sizeof(*fl4)); - fl4->daddr = ip_hdr(skb)->saddr; - } else { - struct flowi6 *fl6 = &fl.u.ip6; - - memset(fl6, 0, sizeof(*fl6)); - fl6->daddr = ipv6_hdr(skb)->saddr; - } - rcu_read_lock(); - ai = nf_get_afinfo(family); - if (ai != NULL) - ai->route(&init_net, (struct dst_entry **)&rt, &fl, false); - rcu_read_unlock(); - - if (rt != NULL) { - mtu = dst_mtu(&rt->dst); - dst_release(&rt->dst); - } - return mtu; -} - static unsigned int tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par) { @@ -204,7 +206,7 @@ tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par) int ret; ret = tcpmss_mangle_packet(skb, par, - tcpmss_reverse_mtu(skb, PF_INET), + PF_INET, iph->ihl * 4, sizeof(*iph) + sizeof(struct tcphdr)); if (ret < 0) @@ -233,7 +235,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par) if (tcphoff < 0) return NF_DROP; ret = tcpmss_mangle_packet(skb, par, - tcpmss_reverse_mtu(skb, PF_INET6), + PF_INET6, tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); if (ret < 0) -- cgit v1.2.3 From 7722e0d1c076d9610f00d79bde8af977157aa23b Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 26 Sep 2013 15:00:31 +0800 Subject: netfilter: xt_TCPMSS: lookup route from proper net namespace Otherwise the pmtu will be incorrect. Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_TCPMSS.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 62776de8293c..e762de5ee89b 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -43,7 +43,8 @@ optlen(const u_int8_t *opt, unsigned int offset) return opt[offset+1]; } -static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, +static u_int32_t tcpmss_reverse_mtu(struct net *net, + const struct sk_buff *skb, unsigned int family) { struct flowi fl; @@ -64,7 +65,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, rcu_read_lock(); ai = nf_get_afinfo(family); if (ai != NULL) - ai->route(&init_net, (struct dst_entry **)&rt, &fl, false); + ai->route(net, (struct dst_entry **)&rt, &fl, false); rcu_read_unlock(); if (rt != NULL) { @@ -107,7 +108,8 @@ tcpmss_mangle_packet(struct sk_buff *skb, return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { - unsigned int in_mtu = tcpmss_reverse_mtu(skb, family); + struct net *net = dev_net(par->in ? par->in : par->out); + unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); if (dst_mtu(skb_dst(skb)) <= minlen) { net_err_ratelimited("unknown or invalid path-MTU (%u)\n", -- cgit v1.2.3 From 122ebbf24c33ac13a9243248c9b8da0fa4c012bd Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 27 Apr 2013 21:02:59 +0200 Subject: netfilter: ipset: Don't call ip_nest_end needlessly in the error path Suggested-by: Pablo Neira Ayuso Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_gen.h | 2 +- net/netfilter/ipset/ip_set_hash_gen.h | 2 +- net/netfilter/ipset/ip_set_list_set.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 25243379b887..f6af97cf8d3e 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -228,11 +228,11 @@ mtype_list(const struct ip_set *set, nla_put_failure: nla_nest_cancel(skb, nested); - ipset_nest_end(skb, adt); if (unlikely(id == first)) { cb->args[2] = 0; return -EMSGSIZE; } + ipset_nest_end(skb, adt); return 0; } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 707bc520d629..7ff20ecbe185 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -909,13 +909,13 @@ mtype_list(const struct ip_set *set, nla_put_failure: nlmsg_trim(skb, incomplete); - ipset_nest_end(skb, atd); if (unlikely(first == cb->args[2])) { pr_warning("Can't list set %s: one bucket does not fit into " "a message. Please report it!\n", set->name); cb->args[2] = 0; return -EMSGSIZE; } + ipset_nest_end(skb, atd); return 0; } diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 979b8c90e422..68299ee15847 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -550,11 +550,11 @@ finish: nla_put_failure: nla_nest_cancel(skb, nested); - ipset_nest_end(skb, atd); if (unlikely(i == first)) { cb->args[2] = 0; return -EMSGSIZE; } + ipset_nest_end(skb, atd); return 0; } -- cgit v1.2.3 From b3aabd149c201a7c2a5f9ee673376948ae0724d8 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 30 Apr 2013 18:40:43 +0200 Subject: netfilter: ipset: Sparse warning about shadowed variable fixed net/netfilter/ipset/ip_set_hash_ipportnet.c:275:20: warning: symbol 'cidr' shadows an earlier one Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_ipportnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index f15f3e28b9c3..9a80d8bc9f18 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -272,7 +272,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (ip > ip_to) swap(ip, ip_to); } else if (tb[IPSET_ATTR_CIDR]) { - u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; -- cgit v1.2.3 From a0f28dc754bb6e78158eb41bf729342f7c2bcd70 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 30 Apr 2013 21:23:18 +0200 Subject: netfilter: ipset: Fix sparse warnings due to missing rcu annotations Reported-by: Pablo Neira Ayuso Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_gen.h | 87 ++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 32 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 7ff20ecbe185..09a21dd5f120 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -15,6 +15,8 @@ #define rcu_dereference_bh(p) rcu_dereference(p) #endif +#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1) + #define CONCAT(a, b) a##b #define TOKEN(a, b) CONCAT(a, b) @@ -269,7 +271,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) /* The generic hash structure */ struct htype { - struct htable *table; /* the hash table */ + struct htable __rcu *table; /* the hash table */ u32 maxelem; /* max elements in the hash */ u32 elements; /* current element (vs timeout) */ u32 initval; /* random jhash init value */ @@ -347,10 +349,10 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length) /* Calculate the actual memory size of the set data */ static size_t -mtype_ahash_memsize(const struct htype *h, u8 nets_length) +mtype_ahash_memsize(const struct htype *h, const struct htable *t, + u8 nets_length) { u32 i; - struct htable *t = h->table; size_t memsize = sizeof(*h) + sizeof(*t) #ifdef IP_SET_HASH_WITH_NETS @@ -369,10 +371,11 @@ static void mtype_flush(struct ip_set *set) { struct htype *h = set->data; - struct htable *t = h->table; + struct htable *t; struct hbucket *n; u32 i; + t = rcu_dereference_bh_nfnl(h->table); for (i = 0; i < jhash_size(t->htable_bits); i++) { n = hbucket(t, i); if (n->size) { @@ -397,7 +400,7 @@ mtype_destroy(struct ip_set *set) if (set->extensions & IPSET_EXT_TIMEOUT) del_timer_sync(&h->gc); - ahash_destroy(h->table); + ahash_destroy(rcu_dereference_bh_nfnl(h->table)); #ifdef IP_SET_HASH_WITH_RBTREE rbtree_destroy(&h->rbtree); #endif @@ -443,12 +446,14 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) static void mtype_expire(struct htype *h, u8 nets_length, size_t dsize) { - struct htable *t = h->table; + struct htable *t; struct hbucket *n; struct mtype_elem *data; u32 i; int j; + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); for (i = 0; i < jhash_size(t->htable_bits); i++) { n = hbucket(t, i); for (j = 0; j < n->pos; j++) { @@ -481,6 +486,7 @@ mtype_expire(struct htype *h, u8 nets_length, size_t dsize) n->value = tmp; } } + rcu_read_unlock_bh(); } static void @@ -505,7 +511,7 @@ static int mtype_resize(struct ip_set *set, bool retried) { struct htype *h = set->data; - struct htable *t, *orig = h->table; + struct htable *t, *orig = rcu_dereference_bh_nfnl(h->table); u8 htable_bits = orig->htable_bits; #ifdef IP_SET_HASH_WITH_NETS u8 flags; @@ -682,13 +688,15 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { struct htype *h = set->data; - struct htable *t = h->table; + struct htable *t; const struct mtype_elem *d = value; struct mtype_elem *data; struct hbucket *n; - int i; + int i, ret = -IPSET_ERR_EXIST; u32 key, multi = 0; + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); key = HKEY(value, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { @@ -697,7 +705,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, continue; if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(data, h))) - return -IPSET_ERR_EXIST; + goto out; if (i != n->pos - 1) /* Not last one */ memcpy(data, ahash_data(n, n->pos - 1, h->dsize), @@ -712,17 +720,22 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) * h->dsize, GFP_ATOMIC); - if (!tmp) - return 0; + if (!tmp) { + ret = 0; + goto out; + } n->size -= AHASH_INIT_SIZE; memcpy(tmp, n->value, n->size * h->dsize); kfree(n->value); n->value = tmp; } - return 0; + ret = 0; + goto out; } - return -IPSET_ERR_EXIST; +out: + rcu_read_unlock_bh(); + return ret; } static inline int @@ -745,7 +758,7 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, struct ip_set_ext *mext, u32 flags) { struct htype *h = set->data; - struct htable *t = h->table; + struct htable *t = rcu_dereference_bh(h->table); struct hbucket *n; struct mtype_elem *data; int i, j = 0; @@ -785,18 +798,22 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { struct htype *h = set->data; - struct htable *t = h->table; + struct htable *t; struct mtype_elem *d = value; struct hbucket *n; struct mtype_elem *data; - int i; + int i, ret = 0; u32 key, multi = 0; + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); #ifdef IP_SET_HASH_WITH_NETS /* If we test an IP address and not a network address, * try all possible network sizes */ - if (CIDR(d->cidr) == SET_HOST_MASK(set->family)) - return mtype_test_cidrs(set, d, ext, mext, flags); + if (CIDR(d->cidr) == SET_HOST_MASK(set->family)) { + ret = mtype_test_cidrs(set, d, ext, mext, flags); + goto out; + } #endif key = HKEY(d, h->initval, t->htable_bits); @@ -805,10 +822,14 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, data = ahash_data(n, i, h->dsize); if (mtype_data_equal(data, d, &multi) && !(SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)))) - return mtype_data_match(data, ext, mext, set, flags); + ip_set_timeout_expired(ext_timeout(data, h)))) { + ret = mtype_data_match(data, ext, mext, set, flags); + goto out; + } } - return 0; +out: + rcu_read_unlock_bh(); + return ret; } /* Reply a HEADER request: fill out the header part of the set */ @@ -816,18 +837,18 @@ static int mtype_head(struct ip_set *set, struct sk_buff *skb) { const struct htype *h = set->data; + const struct htable *t; struct nlattr *nested; size_t memsize; - read_lock_bh(&set->lock); - memsize = mtype_ahash_memsize(h, NETS_LENGTH(set->family)); - read_unlock_bh(&set->lock); + t = rcu_dereference_bh_nfnl(h->table); + memsize = mtype_ahash_memsize(h, t, NETS_LENGTH(set->family)); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE, - htonl(jhash_size(h->table->htable_bits))) || + htonl(jhash_size(t->htable_bits))) || nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem))) goto nla_put_failure; #ifdef IP_SET_HASH_WITH_NETMASK @@ -856,7 +877,7 @@ mtype_list(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb) { const struct htype *h = set->data; - const struct htable *t = h->table; + const struct htable *t = rcu_dereference_bh_nfnl(h->table); struct nlattr *atd, *nested; const struct hbucket *n; const struct mtype_elem *e; @@ -956,6 +977,7 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) #endif size_t hsize; struct HTYPE *h; + struct htable *t; if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) return -IPSET_ERR_INVALID_FAMILY; @@ -1013,12 +1035,13 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) kfree(h); return -ENOMEM; } - h->table = ip_set_alloc(hsize); - if (!h->table) { + t = ip_set_alloc(hsize); + if (!t) { kfree(h); return -ENOMEM; } - h->table->htable_bits = hbits; + t->htable_bits = hbits; + rcu_assign_pointer(h->table, t); set->data = h; if (set->family == NFPROTO_IPV4) @@ -1096,8 +1119,8 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", - set->name, jhash_size(h->table->htable_bits), - h->table->htable_bits, h->maxelem, set->data, h->table); + set->name, jhash_size(t->htable_bits), + t->htable_bits, h->maxelem, set->data, t); return 0; } -- cgit v1.2.3 From 35b8dcf8c3a0be1feb1c8b29b22e1685ba0c2e14 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 30 Apr 2013 23:02:43 +0200 Subject: netfilter: ipset: Rename simple macro names to avoid namespace issues. Reported-by: David Laight Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_gen.h | 47 +++++---- net/netfilter/ipset/ip_set_bitmap_ip.c | 10 +- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 10 +- net/netfilter/ipset/ip_set_bitmap_port.c | 10 +- net/netfilter/ipset/ip_set_hash_gen.h | 147 +++++++++++++++------------- net/netfilter/ipset/ip_set_hash_ip.c | 10 +- net/netfilter/ipset/ip_set_hash_ipport.c | 12 +-- net/netfilter/ipset/ip_set_hash_ipportip.c | 12 +-- net/netfilter/ipset/ip_set_hash_ipportnet.c | 16 +-- net/netfilter/ipset/ip_set_hash_net.c | 14 +-- net/netfilter/ipset/ip_set_hash_netiface.c | 14 +-- net/netfilter/ipset/ip_set_hash_netport.c | 16 +-- net/netfilter/ipset/ip_set_list_set.c | 10 +- 13 files changed, 166 insertions(+), 162 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index f6af97cf8d3e..d39905e7e881 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -8,31 +8,28 @@ #ifndef __IP_SET_BITMAP_IP_GEN_H #define __IP_SET_BITMAP_IP_GEN_H -#define CONCAT(a, b) a##b -#define TOKEN(a,b) CONCAT(a, b) - -#define mtype_do_test TOKEN(MTYPE, _do_test) -#define mtype_gc_test TOKEN(MTYPE, _gc_test) -#define mtype_is_filled TOKEN(MTYPE, _is_filled) -#define mtype_do_add TOKEN(MTYPE, _do_add) -#define mtype_do_del TOKEN(MTYPE, _do_del) -#define mtype_do_list TOKEN(MTYPE, _do_list) -#define mtype_do_head TOKEN(MTYPE, _do_head) -#define mtype_adt_elem TOKEN(MTYPE, _adt_elem) -#define mtype_add_timeout TOKEN(MTYPE, _add_timeout) -#define mtype_gc_init TOKEN(MTYPE, _gc_init) -#define mtype_kadt TOKEN(MTYPE, _kadt) -#define mtype_uadt TOKEN(MTYPE, _uadt) -#define mtype_destroy TOKEN(MTYPE, _destroy) -#define mtype_flush TOKEN(MTYPE, _flush) -#define mtype_head TOKEN(MTYPE, _head) -#define mtype_same_set TOKEN(MTYPE, _same_set) -#define mtype_elem TOKEN(MTYPE, _elem) -#define mtype_test TOKEN(MTYPE, _test) -#define mtype_add TOKEN(MTYPE, _add) -#define mtype_del TOKEN(MTYPE, _del) -#define mtype_list TOKEN(MTYPE, _list) -#define mtype_gc TOKEN(MTYPE, _gc) +#define mtype_do_test IPSET_TOKEN(MTYPE, _do_test) +#define mtype_gc_test IPSET_TOKEN(MTYPE, _gc_test) +#define mtype_is_filled IPSET_TOKEN(MTYPE, _is_filled) +#define mtype_do_add IPSET_TOKEN(MTYPE, _do_add) +#define mtype_do_del IPSET_TOKEN(MTYPE, _do_del) +#define mtype_do_list IPSET_TOKEN(MTYPE, _do_list) +#define mtype_do_head IPSET_TOKEN(MTYPE, _do_head) +#define mtype_adt_elem IPSET_TOKEN(MTYPE, _adt_elem) +#define mtype_add_timeout IPSET_TOKEN(MTYPE, _add_timeout) +#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) +#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) +#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) +#define mtype_flush IPSET_TOKEN(MTYPE, _flush) +#define mtype_head IPSET_TOKEN(MTYPE, _head) +#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) +#define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_test IPSET_TOKEN(MTYPE, _test) +#define mtype_add IPSET_TOKEN(MTYPE, _add) +#define mtype_del IPSET_TOKEN(MTYPE, _del) +#define mtype_list IPSET_TOKEN(MTYPE, _list) +#define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype MTYPE #define ext_timeout(e, m) \ diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index f1a8128bef01..c2f89b1c1d92 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -25,12 +25,12 @@ #include #include -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counter support added */ +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 1 /* Counter support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:ip"); #define MTYPE bitmap_ip @@ -401,8 +401,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_IPV4, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_ip_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 3b30e0bef890..1d6551cc590e 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -25,12 +25,12 @@ #include #include -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counter support added */ +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 1 /* Counter support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:ip,mac"); #define MTYPE bitmap_ipmac @@ -460,8 +460,8 @@ static struct ip_set_type bitmap_ipmac_type = { .features = IPSET_TYPE_IP | IPSET_TYPE_MAC, .dimension = IPSET_DIM_TWO, .family = NFPROTO_IPV4, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_ipmac_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 8207d1fda528..b22048965d2a 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -20,12 +20,12 @@ #include #include -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counter support added */ +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 1 /* Counter support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("bitmap:port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:port"); #define MTYPE bitmap_port @@ -333,8 +333,8 @@ static struct ip_set_type bitmap_port_type = { .features = IPSET_TYPE_PORT, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_port_create, .create_policy = { [IPSET_ATTR_PORT] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 09a21dd5f120..68b9ccebabaa 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -17,9 +17,6 @@ #define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1) -#define CONCAT(a, b) a##b -#define TOKEN(a, b) CONCAT(a, b) - /* Hashing which uses arrays to resolve clashing. The hash table is resized * (doubled) when searching becomes too long. * Internally jhash is used with the assumption that the size of the @@ -222,41 +219,41 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #undef HKEY -#define mtype_data_equal TOKEN(MTYPE, _data_equal) +#define mtype_data_equal IPSET_TOKEN(MTYPE, _data_equal) #ifdef IP_SET_HASH_WITH_NETS -#define mtype_do_data_match TOKEN(MTYPE, _do_data_match) +#define mtype_do_data_match IPSET_TOKEN(MTYPE, _do_data_match) #else #define mtype_do_data_match(d) 1 #endif -#define mtype_data_set_flags TOKEN(MTYPE, _data_set_flags) -#define mtype_data_reset_flags TOKEN(MTYPE, _data_reset_flags) -#define mtype_data_netmask TOKEN(MTYPE, _data_netmask) -#define mtype_data_list TOKEN(MTYPE, _data_list) -#define mtype_data_next TOKEN(MTYPE, _data_next) -#define mtype_elem TOKEN(MTYPE, _elem) -#define mtype_add_cidr TOKEN(MTYPE, _add_cidr) -#define mtype_del_cidr TOKEN(MTYPE, _del_cidr) -#define mtype_ahash_memsize TOKEN(MTYPE, _ahash_memsize) -#define mtype_flush TOKEN(MTYPE, _flush) -#define mtype_destroy TOKEN(MTYPE, _destroy) -#define mtype_gc_init TOKEN(MTYPE, _gc_init) -#define mtype_same_set TOKEN(MTYPE, _same_set) -#define mtype_kadt TOKEN(MTYPE, _kadt) -#define mtype_uadt TOKEN(MTYPE, _uadt) +#define mtype_data_set_flags IPSET_TOKEN(MTYPE, _data_set_flags) +#define mtype_data_reset_flags IPSET_TOKEN(MTYPE, _data_reset_flags) +#define mtype_data_netmask IPSET_TOKEN(MTYPE, _data_netmask) +#define mtype_data_list IPSET_TOKEN(MTYPE, _data_list) +#define mtype_data_next IPSET_TOKEN(MTYPE, _data_next) +#define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr) +#define mtype_del_cidr IPSET_TOKEN(MTYPE, _del_cidr) +#define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize) +#define mtype_flush IPSET_TOKEN(MTYPE, _flush) +#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) +#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) +#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) +#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) #define mtype MTYPE -#define mtype_elem TOKEN(MTYPE, _elem) -#define mtype_add TOKEN(MTYPE, _add) -#define mtype_del TOKEN(MTYPE, _del) -#define mtype_test_cidrs TOKEN(MTYPE, _test_cidrs) -#define mtype_test TOKEN(MTYPE, _test) -#define mtype_expire TOKEN(MTYPE, _expire) -#define mtype_resize TOKEN(MTYPE, _resize) -#define mtype_head TOKEN(MTYPE, _head) -#define mtype_list TOKEN(MTYPE, _list) -#define mtype_gc TOKEN(MTYPE, _gc) -#define mtype_variant TOKEN(MTYPE, _variant) -#define mtype_data_match TOKEN(MTYPE, _data_match) +#define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_add IPSET_TOKEN(MTYPE, _add) +#define mtype_del IPSET_TOKEN(MTYPE, _del) +#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) +#define mtype_test IPSET_TOKEN(MTYPE, _test) +#define mtype_expire IPSET_TOKEN(MTYPE, _expire) +#define mtype_resize IPSET_TOKEN(MTYPE, _resize) +#define mtype_head IPSET_TOKEN(MTYPE, _head) +#define mtype_list IPSET_TOKEN(MTYPE, _list) +#define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_variant IPSET_TOKEN(MTYPE, _variant) +#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) #ifndef HKEY_DATALEN #define HKEY_DATALEN sizeof(struct mtype_elem) @@ -941,13 +938,13 @@ nla_put_failure: } static int -TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt); +IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt); static int -TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); +IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); static const struct ip_set_type_variant mtype_variant = { .kadt = mtype_kadt, @@ -967,7 +964,7 @@ static const struct ip_set_type_variant mtype_variant = { #ifdef IP_SET_EMIT_CREATE static int -TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) +IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) { u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; u32 cadt_flags = 0; @@ -1045,9 +1042,9 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) set->data = h; if (set->family == NFPROTO_IPV4) - set->variant = &TOKEN(HTYPE, 4_variant); + set->variant = &IPSET_TOKEN(HTYPE, 4_variant); else - set->variant = &TOKEN(HTYPE, 6_variant); + set->variant = &IPSET_TOKEN(HTYPE, 6_variant); if (tb[IPSET_ATTR_CADT_FLAGS]) cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); @@ -1058,64 +1055,74 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; if (set->family == NFPROTO_IPV4) { - h->dsize = - sizeof(struct TOKEN(HTYPE, 4ct_elem)); + h->dsize = sizeof(struct + IPSET_TOKEN(HTYPE, 4ct_elem)); h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 4ct_elem), - timeout); + offsetof(struct + IPSET_TOKEN(HTYPE, 4ct_elem), + timeout); h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 4ct_elem), - counter); - TOKEN(HTYPE, 4_gc_init)(set, - TOKEN(HTYPE, 4_gc)); + offsetof(struct + IPSET_TOKEN(HTYPE, 4ct_elem), + counter); + IPSET_TOKEN(HTYPE, 4_gc_init)(set, + IPSET_TOKEN(HTYPE, 4_gc)); } else { - h->dsize = - sizeof(struct TOKEN(HTYPE, 6ct_elem)); + h->dsize = sizeof(struct + IPSET_TOKEN(HTYPE, 6ct_elem)); h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 6ct_elem), - timeout); + offsetof(struct + IPSET_TOKEN(HTYPE, 6ct_elem), + timeout); h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 6ct_elem), - counter); - TOKEN(HTYPE, 6_gc_init)(set, - TOKEN(HTYPE, 6_gc)); + offsetof(struct + IPSET_TOKEN(HTYPE, 6ct_elem), + counter); + IPSET_TOKEN(HTYPE, 6_gc_init)(set, + IPSET_TOKEN(HTYPE, 6_gc)); } } else { if (set->family == NFPROTO_IPV4) { h->dsize = - sizeof(struct TOKEN(HTYPE, 4c_elem)); + sizeof(struct + IPSET_TOKEN(HTYPE, 4c_elem)); h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 4c_elem), - counter); + offsetof(struct + IPSET_TOKEN(HTYPE, 4c_elem), + counter); } else { h->dsize = - sizeof(struct TOKEN(HTYPE, 6c_elem)); + sizeof(struct + IPSET_TOKEN(HTYPE, 6c_elem)); h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 6c_elem), - counter); + offsetof(struct + IPSET_TOKEN(HTYPE, 6c_elem), + counter); } } } else if (tb[IPSET_ATTR_TIMEOUT]) { h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; if (set->family == NFPROTO_IPV4) { - h->dsize = sizeof(struct TOKEN(HTYPE, 4t_elem)); + h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem)); h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 4t_elem), + offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem), timeout); - TOKEN(HTYPE, 4_gc_init)(set, TOKEN(HTYPE, 4_gc)); + IPSET_TOKEN(HTYPE, 4_gc_init)(set, + IPSET_TOKEN(HTYPE, 4_gc)); } else { - h->dsize = sizeof(struct TOKEN(HTYPE, 6t_elem)); + h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem)); h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 6t_elem), + offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem), timeout); - TOKEN(HTYPE, 6_gc_init)(set, TOKEN(HTYPE, 6_gc)); + IPSET_TOKEN(HTYPE, 6_gc_init)(set, + IPSET_TOKEN(HTYPE, 6_gc)); } } else { if (set->family == NFPROTO_IPV4) - h->dsize = sizeof(struct TOKEN(HTYPE, 4_elem)); + h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)); else - h->dsize = sizeof(struct TOKEN(HTYPE, 6_elem)); + h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)); } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index c74e6e14cd93..de44fca687c3 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -23,12 +23,12 @@ #include #include -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counters support */ +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 1 /* Counters support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip"); /* Type specific function prefix */ @@ -304,8 +304,8 @@ static struct ip_set_type hash_ip_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 7a2d2bd98d04..b514ff4e83ae 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -24,13 +24,13 @@ #include #include -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -#define REVISION_MAX 2 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +#define IPSET_TYPE_REV_MAX 2 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port"); /* Type specific function prefix */ @@ -396,8 +396,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ipport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 34e8a1acce42..d05070d74604 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -24,13 +24,13 @@ #include #include -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -#define REVISION_MAX 2 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +#define IPSET_TYPE_REV_MAX 2 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip,port,ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port,ip"); /* Type specific function prefix */ @@ -414,8 +414,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ipportip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 9a80d8bc9f18..7d1dede4ab6d 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -24,15 +24,15 @@ #include #include -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -/* 2 Range as input support for IPv4 added */ -/* 3 nomatch flag support added */ -#define REVISION_MAX 4 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Range as input support for IPv4 added */ +/* 3 nomatch flag support added */ +#define IPSET_TYPE_REV_MAX 4 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port,net"); /* Type specific function prefix */ @@ -574,8 +574,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ipportnet_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 223e9f546d0f..9cb9ef43d941 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -22,14 +22,14 @@ #include #include -#define REVISION_MIN 0 -/* 1 Range as input support for IPv4 added */ -/* 2 nomatch flag support added */ -#define REVISION_MAX 3 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 Range as input support for IPv4 added */ +/* 2 nomatch flag support added */ +#define IPSET_TYPE_REV_MAX 3 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net"); /* Type specific function prefix */ @@ -406,8 +406,8 @@ static struct ip_set_type hash_net_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_net_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 7d798d5d5cd3..2310fc29e89e 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -23,14 +23,14 @@ #include #include -#define REVISION_MIN 0 -/* 1 nomatch flag support added */ -/* 2 /0 support added */ -#define REVISION_MAX 3 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 nomatch flag support added */ +/* 2 /0 support added */ +#define IPSET_TYPE_REV_MAX 3 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:net,iface", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net,iface"); /* Interface name rbtree */ @@ -645,8 +645,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = { IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_netiface_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 09d6690bee6f..1601d489fdbc 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -23,15 +23,15 @@ #include #include -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -/* 2 Range as input support for IPv4 added */ -/* 3 nomatch flag support added */ -#define REVISION_MAX 4 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Range as input support for IPv4 added */ +/* 3 nomatch flag support added */ +#define IPSET_TYPE_REV_MAX 4 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:net,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net,port"); /* Type specific function prefix */ @@ -518,8 +518,8 @@ static struct ip_set_type hash_netport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_netport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 68299ee15847..a9e301f6e093 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -15,12 +15,12 @@ #include #include -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 1 /* Counters support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); -IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("list:set", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_list:set"); /* Member elements */ @@ -703,8 +703,8 @@ static struct ip_set_type list_set_type __read_mostly = { .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = list_set_create, .create_policy = { [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, -- cgit v1.2.3 From 20b2fab483094d51c8d26784b81e12149474e0f2 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 1 May 2013 18:47:32 +0200 Subject: netfilter: ipset: Fix "may be used uninitialized" warnings Reported-by: Pablo Neira Ayuso Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_ip.c | 4 ++-- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 4 ++-- net/netfilter/ipset/ip_set_hash_ip.c | 2 +- net/netfilter/ipset/ip_set_hash_ipport.c | 2 +- net/netfilter/ipset/ip_set_hash_ipportip.c | 2 +- net/netfilter/ipset/ip_set_hash_ipportnet.c | 4 ++-- net/netfilter/ipset/ip_set_hash_net.c | 2 +- net/netfilter/ipset/ip_set_hash_netiface.c | 2 +- net/netfilter/ipset/ip_set_hash_netport.c | 2 +- 9 files changed, 12 insertions(+), 12 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index c2f89b1c1d92..ce99d2652d47 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -131,7 +131,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], { struct bitmap_ip *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - u32 ip, ip_to; + u32 ip = 0, ip_to = 0; struct bitmap_ip_adt_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(map); int ret = 0; @@ -264,7 +264,7 @@ static int bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { struct bitmap_ip *map; - u32 first_ip, last_ip, hosts, cadt_flags = 0; + u32 first_ip = 0, last_ip = 0, hosts, cadt_flags = 0; u64 elements; u8 netmask = 32; int ret; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 1d6551cc590e..6d5bad93026b 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -246,7 +246,7 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ipmac_adt_elem e = {}; struct ip_set_ext ext = IP_SET_INIT_UEXT(map); - u32 ip; + u32 ip = 0; int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -355,7 +355,7 @@ static int bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { - u32 first_ip, last_ip, cadt_flags = 0; + u32 first_ip = 0, last_ip = 0, cadt_flags = 0; u64 elements; struct bitmap_ipmac *map; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index de44fca687c3..260c9a80d8a5 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -119,7 +119,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = {}; struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip, ip_to, hosts; + u32 ip = 0, ip_to = 0, hosts; int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index b514ff4e83ae..64caad35a391 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -137,7 +137,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip, ip_to, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to; bool with_ports = false; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index d05070d74604..2873bbc20d7a 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -142,7 +142,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip, ip_to, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to; bool with_ports = false; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 7d1dede4ab6d..f111558c4597 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -196,8 +196,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip, ip_to, p = 0, port, port_to; - u32 ip2_from, ip2_to, ip2_last, ip2; + u32 ip = 0, ip_to = 0, p = 0, port, port_to; + u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; bool with_ports = false; u8 cidr; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 9cb9ef43d941..0a64dad156d9 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -166,7 +166,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip = 0, ip_to, last; + u32 ip = 0, ip_to = 0, last; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 2310fc29e89e..846371b08630 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -320,7 +320,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip = 0, ip_to, last; + u32 ip = 0, ip_to = 0, last; char iface[IFNAMSIZ]; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 1601d489fdbc..d98a685cd916 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -187,7 +187,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 port, port_to, p = 0, ip = 0, ip_to, last; + u32 port, port_to, p = 0, ip = 0, ip_to = 0, last; bool with_ports = false; u8 cidr; int ret; -- cgit v1.2.3 From 60b0fe372499f66e0c965dc0594320438a3b711c Mon Sep 17 00:00:00 2001 From: "Anders K. Pedersen" Date: Fri, 3 May 2013 23:15:48 +0200 Subject: netfilter: ipset: Support package fragments for IPv4 protos without ports Enable ipset port set types to match IPv4 package fragments for protocols that doesn't have ports (or the port information isn't supported by ipset). For example this allows a hash:ip,port ipset containing the entry 192.168.0.1,gre:0 to match all package fragments for PPTP VPN tunnels to/from the host. Without this patch only the first package fragment (with fragment offset 0) was matched, while subsequent fragments wasn't. This is not possible for IPv6, where the protocol is in the fragmented part of the package unlike IPv4, where the protocol is in the IP header. IPPROTO_ICMPV6 is deliberately not included, because it isn't relevant for IPv4. Signed-off-by: Anders K. Pedersen Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_getport.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index dac156f819ac..29fb01ddff93 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -102,9 +102,25 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src, int protocol = iph->protocol; /* See comments at tcp_match in ip_tables.c */ - if (protocol <= 0 || (ntohs(iph->frag_off) & IP_OFFSET)) + if (protocol <= 0) return false; + if (ntohs(iph->frag_off) & IP_OFFSET) + switch (protocol) { + case IPPROTO_TCP: + case IPPROTO_SCTP: + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_ICMP: + /* Port info not available for fragment offset > 0 */ + return false; + default: + /* Other protocols doesn't have ports, + so we can match fragments */ + *proto = protocol; + return true; + } + return get_port(skb, protocol, protooff, src, port, proto); } EXPORT_SYMBOL_GPL(ip_set_get_ip4_port); -- cgit v1.2.3 From bd3129fc5e8868df74ffa803c7fec527976bbf1c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 30 Sep 2013 07:49:47 +0200 Subject: netfilter: ipset: order matches and targets separatedly in xt_set.c Signed-off-by: Jozsef Kadlecsik --- net/netfilter/xt_set.c | 188 +++++++++++++++++++++++++------------------------ 1 file changed, 96 insertions(+), 92 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 31790e789e22..2095488684ff 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -109,6 +109,101 @@ set_match_v0_destroy(const struct xt_mtdtor_param *par) ip_set_nfnl_put(info->match_set.index); } +/* Revision 1 match */ + +static bool +set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_set_info_match_v1 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, + info->match_set.flags, 0, UINT_MAX); + + if (opt.flags & IPSET_RETURN_NOMATCH) + opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH; + + return match_set(info->match_set.index, skb, par, &opt, + info->match_set.flags & IPSET_INV_MATCH); +} + +static int +set_match_v1_checkentry(const struct xt_mtchk_param *par) +{ + struct xt_set_info_match_v1 *info = par->matchinfo; + ip_set_id_t index; + + index = ip_set_nfnl_get_byindex(info->match_set.index); + + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find set indentified by id %u to match\n", + info->match_set.index); + return -ENOENT; + } + if (info->match_set.dim > IPSET_DIM_MAX) { + pr_warning("Protocol error: set match dimension " + "is over the limit!\n"); + ip_set_nfnl_put(info->match_set.index); + return -ERANGE; + } + + return 0; +} + +static void +set_match_v1_destroy(const struct xt_mtdtor_param *par) +{ + struct xt_set_info_match_v1 *info = par->matchinfo; + + ip_set_nfnl_put(info->match_set.index); +} + +/* Revision 3 match */ + +static bool +match_counter(u64 counter, const struct ip_set_counter_match *info) +{ + switch (info->op) { + case IPSET_COUNTER_NONE: + return true; + case IPSET_COUNTER_EQ: + return counter == info->value; + case IPSET_COUNTER_NE: + return counter != info->value; + case IPSET_COUNTER_LT: + return counter < info->value; + case IPSET_COUNTER_GT: + return counter > info->value; + } + return false; +} + +static bool +set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_set_info_match_v3 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, + info->match_set.flags, info->flags, UINT_MAX); + int ret; + + if (info->packets.op != IPSET_COUNTER_NONE || + info->bytes.op != IPSET_COUNTER_NONE) + opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; + + ret = match_set(info->match_set.index, skb, par, &opt, + info->match_set.flags & IPSET_INV_MATCH); + + if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) + return ret; + + if (!match_counter(opt.ext.packets, &info->packets)) + return 0; + return match_counter(opt.ext.bytes, &info->bytes); +} + +#define set_match_v3_checkentry set_match_v1_checkentry +#define set_match_v3_destroy set_match_v1_destroy + +/* Revision 0 interface: backward compatible with netfilter/iptables */ + static unsigned int set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { @@ -180,52 +275,7 @@ set_target_v0_destroy(const struct xt_tgdtor_param *par) ip_set_nfnl_put(info->del_set.index); } -/* Revision 1 match and target */ - -static bool -set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) -{ - const struct xt_set_info_match_v1 *info = par->matchinfo; - ADT_OPT(opt, par->family, info->match_set.dim, - info->match_set.flags, 0, UINT_MAX); - - if (opt.flags & IPSET_RETURN_NOMATCH) - opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH; - - return match_set(info->match_set.index, skb, par, &opt, - info->match_set.flags & IPSET_INV_MATCH); -} - -static int -set_match_v1_checkentry(const struct xt_mtchk_param *par) -{ - struct xt_set_info_match_v1 *info = par->matchinfo; - ip_set_id_t index; - - index = ip_set_nfnl_get_byindex(info->match_set.index); - - if (index == IPSET_INVALID_ID) { - pr_warning("Cannot find set indentified by id %u to match\n", - info->match_set.index); - return -ENOENT; - } - if (info->match_set.dim > IPSET_DIM_MAX) { - pr_warning("Protocol error: set match dimension " - "is over the limit!\n"); - ip_set_nfnl_put(info->match_set.index); - return -ERANGE; - } - - return 0; -} - -static void -set_match_v1_destroy(const struct xt_mtdtor_param *par) -{ - struct xt_set_info_match_v1 *info = par->matchinfo; - - ip_set_nfnl_put(info->match_set.index); -} +/* Revision 1 target */ static unsigned int set_target_v1(struct sk_buff *skb, const struct xt_action_param *par) @@ -320,52 +370,6 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) #define set_target_v2_checkentry set_target_v1_checkentry #define set_target_v2_destroy set_target_v1_destroy -/* Revision 3 match */ - -static bool -match_counter(u64 counter, const struct ip_set_counter_match *info) -{ - switch (info->op) { - case IPSET_COUNTER_NONE: - return true; - case IPSET_COUNTER_EQ: - return counter == info->value; - case IPSET_COUNTER_NE: - return counter != info->value; - case IPSET_COUNTER_LT: - return counter < info->value; - case IPSET_COUNTER_GT: - return counter > info->value; - } - return false; -} - -static bool -set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) -{ - const struct xt_set_info_match_v3 *info = par->matchinfo; - ADT_OPT(opt, par->family, info->match_set.dim, - info->match_set.flags, info->flags, UINT_MAX); - int ret; - - if (info->packets.op != IPSET_COUNTER_NONE || - info->bytes.op != IPSET_COUNTER_NONE) - opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; - - ret = match_set(info->match_set.index, skb, par, &opt, - info->match_set.flags & IPSET_INV_MATCH); - - if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) - return ret; - - if (!match_counter(opt.ext.packets, &info->packets)) - return 0; - return match_counter(opt.ext.bytes, &info->bytes); -} - -#define set_match_v3_checkentry set_match_v1_checkentry -#define set_match_v3_destroy set_match_v1_destroy - static struct xt_match set_matches[] __read_mostly = { { .name = "set", -- cgit v1.2.3 From 5e04c0c38c90f1f11a0e87800e4c22d4aba1d733 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 30 Sep 2013 07:57:18 +0200 Subject: netfilter: ipset: Introduce new operation to get both setname and family ip[6]tables set match and SET target need to know the family of the set in order to reject adding rules which refer to a set with a non-mathcing family. Currently such rules are silently accepted and then ignored instead of generating a clear error message to the user, which is not helpful. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_core.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index f2e30fb31e78..428c30a8586f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1788,6 +1788,23 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } + case IP_SET_OP_GET_FNAME: { + struct ip_set_req_get_set_family *req_get = data; + ip_set_id_t id; + + if (*len != sizeof(struct ip_set_req_get_set_family)) { + ret = -EINVAL; + goto done; + } + req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; + nfnl_lock(NFNL_SUBSYS_IPSET); + find_set_and_id(req_get->set.name, &id); + req_get->set.index = id; + if (id != IPSET_INVALID_ID) + req_get->family = nfnl_set(id)->family; + nfnl_unlock(NFNL_SUBSYS_IPSET); + goto copy; + } case IP_SET_OP_GET_BYINDEX: { struct ip_set_req_get_set *req_get = data; struct ip_set *set; -- cgit v1.2.3 From a04d8b6bd9113f3e7f0c216dcaa3c1ad498f2a96 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 30 Sep 2013 09:05:54 +0200 Subject: netfilter: ipset: Prepare ipset to support multiple networks for hash types In order to support hash:net,net, hash:net,port,net etc. types, arrays are introduced for the book-keeping of existing cidr sizes and network numbers in a set. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_gen.h | 78 +++++++++++++++-------------- net/netfilter/ipset/ip_set_hash_ipportnet.c | 4 +- net/netfilter/ipset/ip_set_hash_net.c | 4 +- net/netfilter/ipset/ip_set_hash_netiface.c | 4 +- net/netfilter/ipset/ip_set_hash_netport.c | 4 +- 5 files changed, 48 insertions(+), 46 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 68b9ccebabaa..a8332404d9b4 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -77,10 +77,14 @@ struct htable { #define hbucket(h, i) (&((h)->bucket[i])) +#ifndef IPSET_NET_COUNT +#define IPSET_NET_COUNT 1 +#endif + /* Book-keeping of the prefixes added to the set */ struct net_prefixes { - u8 cidr; /* the different cidr values in the set */ - u32 nets; /* number of elements per cidr */ + u32 nets[IPSET_NET_COUNT]; /* number of elements per cidr */ + u8 cidr[IPSET_NET_COUNT]; /* the different cidr values in the set */ }; /* Compute the hash table size */ @@ -165,13 +169,13 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128) #ifdef IP_SET_HASH_WITH_MULTI -#define NETS_LENGTH(family) (SET_HOST_MASK(family) + 1) +#define NLEN(family) (SET_HOST_MASK(family) + 1) #else -#define NETS_LENGTH(family) SET_HOST_MASK(family) +#define NLEN(family) SET_HOST_MASK(family) #endif #else -#define NETS_LENGTH(family) 0 +#define NLEN(family) 0 #endif /* IP_SET_HASH_WITH_NETS */ #define ext_timeout(e, h) \ @@ -296,49 +300,49 @@ struct htype { /* Network cidr size book keeping when the hash stores different * sized networks */ static void -mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length) +mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) { int i, j; /* Add in increasing prefix order, so larger cidr first */ - for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) { + for (i = 0, j = -1; i < nets_length && h->nets[i].nets[n]; i++) { if (j != -1) continue; - else if (h->nets[i].cidr < cidr) + else if (h->nets[i].cidr[n] < cidr) j = i; - else if (h->nets[i].cidr == cidr) { - h->nets[i].nets++; + else if (h->nets[i].cidr[n] == cidr) { + h->nets[i].nets[n]++; return; } } if (j != -1) { for (; i > j; i--) { - h->nets[i].cidr = h->nets[i - 1].cidr; - h->nets[i].nets = h->nets[i - 1].nets; + h->nets[i].cidr[n] = h->nets[i - 1].cidr[n]; + h->nets[i].nets[n] = h->nets[i - 1].nets[n]; } } - h->nets[i].cidr = cidr; - h->nets[i].nets = 1; + h->nets[i].cidr[n] = cidr; + h->nets[i].nets[n] = 1; } static void -mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length) +mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) { u8 i, j, net_end = nets_length - 1; for (i = 0; i < nets_length; i++) { - if (h->nets[i].cidr != cidr) + if (h->nets[i].cidr[n] != cidr) continue; - if (h->nets[i].nets > 1 || i == net_end || - h->nets[i + 1].nets == 0) { - h->nets[i].nets--; + if (h->nets[i].nets[n] > 1 || i == net_end || + h->nets[i + 1].nets[n] == 0) { + h->nets[i].nets[n]--; return; } - for (j = i; j < net_end && h->nets[j].nets; j++) { - h->nets[j].cidr = h->nets[j + 1].cidr; - h->nets[j].nets = h->nets[j + 1].nets; + for (j = i; j < net_end && h->nets[j].nets[n]; j++) { + h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; + h->nets[j].nets[n] = h->nets[j + 1].nets[n]; } - h->nets[j].nets = 0; + h->nets[j].nets[n] = 0; return; } } @@ -382,8 +386,7 @@ mtype_flush(struct ip_set *set) } } #ifdef IP_SET_HASH_WITH_NETS - memset(h->nets, 0, sizeof(struct net_prefixes) - * NETS_LENGTH(set->family)); + memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family)); #endif h->elements = 0; } @@ -459,7 +462,7 @@ mtype_expire(struct htype *h, u8 nets_length, size_t dsize) pr_debug("expired %u/%u\n", i, j); #ifdef IP_SET_HASH_WITH_NETS mtype_del_cidr(h, CIDR(data->cidr), - nets_length); + nets_length, 0); #endif if (j != n->pos - 1) /* Not last one */ @@ -494,7 +497,7 @@ mtype_gc(unsigned long ul_set) pr_debug("called\n"); write_lock_bh(&set->lock); - mtype_expire(h, NETS_LENGTH(set->family), h->dsize); + mtype_expire(h, NLEN(set->family), h->dsize); write_unlock_bh(&set->lock); h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; @@ -523,8 +526,7 @@ mtype_resize(struct ip_set *set, bool retried) if (SET_WITH_TIMEOUT(set) && !retried) { i = h->elements; write_lock_bh(&set->lock); - mtype_expire(set->data, NETS_LENGTH(set->family), - h->dsize); + mtype_expire(set->data, NLEN(set->family), h->dsize); write_unlock_bh(&set->lock); if (h->elements < i) return 0; @@ -607,7 +609,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) /* FIXME: when set is full, we slow down here */ - mtype_expire(h, NETS_LENGTH(set->family), h->dsize); + mtype_expire(h, NLEN(set->family), h->dsize); if (h->elements >= h->maxelem) { if (net_ratelimit()) @@ -645,8 +647,8 @@ reuse_slot: /* Fill out reused slot */ data = ahash_data(n, j, h->dsize); #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family)); - mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); + mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0); + mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif } else { /* Use/create a new slot */ @@ -659,7 +661,7 @@ reuse_slot: } data = ahash_data(n, n->pos++, h->dsize); #ifdef IP_SET_HASH_WITH_NETS - mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); + mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif h->elements++; } @@ -711,7 +713,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, n->pos--; h->elements--; #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); + mtype_del_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif if (n->pos + AHASH_INIT_SIZE < n->size) { void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) @@ -760,11 +762,11 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, struct mtype_elem *data; int i, j = 0; u32 key, multi = 0; - u8 nets_length = NETS_LENGTH(set->family); + u8 nets_length = NLEN(set->family); pr_debug("test by nets\n"); - for (; j < nets_length && h->nets[j].nets && !multi; j++) { - mtype_data_netmask(d, h->nets[j].cidr); + for (; j < nets_length && h->nets[j].nets[0] && !multi; j++) { + mtype_data_netmask(d, h->nets[j].cidr[0]); key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { @@ -839,7 +841,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) size_t memsize; t = rcu_dereference_bh_nfnl(h->table); - memsize = mtype_ahash_memsize(h, t, NETS_LENGTH(set->family)); + memsize = mtype_ahash_memsize(h, t, NLEN(set->family)); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index f111558c4597..6ce5a8e2c44e 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -170,7 +170,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1 + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); @@ -454,7 +454,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1 + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 0a64dad156d9..ec1c7dc10489 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -143,7 +143,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); @@ -338,7 +338,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 846371b08630..814b4e31a7f8 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -265,7 +265,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK, + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); @@ -534,7 +534,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK, + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index d98a685cd916..3bd923d3f179 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -162,7 +162,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1 + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); @@ -407,7 +407,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1, + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); -- cgit v1.2.3 From f925f7056920213889c5e61445f9529f1a86ae41 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 6 Sep 2013 22:31:40 +0200 Subject: netfilter: ipset: Rename extension offset ids to extension ids Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_gen.h | 4 ++-- net/netfilter/ipset/ip_set_bitmap_ip.c | 10 +++++----- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 10 +++++----- net/netfilter/ipset/ip_set_bitmap_port.c | 10 +++++----- net/netfilter/ipset/ip_set_hash_gen.h | 22 +++++++++++----------- net/netfilter/ipset/ip_set_list_set.c | 14 +++++++------- 6 files changed, 35 insertions(+), 35 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index d39905e7e881..889a929b908f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -33,9 +33,9 @@ #define mtype MTYPE #define ext_timeout(e, m) \ - (unsigned long *)((e) + (m)->offset[IPSET_OFFSET_TIMEOUT]) + (unsigned long *)((e) + (m)->offset[IPSET_EXT_ID_TIMEOUT]) #define ext_counter(e, m) \ - (struct ip_set_counter *)((e) + (m)->offset[IPSET_OFFSET_COUNTER]) + (struct ip_set_counter *)((e) + (m)->offset[IPSET_EXT_ID_COUNTER]) #define get_ext(map, id) ((map)->extensions + (map)->dsize * (id)) static void diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index ce99d2652d47..2ee210ef49a2 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -45,7 +45,7 @@ struct bitmap_ip { u32 hosts; /* number of hosts in a subnet */ size_t memsize; /* members size */ size_t dsize; /* extensions struct size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ u8 netmask; /* subnet netmask */ u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ @@ -342,9 +342,9 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_ipct_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipct_elem, counter); if (!init_map_ip(set, map, first_ip, last_ip, @@ -360,7 +360,7 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) bitmap_ip_gc_init(set, bitmap_ip_gc); } else { map->dsize = sizeof(struct bitmap_ipc_elem); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipc_elem, counter); if (!init_map_ip(set, map, first_ip, last_ip, @@ -371,7 +371,7 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) } } else if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_ipt_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipt_elem, timeout); if (!init_map_ip(set, map, first_ip, last_ip, diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 6d5bad93026b..e711875ea452 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -52,7 +52,7 @@ struct bitmap_ipmac { struct timer_list gc; /* garbage collector */ size_t memsize; /* members size */ size_t dsize; /* size of element */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ }; /* ADT structure for generic function args */ @@ -405,9 +405,9 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_ipmacct_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipmacct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipmacct_elem, counter); if (!init_map_ipmac(set, map, first_ip, last_ip, @@ -421,7 +421,7 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); } else { map->dsize = sizeof(struct bitmap_ipmacc_elem); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipmacc_elem, counter); if (!init_map_ipmac(set, map, first_ip, last_ip, @@ -432,7 +432,7 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], } } else if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_ipmact_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipmact_elem, timeout); if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index b22048965d2a..bebc137a5737 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -39,7 +39,7 @@ struct bitmap_port { u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ size_t dsize; /* extensions struct size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; @@ -282,9 +282,9 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_portct_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_portct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_portct_elem, counter); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); @@ -297,7 +297,7 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) bitmap_port_gc_init(set, bitmap_port_gc); } else { map->dsize = sizeof(struct bitmap_portc_elem); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_portc_elem, counter); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); @@ -306,7 +306,7 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) } } else if (tb[IPSET_ATTR_TIMEOUT]) { map->dsize = sizeof(struct bitmap_portt_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_portt_elem, timeout); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index a8332404d9b4..e4db9250f337 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -179,9 +179,9 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #endif /* IP_SET_HASH_WITH_NETS */ #define ext_timeout(e, h) \ -(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_TIMEOUT]) +(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_TIMEOUT]) #define ext_counter(e, h) \ -(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_COUNTER]) +(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_COUNTER]) #endif /* _IP_SET_HASH_GEN_H */ @@ -278,7 +278,7 @@ struct htype { u32 initval; /* random jhash init value */ u32 timeout; /* timeout value, if enabled */ size_t dsize; /* data struct size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ struct timer_list gc; /* garbage collection when timeout enabled */ struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_MULTI @@ -1059,11 +1059,11 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) if (set->family == NFPROTO_IPV4) { h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4ct_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = + h->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 4ct_elem), timeout); - h->offset[IPSET_OFFSET_COUNTER] = + h->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 4ct_elem), counter); @@ -1072,11 +1072,11 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) } else { h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6ct_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = + h->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 6ct_elem), timeout); - h->offset[IPSET_OFFSET_COUNTER] = + h->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 6ct_elem), counter); @@ -1088,7 +1088,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4c_elem)); - h->offset[IPSET_OFFSET_COUNTER] = + h->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 4c_elem), counter); @@ -1096,7 +1096,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6c_elem)); - h->offset[IPSET_OFFSET_COUNTER] = + h->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 6c_elem), counter); @@ -1107,14 +1107,14 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) set->extensions |= IPSET_EXT_TIMEOUT; if (set->family == NFPROTO_IPV4) { h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = + h->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem), timeout); IPSET_TOKEN(HTYPE, 4_gc_init)(set, IPSET_TOKEN(HTYPE, 4_gc)); } else { h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = + h->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem), timeout); IPSET_TOKEN(HTYPE, 6_gc_init)(set, diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index a9e301f6e093..0ed19b5e675e 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -59,7 +59,7 @@ struct set_adt_elem { /* Type structure */ struct list_set { size_t dsize; /* element size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ u32 size; /* size of set list array */ u32 timeout; /* timeout value */ struct timer_list gc; /* garbage collection */ @@ -73,9 +73,9 @@ list_set_elem(const struct list_set *map, u32 id) } #define ext_timeout(e, m) \ -(unsigned long *)((void *)(e) + (m)->offset[IPSET_OFFSET_TIMEOUT]) +(unsigned long *)((void *)(e) + (m)->offset[IPSET_EXT_ID_TIMEOUT]) #define ext_counter(e, m) \ -(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_OFFSET_COUNTER]) +(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_EXT_ID_COUNTER]) static int list_set_ktest(struct ip_set *set, const struct sk_buff *skb, @@ -667,9 +667,9 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (!map) return -ENOMEM; set->extensions |= IPSET_EXT_TIMEOUT; - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct setct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct setct_elem, counter); list_set_gc_init(set, list_set_gc); } else { @@ -677,7 +677,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) sizeof(struct setc_elem), 0); if (!map) return -ENOMEM; - map->offset[IPSET_OFFSET_COUNTER] = + map->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct setc_elem, counter); } } else if (tb[IPSET_ATTR_TIMEOUT]) { @@ -686,7 +686,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (!map) return -ENOMEM; set->extensions |= IPSET_EXT_TIMEOUT; - map->offset[IPSET_OFFSET_TIMEOUT] = + map->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct sett_elem, timeout); list_set_gc_init(set, list_set_gc); } else { -- cgit v1.2.3 From ca134ce86451f3f5ac45ffbf1494a1f42110bf93 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 7 Sep 2013 00:10:07 +0200 Subject: netfilter: ipset: Move extension data to set structure Default timeout and extension offsets are moved to struct set, because all set types supports all extensions and it makes possible to generalize extension support. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_gen.h | 59 ++++++------- net/netfilter/ipset/ip_set_bitmap_ip.c | 45 +++++----- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 64 +++++++------- net/netfilter/ipset/ip_set_bitmap_port.c | 44 +++++----- net/netfilter/ipset/ip_set_hash_gen.h | 127 +++++++++++++--------------- net/netfilter/ipset/ip_set_hash_ip.c | 8 +- net/netfilter/ipset/ip_set_hash_ipport.c | 10 +-- net/netfilter/ipset/ip_set_hash_ipportip.c | 10 +-- net/netfilter/ipset/ip_set_hash_ipportnet.c | 14 +-- net/netfilter/ipset/ip_set_hash_net.c | 9 +- net/netfilter/ipset/ip_set_hash_netiface.c | 8 +- net/netfilter/ipset/ip_set_hash_netport.c | 8 +- net/netfilter/ipset/ip_set_list_set.c | 116 ++++++++++++------------- 13 files changed, 244 insertions(+), 278 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 889a929b908f..f32ddbca3923 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -32,11 +32,7 @@ #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype MTYPE -#define ext_timeout(e, m) \ - (unsigned long *)((e) + (m)->offset[IPSET_EXT_ID_TIMEOUT]) -#define ext_counter(e, m) \ - (struct ip_set_counter *)((e) + (m)->offset[IPSET_EXT_ID_COUNTER]) -#define get_ext(map, id) ((map)->extensions + (map)->dsize * (id)) +#define get_ext(set, map, id) ((map)->extensions + (set)->dsize * (id)) static void mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) @@ -46,7 +42,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) init_timer(&map->gc); map->gc.data = (unsigned long) set; map->gc.function = gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } @@ -59,7 +55,7 @@ mtype_destroy(struct ip_set *set) del_timer_sync(&map->gc); ip_set_free(map->members); - if (map->dsize) + if (set->dsize) ip_set_free(map->extensions); kfree(map); @@ -88,9 +84,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize + - map->dsize * map->elements)) || + set->dsize * map->elements)) || (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) || (SET_WITH_COUNTER(set) && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(IPSET_FLAG_WITH_COUNTERS)))) @@ -108,16 +104,16 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - void *x = get_ext(map, e->id); - int ret = mtype_do_test(e, map); + void *x = get_ext(set, map, e->id); + int ret = mtype_do_test(e, map, set->dsize); if (ret <= 0) return ret; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, map))) + ip_set_timeout_expired(ext_timeout(x, set))) return 0; if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(x, map), ext, mext, flags); + ip_set_update_counter(ext_counter(x, set), ext, mext, flags); return 1; } @@ -127,12 +123,12 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - void *x = get_ext(map, e->id); - int ret = mtype_do_add(e, map, flags); + void *x = get_ext(set, map, e->id); + int ret = mtype_do_add(e, map, flags, set->dsize); if (ret == IPSET_ADD_FAILED) { if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, map))) + ip_set_timeout_expired(ext_timeout(x, set))) ret = 0; else if (!(flags & IPSET_FLAG_EXIST)) return -IPSET_ERR_EXIST; @@ -140,13 +136,13 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_TIMEOUT(set)) #ifdef IP_SET_BITMAP_STORED_TIMEOUT - mtype_add_timeout(ext_timeout(x, map), e, ext, map, ret); + mtype_add_timeout(ext_timeout(x, set), e, ext, set, map, ret); #else - ip_set_timeout_set(ext_timeout(x, map), ext->timeout); + ip_set_timeout_set(ext_timeout(x, set), ext->timeout); #endif if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(x, map), ext); + ip_set_init_counter(ext_counter(x, set), ext); return 0; } @@ -156,11 +152,11 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - const void *x = get_ext(map, e->id); + const void *x = get_ext(set, map, e->id); if (mtype_do_del(e, map) || (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, map)))) + ip_set_timeout_expired(ext_timeout(x, set)))) return -IPSET_ERR_EXIST; return 0; @@ -180,13 +176,13 @@ mtype_list(const struct ip_set *set, return -EMSGSIZE; for (; cb->args[2] < map->elements; cb->args[2]++) { id = cb->args[2]; - x = get_ext(map, id); + x = get_ext(set, map, id); if (!test_bit(id, map->members) || (SET_WITH_TIMEOUT(set) && #ifdef IP_SET_BITMAP_STORED_TIMEOUT mtype_is_filled((const struct mtype_elem *) x) && #endif - ip_set_timeout_expired(ext_timeout(x, map)))) + ip_set_timeout_expired(ext_timeout(x, set)))) continue; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { @@ -196,23 +192,24 @@ mtype_list(const struct ip_set *set, } else goto nla_put_failure; } - if (mtype_do_list(skb, map, id)) + if (mtype_do_list(skb, map, id, set->dsize)) goto nla_put_failure; if (SET_WITH_TIMEOUT(set)) { #ifdef IP_SET_BITMAP_STORED_TIMEOUT if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(ip_set_timeout_stored(map, id, - ext_timeout(x, map))))) + ext_timeout(x, set), + set->dsize)))) goto nla_put_failure; #else if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(ip_set_timeout_get( - ext_timeout(x, map))))) + ext_timeout(x, set))))) goto nla_put_failure; #endif } if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(x, map))) + ip_set_put_counter(skb, ext_counter(x, set))) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -245,14 +242,14 @@ mtype_gc(unsigned long ul_set) * but adding/deleting new entries is locked out */ read_lock_bh(&set->lock); for (id = 0; id < map->elements; id++) - if (mtype_gc_test(id, map)) { - x = get_ext(map, id); - if (ip_set_timeout_expired(ext_timeout(x, map))) + if (mtype_gc_test(id, map, set->dsize)) { + x = get_ext(set, map, id); + if (ip_set_timeout_expired(ext_timeout(x, set))) clear_bit(id, map->members); } read_unlock_bh(&set->lock); - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 2ee210ef49a2..363022edb8fb 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -44,10 +44,7 @@ struct bitmap_ip { u32 elements; /* number of max elements in the set */ u32 hosts; /* number of hosts in a subnet */ size_t memsize; /* members size */ - size_t dsize; /* extensions struct size */ - size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ u8 netmask; /* subnet netmask */ - u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; @@ -65,20 +62,21 @@ ip_to_id(const struct bitmap_ip *m, u32 ip) /* Common functions */ static inline int -bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map) +bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, + struct bitmap_ip *map, size_t dsize) { return !!test_bit(e->id, map->members); } static inline int -bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map) +bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map, size_t dsize) { return !!test_bit(id, map->members); } static inline int bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map, - u32 flags) + u32 flags, size_t dsize) { return !!test_and_set_bit(e->id, map->members); } @@ -90,7 +88,8 @@ bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map) } static inline int -bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id) +bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id, + size_t dsize) { return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip + id * map->hosts)); @@ -113,7 +112,7 @@ bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb, struct bitmap_ip *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ip_adt_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); @@ -133,7 +132,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; u32 ip = 0, ip_to = 0; struct bitmap_ip_adt_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -200,7 +199,7 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_ip == y->first_ip && x->last_ip == y->last_ip && x->netmask == y->netmask && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -240,8 +239,8 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, map->members = ip_set_alloc(map->memsize); if (!map->members) return false; - if (map->dsize) { - map->extensions = ip_set_alloc(map->dsize * elements); + if (set->dsize) { + map->extensions = ip_set_alloc(set->dsize * elements); if (!map->extensions) { kfree(map->members); return false; @@ -252,7 +251,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, map->elements = elements; map->hosts = hosts; map->netmask = netmask; - map->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; set->data = map; set->family = NFPROTO_IPV4; @@ -341,10 +340,10 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipct_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_ipct_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipct_elem, timeout); - map->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipct_elem, counter); if (!init_map_ip(set, map, first_ip, last_ip, @@ -353,14 +352,14 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) return -ENOMEM; } - map->timeout = ip_set_timeout_uget( + set->timeout = ip_set_timeout_uget( tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_ip_gc_init(set, bitmap_ip_gc); } else { - map->dsize = sizeof(struct bitmap_ipc_elem); - map->offset[IPSET_EXT_ID_COUNTER] = + set->dsize = sizeof(struct bitmap_ipc_elem); + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipc_elem, counter); if (!init_map_ip(set, map, first_ip, last_ip, @@ -370,8 +369,8 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) } } } else if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipt_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_ipt_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipt_elem, timeout); if (!init_map_ip(set, map, first_ip, last_ip, @@ -380,12 +379,12 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) return -ENOMEM; } - map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_ip_gc_init(set, bitmap_ip_gc); } else { - map->dsize = 0; + set->dsize = 0; if (!init_map_ip(set, map, first_ip, last_ip, elements, hosts, netmask)) { kfree(map); diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index e711875ea452..74576cb19264 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -48,11 +48,8 @@ struct bitmap_ipmac { u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ - u32 timeout; /* timeout value */ - struct timer_list gc; /* garbage collector */ size_t memsize; /* members size */ - size_t dsize; /* size of element */ - size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ + struct timer_list gc; /* garbage collector */ }; /* ADT structure for generic function args */ @@ -82,13 +79,13 @@ get_elem(void *extensions, u16 id, size_t dsize) static inline int bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, - const struct bitmap_ipmac *map) + const struct bitmap_ipmac *map, size_t dsize) { const struct bitmap_ipmac_elem *elem; if (!test_bit(e->id, map->members)) return 0; - elem = get_elem(map->extensions, e->id, map->dsize); + elem = get_elem(map->extensions, e->id, dsize); if (elem->filled == MAC_FILLED) return e->ether == NULL || ether_addr_equal(e->ether, elem->ether); @@ -97,13 +94,13 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, } static inline int -bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map) +bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize) { const struct bitmap_ipmac_elem *elem; if (!test_bit(id, map->members)) return 0; - elem = get_elem(map->extensions, id, map->dsize); + elem = get_elem(map->extensions, id, dsize); /* Timer not started for the incomplete elements */ return elem->filled == MAC_FILLED; } @@ -117,13 +114,13 @@ bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem) static inline int bitmap_ipmac_add_timeout(unsigned long *timeout, const struct bitmap_ipmac_adt_elem *e, - const struct ip_set_ext *ext, + const struct ip_set_ext *ext, struct ip_set *set, struct bitmap_ipmac *map, int mode) { u32 t = ext->timeout; if (mode == IPSET_ADD_START_STORED_TIMEOUT) { - if (t == map->timeout) + if (t == set->timeout) /* Timeout was not specified, get stored one */ t = *timeout; ip_set_timeout_set(timeout, t); @@ -142,11 +139,11 @@ bitmap_ipmac_add_timeout(unsigned long *timeout, static inline int bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, - struct bitmap_ipmac *map, u32 flags) + struct bitmap_ipmac *map, u32 flags, size_t dsize) { struct bitmap_ipmac_elem *elem; - elem = get_elem(map->extensions, e->id, map->dsize); + elem = get_elem(map->extensions, e->id, dsize); if (test_and_set_bit(e->id, map->members)) { if (elem->filled == MAC_FILLED) { if (e->ether && (flags & IPSET_FLAG_EXIST)) @@ -179,10 +176,11 @@ bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e, } static inline unsigned long -ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout) +ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout, + size_t dsize) { const struct bitmap_ipmac_elem *elem = - get_elem(map->extensions, id, map->dsize); + get_elem(map->extensions, id, dsize); return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) : *timeout; @@ -190,10 +188,10 @@ ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout) static inline int bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map, - u32 id) + u32 id, size_t dsize) { const struct bitmap_ipmac_elem *elem = - get_elem(map->extensions, id, map->dsize); + get_elem(map->extensions, id, dsize); return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip + id)) || @@ -216,7 +214,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ipmac_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; /* MAC can be src only */ @@ -245,7 +243,7 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], const struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ipmac_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0; int ret = 0; @@ -285,7 +283,7 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_ip == y->first_ip && x->last_ip == y->last_ip && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -330,11 +328,11 @@ static bool init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, u32 first_ip, u32 last_ip, u32 elements) { - map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize); + map->members = ip_set_alloc((last_ip - first_ip + 1) * set->dsize); if (!map->members) return false; - if (map->dsize) { - map->extensions = ip_set_alloc(map->dsize * elements); + if (set->dsize) { + map->extensions = ip_set_alloc(set->dsize * elements); if (!map->extensions) { kfree(map->members); return false; @@ -343,7 +341,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, map->first_ip = first_ip; map->last_ip = last_ip; map->elements = elements; - map->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; set->data = map; set->family = NFPROTO_IPV4; @@ -404,10 +402,10 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipmacct_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_ipmacct_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipmacct_elem, timeout); - map->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipmacct_elem, counter); if (!init_map_ipmac(set, map, first_ip, last_ip, @@ -415,13 +413,13 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], kfree(map); return -ENOMEM; } - map->timeout = ip_set_timeout_uget( + set->timeout = ip_set_timeout_uget( tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); } else { - map->dsize = sizeof(struct bitmap_ipmacc_elem); - map->offset[IPSET_EXT_ID_COUNTER] = + set->dsize = sizeof(struct bitmap_ipmacc_elem); + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_ipmacc_elem, counter); if (!init_map_ipmac(set, map, first_ip, last_ip, @@ -431,19 +429,19 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], } } } else if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipmact_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_ipmact_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_ipmact_elem, timeout); if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { kfree(map); return -ENOMEM; } - map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); } else { - map->dsize = sizeof(struct bitmap_ipmac_elem); + set->dsize = sizeof(struct bitmap_ipmac_elem); if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { kfree(map); diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index bebc137a5737..71da31935499 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -38,9 +38,6 @@ struct bitmap_port { u16 last_port; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ - size_t dsize; /* extensions struct size */ - size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ - u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; @@ -59,20 +56,20 @@ port_to_id(const struct bitmap_port *m, u16 port) static inline int bitmap_port_do_test(const struct bitmap_port_adt_elem *e, - const struct bitmap_port *map) + const struct bitmap_port *map, size_t dsize) { return !!test_bit(e->id, map->members); } static inline int -bitmap_port_gc_test(u16 id, const struct bitmap_port *map) +bitmap_port_gc_test(u16 id, const struct bitmap_port *map, size_t dsize) { return !!test_bit(id, map->members); } static inline int bitmap_port_do_add(const struct bitmap_port_adt_elem *e, - struct bitmap_port *map, u32 flags) + struct bitmap_port *map, u32 flags, size_t dsize) { return !!test_and_set_bit(e->id, map->members); } @@ -85,7 +82,8 @@ bitmap_port_do_del(const struct bitmap_port_adt_elem *e, } static inline int -bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id) +bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id, + size_t dsize) { return nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port + id)); @@ -106,7 +104,7 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb, struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_port_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); __be16 __port; u16 port = 0; @@ -131,7 +129,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_port_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port; /* wraparound */ u16 port_to; int ret = 0; @@ -191,7 +189,7 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_port == y->first_port && x->last_port == y->last_port && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -230,8 +228,8 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, map->members = ip_set_alloc(map->memsize); if (!map->members) return false; - if (map->dsize) { - map->extensions = ip_set_alloc(map->dsize * map->elements); + if (set->dsize) { + map->extensions = ip_set_alloc(set->dsize * map->elements); if (!map->extensions) { kfree(map->members); return false; @@ -239,7 +237,7 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, } map->first_port = first_port; map->last_port = last_port; - map->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; set->data = map; set->family = NFPROTO_UNSPEC; @@ -281,23 +279,23 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_portct_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_portct_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_portct_elem, timeout); - map->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_portct_elem, counter); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); return -ENOMEM; } - map->timeout = + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_port_gc_init(set, bitmap_port_gc); } else { - map->dsize = sizeof(struct bitmap_portc_elem); - map->offset[IPSET_EXT_ID_COUNTER] = + set->dsize = sizeof(struct bitmap_portc_elem); + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct bitmap_portc_elem, counter); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); @@ -305,19 +303,19 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) } } } else if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_portt_elem); - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct bitmap_portt_elem); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct bitmap_portt_elem, timeout); if (!init_map_port(set, map, first_port, last_port)) { kfree(map); return -ENOMEM; } - map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; bitmap_port_gc_init(set, bitmap_port_gc); } else { - map->dsize = 0; + set->dsize = 0; if (!init_map_port(set, map, first_port, last_port)) { kfree(map); return -ENOMEM; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index e4db9250f337..0cb840e1f8ae 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -178,11 +178,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define NLEN(family) 0 #endif /* IP_SET_HASH_WITH_NETS */ -#define ext_timeout(e, h) \ -(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_TIMEOUT]) -#define ext_counter(e, h) \ -(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_EXT_ID_COUNTER]) - #endif /* _IP_SET_HASH_GEN_H */ /* Family dependent templates */ @@ -276,9 +271,6 @@ struct htype { u32 maxelem; /* max elements in the hash */ u32 elements; /* current element (vs timeout) */ u32 initval; /* random jhash init value */ - u32 timeout; /* timeout value, if enabled */ - size_t dsize; /* data struct size */ - size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ struct timer_list gc; /* garbage collection when timeout enabled */ struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_MULTI @@ -351,7 +343,7 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) /* Calculate the actual memory size of the set data */ static size_t mtype_ahash_memsize(const struct htype *h, const struct htable *t, - u8 nets_length) + u8 nets_length, size_t dsize) { u32 i; size_t memsize = sizeof(*h) @@ -362,7 +354,7 @@ mtype_ahash_memsize(const struct htype *h, const struct htable *t, + jhash_size(t->htable_bits) * sizeof(struct hbucket); for (i = 0; i < jhash_size(t->htable_bits); i++) - memsize += t->bucket[i].size * h->dsize; + memsize += t->bucket[i].size * dsize; return memsize; } @@ -417,10 +409,10 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) init_timer(&h->gc); h->gc.data = (unsigned long) set; h->gc.function = gc; - h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&h->gc); pr_debug("gc initialized, run in every %u\n", - IPSET_GC_PERIOD(h->timeout)); + IPSET_GC_PERIOD(set->timeout)); } static bool @@ -431,7 +423,7 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) /* Resizing changes htable_bits, so we ignore it */ return x->maxelem == y->maxelem && - x->timeout == y->timeout && + a->timeout == b->timeout && #ifdef IP_SET_HASH_WITH_NETMASK x->netmask == y->netmask && #endif @@ -444,7 +436,7 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) /* Delete expired elements from the hashtable */ static void -mtype_expire(struct htype *h, u8 nets_length, size_t dsize) +mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) { struct htable *t; struct hbucket *n; @@ -458,7 +450,7 @@ mtype_expire(struct htype *h, u8 nets_length, size_t dsize) n = hbucket(t, i); for (j = 0; j < n->pos; j++) { data = ahash_data(n, j, dsize); - if (ip_set_timeout_expired(ext_timeout(data, h))) { + if (ip_set_timeout_expired(ext_timeout(data, set))) { pr_debug("expired %u/%u\n", i, j); #ifdef IP_SET_HASH_WITH_NETS mtype_del_cidr(h, CIDR(data->cidr), @@ -497,10 +489,10 @@ mtype_gc(unsigned long ul_set) pr_debug("called\n"); write_lock_bh(&set->lock); - mtype_expire(h, NLEN(set->family), h->dsize); + mtype_expire(set, h, NLEN(set->family), set->dsize); write_unlock_bh(&set->lock); - h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&h->gc); } @@ -526,7 +518,7 @@ mtype_resize(struct ip_set *set, bool retried) if (SET_WITH_TIMEOUT(set) && !retried) { i = h->elements; write_lock_bh(&set->lock); - mtype_expire(set->data, NLEN(set->family), h->dsize); + mtype_expire(set, set->data, NLEN(set->family), set->dsize); write_unlock_bh(&set->lock); if (h->elements < i) return 0; @@ -553,13 +545,13 @@ retry: for (i = 0; i < jhash_size(orig->htable_bits); i++) { n = hbucket(orig, i); for (j = 0; j < n->pos; j++) { - data = ahash_data(n, j, h->dsize); + data = ahash_data(n, j, set->dsize); #ifdef IP_SET_HASH_WITH_NETS flags = 0; mtype_data_reset_flags(data, &flags); #endif m = hbucket(t, HKEY(data, h->initval, htable_bits)); - ret = hbucket_elem_add(m, AHASH_MAX(h), h->dsize); + ret = hbucket_elem_add(m, AHASH_MAX(h), set->dsize); if (ret < 0) { #ifdef IP_SET_HASH_WITH_NETS mtype_data_reset_flags(data, &flags); @@ -570,8 +562,8 @@ retry: goto retry; return ret; } - d = ahash_data(m, m->pos++, h->dsize); - memcpy(d, data, h->dsize); + d = ahash_data(m, m->pos++, set->dsize); + memcpy(d, data, set->dsize); #ifdef IP_SET_HASH_WITH_NETS mtype_data_reset_flags(d, &flags); #endif @@ -609,7 +601,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) /* FIXME: when set is full, we slow down here */ - mtype_expire(h, NLEN(set->family), h->dsize); + mtype_expire(set, h, NLEN(set->family), set->dsize); if (h->elements >= h->maxelem) { if (net_ratelimit()) @@ -623,11 +615,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, key = HKEY(value, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (mtype_data_equal(data, d, &multi)) { if (flag_exist || (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)))) { + ip_set_timeout_expired(ext_timeout(data, set)))) { /* Just the extensions could be overwritten */ j = i; goto reuse_slot; @@ -638,14 +630,14 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, } /* Reuse first timed out entry */ if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)) && + ip_set_timeout_expired(ext_timeout(data, set)) && j != AHASH_MAX(h) + 1) j = i; } reuse_slot: if (j != AHASH_MAX(h) + 1) { /* Fill out reused slot */ - data = ahash_data(n, j, h->dsize); + data = ahash_data(n, j, set->dsize); #ifdef IP_SET_HASH_WITH_NETS mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0); mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); @@ -653,13 +645,13 @@ reuse_slot: } else { /* Use/create a new slot */ TUNE_AHASH_MAX(h, multi); - ret = hbucket_elem_add(n, AHASH_MAX(h), h->dsize); + ret = hbucket_elem_add(n, AHASH_MAX(h), set->dsize); if (ret != 0) { if (ret == -EAGAIN) mtype_data_next(&h->next, d); goto out; } - data = ahash_data(n, n->pos++, h->dsize); + data = ahash_data(n, n->pos++, set->dsize); #ifdef IP_SET_HASH_WITH_NETS mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif @@ -670,9 +662,9 @@ reuse_slot: mtype_data_set_flags(data, flags); #endif if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(data, h), ext->timeout); + ip_set_timeout_set(ext_timeout(data, set), ext->timeout); if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(data, h), ext); + ip_set_init_counter(ext_counter(data, set), ext); out: rcu_read_unlock_bh(); @@ -699,16 +691,16 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, key = HKEY(value, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (!mtype_data_equal(data, d, &multi)) continue; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h))) + ip_set_timeout_expired(ext_timeout(data, set))) goto out; if (i != n->pos - 1) /* Not last one */ - memcpy(data, ahash_data(n, n->pos - 1, h->dsize), - h->dsize); + memcpy(data, ahash_data(n, n->pos - 1, set->dsize), + set->dsize); n->pos--; h->elements--; @@ -717,14 +709,14 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, #endif if (n->pos + AHASH_INIT_SIZE < n->size) { void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) - * h->dsize, + * set->dsize, GFP_ATOMIC); if (!tmp) { ret = 0; goto out; } n->size -= AHASH_INIT_SIZE; - memcpy(tmp, n->value, n->size * h->dsize); + memcpy(tmp, n->value, n->size * set->dsize); kfree(n->value); n->value = tmp; } @@ -742,8 +734,7 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, struct ip_set_ext *mext, struct ip_set *set, u32 flags) { if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(data, - (struct htype *)(set->data)), + ip_set_update_counter(ext_counter(data, set), ext, mext, flags); return mtype_do_data_match(data); } @@ -770,12 +761,12 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (!mtype_data_equal(data, d, &multi)) continue; if (SET_WITH_TIMEOUT(set)) { if (!ip_set_timeout_expired( - ext_timeout(data, h))) + ext_timeout(data, set))) return mtype_data_match(data, ext, mext, set, flags); @@ -818,10 +809,10 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (mtype_data_equal(data, d, &multi) && !(SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)))) { + ip_set_timeout_expired(ext_timeout(data, set)))) { ret = mtype_data_match(data, ext, mext, set, flags); goto out; } @@ -841,7 +832,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) size_t memsize; t = rcu_dereference_bh_nfnl(h->table); - memsize = mtype_ahash_memsize(h, t, NLEN(set->family)); + memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) @@ -858,7 +849,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || ((set->extensions & IPSET_EXT_TIMEOUT) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout))) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) || ((set->extensions & IPSET_EXT_COUNTER) && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(IPSET_FLAG_WITH_COUNTERS)))) @@ -894,9 +885,9 @@ mtype_list(const struct ip_set *set, n = hbucket(t, cb->args[2]); pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n); for (i = 0; i < n->pos; i++) { - e = ahash_data(n, i, h->dsize); + e = ahash_data(n, i, set->dsize); if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, h))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; pr_debug("list hash %lu hbucket %p i %u, data %p\n", cb->args[2], n, i, e); @@ -913,10 +904,10 @@ mtype_list(const struct ip_set *set, if (SET_WITH_TIMEOUT(set) && nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(ip_set_timeout_get( - ext_timeout(e, h))))) + ext_timeout(e, set))))) goto nla_put_failure; if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, h))) + ip_set_put_counter(skb, ext_counter(e, set))) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -1026,7 +1017,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) h->netmask = netmask; #endif get_random_bytes(&h->initval, sizeof(h->initval)); - h->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; hbits = htable_bits(hashsize); hsize = htable_size(hbits); @@ -1053,30 +1044,30 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { set->extensions |= IPSET_EXT_COUNTER; if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; if (set->family == NFPROTO_IPV4) { - h->dsize = sizeof(struct + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4ct_elem)); - h->offset[IPSET_EXT_ID_TIMEOUT] = + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 4ct_elem), timeout); - h->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 4ct_elem), counter); IPSET_TOKEN(HTYPE, 4_gc_init)(set, IPSET_TOKEN(HTYPE, 4_gc)); } else { - h->dsize = sizeof(struct + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6ct_elem)); - h->offset[IPSET_EXT_ID_TIMEOUT] = + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 6ct_elem), timeout); - h->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 6ct_elem), counter); @@ -1085,36 +1076,36 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) } } else { if (set->family == NFPROTO_IPV4) { - h->dsize = + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4c_elem)); - h->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 4c_elem), counter); } else { - h->dsize = + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6c_elem)); - h->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct IPSET_TOKEN(HTYPE, 6c_elem), counter); } } } else if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->extensions |= IPSET_EXT_TIMEOUT; if (set->family == NFPROTO_IPV4) { - h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem)); - h->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem)); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem), timeout); IPSET_TOKEN(HTYPE, 4_gc_init)(set, IPSET_TOKEN(HTYPE, 4_gc)); } else { - h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem)); - h->offset[IPSET_EXT_ID_TIMEOUT] = + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem)); + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem), timeout); IPSET_TOKEN(HTYPE, 6_gc_init)(set, @@ -1122,9 +1113,9 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) } } else { if (set->family == NFPROTO_IPV4) - h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)); + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)); else - h->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)); + set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)); } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 260c9a80d8a5..bbde7c304622 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -99,7 +99,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); __be32 ip; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip); @@ -118,7 +118,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, hosts; int ret = 0; @@ -253,7 +253,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); hash_ip6_netmask(&e.ip, h->netmask); @@ -270,7 +270,7 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (unlikely(!tb[IPSET_ATTR_IP] || diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 64caad35a391..dd175d6f3965 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -116,10 +116,9 @@ hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -136,7 +135,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip, ip_to = 0, p = 0, port, port_to; bool with_ports = false; int ret; @@ -306,10 +305,9 @@ hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -326,7 +324,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 2873bbc20d7a..87a2cfab2568 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -120,10 +120,9 @@ hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -141,7 +140,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip, ip_to = 0, p = 0, port, port_to; bool with_ports = false; int ret; @@ -319,10 +318,9 @@ hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -340,7 +338,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 6ce5a8e2c44e..0b9a28d7c740 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -172,7 +172,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_ipportnet4_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -195,7 +195,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; bool with_ports = false; @@ -306,9 +306,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], : port; for (; p <= port_to; p++) { e.port = htons(p); - ip2 = retried - && ip == ntohl(h->next.ip) - && p == ntohs(h->next.port) + ip2 = retried && + ip == ntohl(h->next.ip) && + p == ntohs(h->next.port) ? ntohl(h->next.ip2) : ip2_from; while (!after(ip2, ip2_to)) { e.ip2 = htonl(ip2); @@ -456,7 +456,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_ipportnet6_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -479,7 +479,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; u8 cidr; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index ec1c7dc10489..1d4caa50dacb 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -145,7 +145,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_net4_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (e.cidr == 0) return -EINVAL; @@ -165,7 +165,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, last; int ret; @@ -340,7 +340,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_net6_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (e.cidr == 0) return -EINVAL; @@ -357,10 +357,9 @@ static int hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { .cidr = HOST_MASK }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (unlikely(!tb[IPSET_ATTR_IP] || diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 814b4e31a7f8..2f0ffe35c408 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -268,7 +268,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); int ret; if (e.cidr == 0) @@ -319,7 +319,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, last; char iface[IFNAMSIZ]; int ret; @@ -537,7 +537,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); int ret; if (e.cidr == 0) @@ -584,7 +584,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); char iface[IFNAMSIZ]; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 3bd923d3f179..cab236625f97 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -164,7 +164,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netport4_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -186,7 +186,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to, p = 0, ip = 0, ip_to = 0, last; bool with_ports = false; u8 cidr; @@ -409,7 +409,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netport6_elem e = { .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -431,7 +431,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; u8 cidr; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 0ed19b5e675e..f22d05d6e366 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -58,24 +58,13 @@ struct set_adt_elem { /* Type structure */ struct list_set { - size_t dsize; /* element size */ - size_t offset[IPSET_EXT_ID_MAX]; /* Offsets to extensions */ u32 size; /* size of set list array */ - u32 timeout; /* timeout value */ struct timer_list gc; /* garbage collection */ struct set_elem members[0]; /* the set members */ }; -static inline struct set_elem * -list_set_elem(const struct list_set *map, u32 id) -{ - return (struct set_elem *)((void *)map->members + id * map->dsize); -} - -#define ext_timeout(e, m) \ -(unsigned long *)((void *)(e) + (m)->offset[IPSET_EXT_ID_TIMEOUT]) -#define ext_counter(e, m) \ -(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_EXT_ID_COUNTER]) +#define list_set_elem(set, map, id) \ + (struct set_elem *)((void *)(map)->members + (id) * (set)->dsize) static int list_set_ktest(struct ip_set *set, const struct sk_buff *skb, @@ -92,16 +81,16 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE) opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_test(e->id, skb, par, opt); if (ret > 0) { if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(e, map), + ip_set_update_counter(ext_counter(e, set), ext, &opt->ext, cmdflags); return ret; @@ -121,11 +110,11 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb, int ret; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_add(e->id, skb, par, opt); if (ret == 0) @@ -145,11 +134,11 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb, int ret; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_del(e->id, skb, par, opt); if (ret == 0) @@ -163,8 +152,7 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - struct list_set *map = set->data; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); switch (adt) { case IPSET_TEST: @@ -188,10 +176,10 @@ id_eq(const struct ip_set *set, u32 i, ip_set_id_t id) if (i >= map->size) return 0; - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); return !!(e->id == id && !(SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map)))); + ip_set_timeout_expired(ext_timeout(e, set)))); } static int @@ -199,28 +187,29 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, const struct ip_set_ext *ext) { struct list_set *map = set->data; - struct set_elem *e = list_set_elem(map, i); + struct set_elem *e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { if (i == map->size - 1) /* Last element replaced: e.g. add new,before,last */ ip_set_put_byindex(e->id); else { - struct set_elem *x = list_set_elem(map, map->size - 1); + struct set_elem *x = list_set_elem(set, map, + map->size - 1); /* Last element pushed off */ if (x->id != IPSET_INVALID_ID) ip_set_put_byindex(x->id); - memmove(list_set_elem(map, i + 1), e, - map->dsize * (map->size - (i + 1))); + memmove(list_set_elem(set, map, i + 1), e, + set->dsize * (map->size - (i + 1))); } } e->id = d->id; if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(e, map), ext->timeout); + ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(e, map), ext); + ip_set_init_counter(ext_counter(e, set), ext); return 0; } @@ -228,16 +217,16 @@ static int list_set_del(struct ip_set *set, u32 i) { struct list_set *map = set->data; - struct set_elem *e = list_set_elem(map, i); + struct set_elem *e = list_set_elem(set, map, i); ip_set_put_byindex(e->id); if (i < map->size - 1) - memmove(e, list_set_elem(map, i + 1), - map->dsize * (map->size - (i + 1))); + memmove(e, list_set_elem(set, map, i + 1), + set->dsize * (map->size - (i + 1))); /* Last element */ - e = list_set_elem(map, map->size - 1); + e = list_set_elem(set, map, map->size - 1); e->id = IPSET_INVALID_ID; return 0; } @@ -250,9 +239,9 @@ set_cleanup_entries(struct ip_set *set) u32 i; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) list_set_del(set, i); } } @@ -268,11 +257,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, int ret; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; else if (e->id != d->id) continue; @@ -301,11 +290,11 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* Check already added element */ for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto insert; else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; else if (e->id != d->id) continue; @@ -320,9 +309,9 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, return -IPSET_ERR_EXIST; /* Update extensions */ if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(e, map), ext->timeout); + ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(e, map), ext); + ip_set_init_counter(ext_counter(e, set), ext); /* Set is already added to the list */ ip_set_put_byindex(d->id); return 0; @@ -330,7 +319,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, insert: ret = -IPSET_ERR_LIST_FULL; for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) ret = d->before != 0 ? -IPSET_ERR_REF_EXIST : list_set_add(set, i, d, ext); @@ -355,12 +344,12 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, u32 i; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return d->before != 0 ? -IPSET_ERR_REF_EXIST : -IPSET_ERR_EXIST; else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; else if (e->id != d->id) continue; @@ -383,10 +372,9 @@ static int list_set_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - struct list_set *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct set_adt_elem e = { .refid = IPSET_INVALID_ID }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); struct ip_set *s; int ret = 0; @@ -454,7 +442,7 @@ list_set_flush(struct ip_set *set) u32 i; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { ip_set_put_byindex(e->id); e->id = IPSET_INVALID_ID; @@ -486,13 +474,13 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) || + nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) || (SET_WITH_COUNTER(set) && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(IPSET_FLAG_WITH_COUNTERS))) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) + map->size * map->dsize))) + htonl(sizeof(*map) + map->size * set->dsize))) goto nla_put_failure; ipset_nest_end(skb, nested); @@ -515,11 +503,11 @@ list_set_list(const struct ip_set *set, return -EMSGSIZE; for (; cb->args[2] < map->size; cb->args[2]++) { i = cb->args[2]; - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto finish; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { @@ -535,10 +523,10 @@ list_set_list(const struct ip_set *set, if (SET_WITH_TIMEOUT(set) && nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(ip_set_timeout_get( - ext_timeout(e, map))))) + ext_timeout(e, set))))) goto nla_put_failure; if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, map))) + ip_set_put_counter(skb, ext_counter(e, set))) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -565,7 +553,7 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) const struct list_set *y = b->data; return x->size == y->size && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -594,7 +582,7 @@ list_set_gc(unsigned long ul_set) set_cleanup_entries(set); write_unlock_bh(&set->lock); - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } @@ -606,7 +594,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) init_timer(&map->gc); map->gc.data = (unsigned long) set; map->gc.function = gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } @@ -625,12 +613,12 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize, return NULL; map->size = size; - map->dsize = dsize; - map->timeout = timeout; + set->dsize = dsize; + set->timeout = timeout; set->data = map; for (i = 0; i < size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); e->id = IPSET_INVALID_ID; } @@ -667,9 +655,9 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (!map) return -ENOMEM; set->extensions |= IPSET_EXT_TIMEOUT; - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct setct_elem, timeout); - map->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct setct_elem, counter); list_set_gc_init(set, list_set_gc); } else { @@ -677,7 +665,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) sizeof(struct setc_elem), 0); if (!map) return -ENOMEM; - map->offset[IPSET_EXT_ID_COUNTER] = + set->offset[IPSET_EXT_ID_COUNTER] = offsetof(struct setc_elem, counter); } } else if (tb[IPSET_ATTR_TIMEOUT]) { @@ -686,7 +674,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (!map) return -ENOMEM; set->extensions |= IPSET_EXT_TIMEOUT; - map->offset[IPSET_EXT_ID_TIMEOUT] = + set->offset[IPSET_EXT_ID_TIMEOUT] = offsetof(struct sett_elem, timeout); list_set_gc_init(set, list_set_gc); } else { -- cgit v1.2.3 From 03c8b234e61a9a3aab8d970b3bf681934ecfe443 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 7 Sep 2013 00:43:52 +0200 Subject: netfilter: ipset: Generalize extensions support Get rid of the structure based extensions and introduce a blob for the extensions. Thus we can support more extension types easily. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_ip.c | 81 +++---------------------- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 91 +++-------------------------- net/netfilter/ipset/ip_set_bitmap_port.c | 71 ++-------------------- net/netfilter/ipset/ip_set_core.c | 46 +++++++++++++++ net/netfilter/ipset/ip_set_hash_gen.h | 86 ++++----------------------- net/netfilter/ipset/ip_set_hash_ip.c | 36 +----------- net/netfilter/ipset/ip_set_hash_ipport.c | 54 +---------------- net/netfilter/ipset/ip_set_hash_ipportip.c | 60 +------------------ net/netfilter/ipset/ip_set_hash_ipportnet.c | 66 +-------------------- net/netfilter/ipset/ip_set_hash_net.c | 54 +---------------- net/netfilter/ipset/ip_set_hash_netiface.c | 68 +-------------------- net/netfilter/ipset/ip_set_hash_netport.c | 60 +------------------ net/netfilter/ipset/ip_set_list_set.c | 81 ++++--------------------- 13 files changed, 105 insertions(+), 749 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 363022edb8fb..94d985457c51 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -208,25 +208,6 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b) struct bitmap_ip_elem { }; -/* Timeout variant */ - -struct bitmap_ipt_elem { - unsigned long timeout; -}; - -/* Plain variant with counter */ - -struct bitmap_ipc_elem { - struct ip_set_counter counter; -}; - -/* Timeout variant with counter */ - -struct bitmap_ipct_elem { - unsigned long timeout; - struct ip_set_counter counter; -}; - #include "ip_set_bitmap_gen.h" /* Create bitmap:ip type of sets */ @@ -263,7 +244,7 @@ static int bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { struct bitmap_ip *map; - u32 first_ip = 0, last_ip = 0, hosts, cadt_flags = 0; + u32 first_ip = 0, last_ip = 0, hosts; u64 elements; u8 netmask = 32; int ret; @@ -335,61 +316,15 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) map->memsize = bitmap_bytes(0, elements - 1); set->variant = &bitmap_ip; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_ipct_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_ipct_elem, timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_ipct_elem, counter); - - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } - - set->timeout = ip_set_timeout_uget( - tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - - bitmap_ip_gc_init(set, bitmap_ip_gc); - } else { - set->dsize = sizeof(struct bitmap_ipc_elem); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_ipc_elem, counter); - - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_ipt_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_ipt_elem, timeout); - - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } - + set->dsize = ip_set_elem_len(set, tb, 0); + if (!init_map_ip(set, map, first_ip, last_ip, + elements, hosts, netmask)) { + kfree(map); + return -ENOMEM; + } + if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - bitmap_ip_gc_init(set, bitmap_ip_gc); - } else { - set->dsize = 0; - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } } return 0; } diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 74576cb19264..654a97bedfe9 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -289,37 +289,6 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b) /* Plain variant */ -/* Timeout variant */ - -struct bitmap_ipmact_elem { - struct { - unsigned char ether[ETH_ALEN]; - unsigned char filled; - } __attribute__ ((aligned)); - unsigned long timeout; -}; - -/* Plain variant with counter */ - -struct bitmap_ipmacc_elem { - struct { - unsigned char ether[ETH_ALEN]; - unsigned char filled; - } __attribute__ ((aligned)); - struct ip_set_counter counter; -}; - -/* Timeout variant with counter */ - -struct bitmap_ipmacct_elem { - struct { - unsigned char ether[ETH_ALEN]; - unsigned char filled; - } __attribute__ ((aligned)); - unsigned long timeout; - struct ip_set_counter counter; -}; - #include "ip_set_bitmap_gen.h" /* Create bitmap:ip,mac type of sets */ @@ -328,7 +297,7 @@ static bool init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, u32 first_ip, u32 last_ip, u32 elements) { - map->members = ip_set_alloc((last_ip - first_ip + 1) * set->dsize); + map->members = ip_set_alloc(map->memsize); if (!map->members) return false; if (set->dsize) { @@ -353,7 +322,7 @@ static int bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { - u32 first_ip = 0, last_ip = 0, cadt_flags = 0; + u32 first_ip = 0, last_ip = 0; u64 elements; struct bitmap_ipmac *map; int ret; @@ -397,57 +366,15 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], map->memsize = bitmap_bytes(0, elements - 1); set->variant = &bitmap_ipmac; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_ipmacct_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_ipmacct_elem, timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_ipmacct_elem, counter); - - if (!init_map_ipmac(set, map, first_ip, last_ip, - elements)) { - kfree(map); - return -ENOMEM; - } - set->timeout = ip_set_timeout_uget( - tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); - } else { - set->dsize = sizeof(struct bitmap_ipmacc_elem); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_ipmacc_elem, counter); - - if (!init_map_ipmac(set, map, first_ip, last_ip, - elements)) { - kfree(map); - return -ENOMEM; - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_ipmact_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_ipmact_elem, timeout); - - if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { - kfree(map); - return -ENOMEM; - } + set->dsize = ip_set_elem_len(set, tb, + sizeof(struct bitmap_ipmac_elem)); + if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { + kfree(map); + return -ENOMEM; + } + if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); - } else { - set->dsize = sizeof(struct bitmap_ipmac_elem); - - if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { - kfree(map); - return -ENOMEM; - } - set->variant = &bitmap_ipmac; } return 0; } diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 71da31935499..1ef2f3186b80 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -198,25 +198,6 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) struct bitmap_port_elem { }; -/* Timeout variant */ - -struct bitmap_portt_elem { - unsigned long timeout; -}; - -/* Plain variant with counter */ - -struct bitmap_portc_elem { - struct ip_set_counter counter; -}; - -/* Timeout variant with counter */ - -struct bitmap_portct_elem { - unsigned long timeout; - struct ip_set_counter counter; -}; - #include "ip_set_bitmap_gen.h" /* Create bitmap:ip type of sets */ @@ -250,7 +231,6 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { struct bitmap_port *map; u16 first_port, last_port; - u32 cadt_flags = 0; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) || @@ -274,53 +254,14 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) map->elements = last_port - first_port + 1; map->memsize = map->elements * sizeof(unsigned long); set->variant = &bitmap_port; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_portct_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_portct_elem, timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_portct_elem, counter); - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - - set->timeout = - ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - bitmap_port_gc_init(set, bitmap_port_gc); - } else { - set->dsize = sizeof(struct bitmap_portc_elem); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct bitmap_portc_elem, counter); - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - set->dsize = sizeof(struct bitmap_portt_elem); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct bitmap_portt_elem, timeout); - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - + set->dsize = ip_set_elem_len(set, tb, 0); + if (!init_map_port(set, map, first_port, last_port)) { + kfree(map); + return -ENOMEM; + } + if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; bitmap_port_gc_init(set, bitmap_port_gc); - } else { - set->dsize = 0; - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - } return 0; } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 428c30a8586f..f35afed3814f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -315,6 +315,52 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); +/* ipset data extension types, in size order */ + +const struct ip_set_ext_type ip_set_extensions[] = { + [IPSET_EXT_ID_COUNTER] = { + .type = IPSET_EXT_COUNTER, + .flag = IPSET_FLAG_WITH_COUNTERS, + .len = sizeof(struct ip_set_counter), + .align = __alignof__(struct ip_set_counter), + }, + [IPSET_EXT_ID_TIMEOUT] = { + .type = IPSET_EXT_TIMEOUT, + .len = sizeof(unsigned long), + .align = __alignof__(unsigned long), + }, +}; +EXPORT_SYMBOL_GPL(ip_set_extensions); + +static inline bool +add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[]) +{ + return ip_set_extensions[id].flag ? + (flags & ip_set_extensions[id].flag) : + !!tb[IPSET_ATTR_TIMEOUT]; +} + +size_t +ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len) +{ + enum ip_set_ext_id id; + size_t offset = 0; + u32 cadt_flags = 0; + + if (tb[IPSET_ATTR_CADT_FLAGS]) + cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + for (id = 0; id < IPSET_EXT_ID_MAX; id++) { + if (!add_extension(id, cadt_flags, tb)) + continue; + offset += ALIGN(len + offset, ip_set_extensions[id].align); + set->offset[id] = offset; + set->extensions |= ip_set_extensions[id].type; + offset += ip_set_extensions[id].len; + } + return len + offset; +} +EXPORT_SYMBOL_GPL(ip_set_elem_len); + int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 0cb840e1f8ae..3999f1719f69 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -960,7 +960,6 @@ static int IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) { u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; - u32 cadt_flags = 0; u8 hbits; #ifdef IP_SET_HASH_WITH_NETMASK u8 netmask; @@ -1034,88 +1033,23 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) rcu_assign_pointer(h->table, t); set->data = h; - if (set->family == NFPROTO_IPV4) + if (set->family == NFPROTO_IPV4) { set->variant = &IPSET_TOKEN(HTYPE, 4_variant); - else + set->dsize = ip_set_elem_len(set, tb, + sizeof(struct IPSET_TOKEN(HTYPE, 4_elem))); + } else { set->variant = &IPSET_TOKEN(HTYPE, 6_variant); - - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - set->timeout = - ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - if (set->family == NFPROTO_IPV4) { - set->dsize = sizeof(struct - IPSET_TOKEN(HTYPE, 4ct_elem)); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct - IPSET_TOKEN(HTYPE, 4ct_elem), - timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct - IPSET_TOKEN(HTYPE, 4ct_elem), - counter); - IPSET_TOKEN(HTYPE, 4_gc_init)(set, - IPSET_TOKEN(HTYPE, 4_gc)); - } else { - set->dsize = sizeof(struct - IPSET_TOKEN(HTYPE, 6ct_elem)); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct - IPSET_TOKEN(HTYPE, 6ct_elem), - timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct - IPSET_TOKEN(HTYPE, 6ct_elem), - counter); - IPSET_TOKEN(HTYPE, 6_gc_init)(set, - IPSET_TOKEN(HTYPE, 6_gc)); - } - } else { - if (set->family == NFPROTO_IPV4) { - set->dsize = - sizeof(struct - IPSET_TOKEN(HTYPE, 4c_elem)); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct - IPSET_TOKEN(HTYPE, 4c_elem), - counter); - } else { - set->dsize = - sizeof(struct - IPSET_TOKEN(HTYPE, 6c_elem)); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct - IPSET_TOKEN(HTYPE, 6c_elem), - counter); - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { + set->dsize = ip_set_elem_len(set, tb, + sizeof(struct IPSET_TOKEN(HTYPE, 6_elem))); + } + if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - if (set->family == NFPROTO_IPV4) { - set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4t_elem)); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct IPSET_TOKEN(HTYPE, 4t_elem), - timeout); + if (set->family == NFPROTO_IPV4) IPSET_TOKEN(HTYPE, 4_gc_init)(set, IPSET_TOKEN(HTYPE, 4_gc)); - } else { - set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6t_elem)); - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct IPSET_TOKEN(HTYPE, 6t_elem), - timeout); + else IPSET_TOKEN(HTYPE, 6_gc_init)(set, IPSET_TOKEN(HTYPE, 6_gc)); - } - } else { - if (set->family == NFPROTO_IPV4) - set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)); - else - set->dsize = sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)); } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index bbde7c304622..a111ffe40b46 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -35,7 +35,7 @@ MODULE_ALIAS("ip_set_hash:ip"); #define HTYPE hash_ip #define IP_SET_HASH_WITH_NETMASK -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ip4_elem { @@ -43,22 +43,6 @@ struct hash_ip4_elem { __be32 ip; }; -struct hash_ip4t_elem { - __be32 ip; - unsigned long timeout; -}; - -struct hash_ip4c_elem { - __be32 ip; - struct ip_set_counter counter; -}; - -struct hash_ip4ct_elem { - __be32 ip; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -178,29 +162,13 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ /* Member elements */ struct hash_ip6_elem { union nf_inet_addr ip; }; -struct hash_ip6t_elem { - union nf_inet_addr ip; - unsigned long timeout; -}; - -struct hash_ip6c_elem { - union nf_inet_addr ip; - struct ip_set_counter counter; -}; - -struct hash_ip6ct_elem { - union nf_inet_addr ip; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index dd175d6f3965..5dc735c4dac2 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -36,7 +36,7 @@ MODULE_ALIAS("ip_set_hash:ip,port"); /* Type specific function prefix */ #define HTYPE hash_ipport -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ipport4_elem { @@ -46,31 +46,6 @@ struct hash_ipport4_elem { u8 padding; }; -struct hash_ipport4t_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipport4c_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipport4ct_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -221,7 +196,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_ipport6_elem { union nf_inet_addr ip; @@ -230,31 +205,6 @@ struct hash_ipport6_elem { u8 padding; }; -struct hash_ipport6t_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipport6c_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipport6ct_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 87a2cfab2568..8c43dc7811cb 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -36,7 +36,7 @@ MODULE_ALIAS("ip_set_hash:ip,port,ip"); /* Type specific function prefix */ #define HTYPE hash_ipportip -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ipportip4_elem { @@ -47,34 +47,6 @@ struct hash_ipportip4_elem { u8 padding; }; -struct hash_ipportip4t_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipportip4c_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipportip4ct_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - static inline bool hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1, const struct hash_ipportip4_elem *ip2, @@ -230,7 +202,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_ipportip6_elem { union nf_inet_addr ip; @@ -240,34 +212,6 @@ struct hash_ipportip6_elem { u8 padding; }; -struct hash_ipportip6t_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipportip6c_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipportip6ct_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 0b9a28d7c740..34890452366c 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -46,7 +46,7 @@ MODULE_ALIAS("ip_set_hash:ip,port,net"); #define IP_SET_HASH_WITH_PROTO #define IP_SET_HASH_WITH_NETS -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ipportnet4_elem { @@ -58,37 +58,6 @@ struct hash_ipportnet4_elem { u8 proto; }; -struct hash_ipportnet4t_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - unsigned long timeout; -}; - -struct hash_ipportnet4c_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; -}; - -struct hash_ipportnet4ct_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -328,7 +297,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_ipportnet6_elem { union nf_inet_addr ip; @@ -339,37 +308,6 @@ struct hash_ipportnet6_elem { u8 proto; }; -struct hash_ipportnet6t_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - unsigned long timeout; -}; - -struct hash_ipportnet6c_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; -}; - -struct hash_ipportnet6ct_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 1d4caa50dacb..d5598557f4a9 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -36,7 +36,7 @@ MODULE_ALIAS("ip_set_hash:net"); #define HTYPE hash_net #define IP_SET_HASH_WITH_NETS -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_net4_elem { @@ -46,31 +46,6 @@ struct hash_net4_elem { u8 cidr; }; -struct hash_net4t_elem { - __be32 ip; - u16 padding0; - u8 nomatch; - u8 cidr; - unsigned long timeout; -}; - -struct hash_net4c_elem { - __be32 ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; -}; - -struct hash_net4ct_elem { - __be32 ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -228,7 +203,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_net6_elem { union nf_inet_addr ip; @@ -237,31 +212,6 @@ struct hash_net6_elem { u8 cidr; }; -struct hash_net6t_elem { - union nf_inet_addr ip; - u16 padding0; - u8 nomatch; - u8 cidr; - unsigned long timeout; -}; - -struct hash_net6c_elem { - union nf_inet_addr ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; -}; - -struct hash_net6ct_elem { - union nf_inet_addr ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 2f0ffe35c408..26703e9e5082 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -134,7 +134,7 @@ iface_add(struct rb_root *root, const char **iface) #define STREQ(a, b) (strcmp(a, b) == 0) -/* IPv4 variants */ +/* IPv4 variant */ struct hash_netiface4_elem_hashed { __be32 ip; @@ -144,7 +144,7 @@ struct hash_netiface4_elem_hashed { u8 elem; }; -/* Member elements without timeout */ +/* Member elements */ struct hash_netiface4_elem { __be32 ip; u8 physdev; @@ -154,37 +154,6 @@ struct hash_netiface4_elem { const char *iface; }; -struct hash_netiface4t_elem { - __be32 ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - unsigned long timeout; -}; - -struct hash_netiface4c_elem { - __be32 ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; -}; - -struct hash_netiface4ct_elem { - __be32 ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -399,7 +368,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_netiface6_elem_hashed { union nf_inet_addr ip; @@ -418,37 +387,6 @@ struct hash_netiface6_elem { const char *iface; }; -struct hash_netiface6t_elem { - union nf_inet_addr ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - unsigned long timeout; -}; - -struct hash_netiface6c_elem { - union nf_inet_addr ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; -}; - -struct hash_netiface6ct_elem { - union nf_inet_addr ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index cab236625f97..45b6e91b0636 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -45,7 +45,7 @@ MODULE_ALIAS("ip_set_hash:net,port"); */ #define IP_SET_HASH_WITH_NETS_PACKED -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_netport4_elem { @@ -56,34 +56,6 @@ struct hash_netport4_elem { u8 nomatch:1; }; -struct hash_netport4t_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - unsigned long timeout; -}; - -struct hash_netport4c_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; -}; - -struct hash_netport4ct_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -287,7 +259,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_netport6_elem { union nf_inet_addr ip; @@ -297,34 +269,6 @@ struct hash_netport6_elem { u8 nomatch:1; }; -struct hash_netport6t_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - unsigned long timeout; -}; - -struct hash_netport6c_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; -}; - -struct hash_netport6ct_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index f22d05d6e366..7fd11c79aff4 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -28,28 +28,6 @@ struct set_elem { ip_set_id_t id; }; -struct sett_elem { - struct { - ip_set_id_t id; - } __attribute__ ((aligned)); - unsigned long timeout; -}; - -struct setc_elem { - struct { - ip_set_id_t id; - } __attribute__ ((aligned)); - struct ip_set_counter counter; -}; - -struct setct_elem { - struct { - ip_set_id_t id; - } __attribute__ ((aligned)); - struct ip_set_counter counter; - unsigned long timeout; -}; - struct set_adt_elem { ip_set_id_t id; ip_set_id_t refid; @@ -600,21 +578,18 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) /* Create list:set type of sets */ -static struct list_set * -init_list_set(struct ip_set *set, u32 size, size_t dsize, - unsigned long timeout) +static bool +init_list_set(struct ip_set *set, u32 size) { struct list_set *map; struct set_elem *e; u32 i; - map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL); + map = kzalloc(sizeof(*map) + size * set->dsize, GFP_KERNEL); if (!map) - return NULL; + return false; map->size = size; - set->dsize = dsize; - set->timeout = timeout; set->data = map; for (i = 0; i < size; i++) { @@ -622,15 +597,13 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize, e->id = IPSET_INVALID_ID; } - return map; + return true; } static int list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) { - struct list_set *map; - u32 size = IP_SET_LIST_DEFAULT_SIZE, cadt_flags = 0; - unsigned long timeout = 0; + u32 size = IP_SET_LIST_DEFAULT_SIZE; if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || @@ -642,45 +615,13 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (size < IP_SET_LIST_MIN_SIZE) size = IP_SET_LIST_MIN_SIZE; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (tb[IPSET_ATTR_TIMEOUT]) - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->variant = &set_variant; - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - map = init_list_set(set, size, - sizeof(struct setct_elem), timeout); - if (!map) - return -ENOMEM; - set->extensions |= IPSET_EXT_TIMEOUT; - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct setct_elem, timeout); - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct setct_elem, counter); - list_set_gc_init(set, list_set_gc); - } else { - map = init_list_set(set, size, - sizeof(struct setc_elem), 0); - if (!map) - return -ENOMEM; - set->offset[IPSET_EXT_ID_COUNTER] = - offsetof(struct setc_elem, counter); - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - map = init_list_set(set, size, - sizeof(struct sett_elem), timeout); - if (!map) - return -ENOMEM; - set->extensions |= IPSET_EXT_TIMEOUT; - set->offset[IPSET_EXT_ID_TIMEOUT] = - offsetof(struct sett_elem, timeout); + set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem)); + if (!init_list_set(set, size)) + return -ENOMEM; + if (tb[IPSET_ATTR_TIMEOUT]) { + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); list_set_gc_init(set, list_set_gc); - } else { - map = init_list_set(set, size, sizeof(struct set_elem), 0); - if (!map) - return -ENOMEM; } return 0; } -- cgit v1.2.3 From 40cd63bf33b21ef4d43776b1d49c605f876fe32c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 9 Sep 2013 14:44:29 +0200 Subject: netfilter: ipset: Support extensions which need a per data destroy function Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_gen.h | 38 ++++++++++++++---- net/netfilter/ipset/ip_set_hash_gen.h | 71 +++++++++++++++++++++------------ net/netfilter/ipset/ip_set_list_set.c | 19 ++++++--- 3 files changed, 90 insertions(+), 38 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index f32ddbca3923..4515fe8b83dd 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -12,6 +12,7 @@ #define mtype_gc_test IPSET_TOKEN(MTYPE, _gc_test) #define mtype_is_filled IPSET_TOKEN(MTYPE, _is_filled) #define mtype_do_add IPSET_TOKEN(MTYPE, _do_add) +#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup) #define mtype_do_del IPSET_TOKEN(MTYPE, _do_del) #define mtype_do_list IPSET_TOKEN(MTYPE, _do_list) #define mtype_do_head IPSET_TOKEN(MTYPE, _do_head) @@ -46,6 +47,17 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) add_timer(&map->gc); } +static void +mtype_ext_cleanup(struct ip_set *set) +{ + struct mtype *map = set->data; + u32 id; + + for (id = 0; id < map->elements; id++) + if (test_bit(id, map->members)) + ip_set_ext_destroy(set, get_ext(set, map, id)); +} + static void mtype_destroy(struct ip_set *set) { @@ -55,8 +67,11 @@ mtype_destroy(struct ip_set *set) del_timer_sync(&map->gc); ip_set_free(map->members); - if (set->dsize) + if (set->dsize) { + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set); ip_set_free(map->extensions); + } kfree(map); set->data = NULL; @@ -67,6 +82,8 @@ mtype_flush(struct ip_set *set) { struct mtype *map = set->data; + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set); memset(map->members, 0, map->memsize); } @@ -132,6 +149,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, ret = 0; else if (!(flags & IPSET_FLAG_EXIST)) return -IPSET_ERR_EXIST; + /* Element is re-added, cleanup extensions */ + ip_set_ext_destroy(set, x); } if (SET_WITH_TIMEOUT(set)) @@ -152,11 +171,14 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - const void *x = get_ext(set, map, e->id); + void *x = get_ext(set, map, e->id); - if (mtype_do_del(e, map) || - (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, set)))) + if (mtype_do_del(e, map)) + return -IPSET_ERR_EXIST; + + ip_set_ext_destroy(set, x); + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(x, set))) return -IPSET_ERR_EXIST; return 0; @@ -235,7 +257,7 @@ mtype_gc(unsigned long ul_set) { struct ip_set *set = (struct ip_set *) ul_set; struct mtype *map = set->data; - const void *x; + void *x; u32 id; /* We run parallel with other readers (test element) @@ -244,8 +266,10 @@ mtype_gc(unsigned long ul_set) for (id = 0; id < map->elements; id++) if (mtype_gc_test(id, map, set->dsize)) { x = get_ext(set, map, id); - if (ip_set_timeout_expired(ext_timeout(x, set))) + if (ip_set_timeout_expired(ext_timeout(x, set))) { clear_bit(id, map->members); + ip_set_ext_destroy(set, x); + } } read_unlock_bh(&set->lock); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 3999f1719f69..3c26e5b946f5 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -117,23 +117,6 @@ htable_bits(u32 hashsize) return bits; } -/* Destroy the hashtable part of the set */ -static void -ahash_destroy(struct htable *t) -{ - struct hbucket *n; - u32 i; - - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = hbucket(t, i); - if (n->size) - /* FIXME: use slab cache */ - kfree(n->value); - } - - ip_set_free(t); -} - static int hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) { @@ -192,6 +175,8 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #undef mtype_data_next #undef mtype_elem +#undef mtype_ahash_destroy +#undef mtype_ext_cleanup #undef mtype_add_cidr #undef mtype_del_cidr #undef mtype_ahash_memsize @@ -230,6 +215,8 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define mtype_data_list IPSET_TOKEN(MTYPE, _data_list) #define mtype_data_next IPSET_TOKEN(MTYPE, _data_next) #define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_ahash_destroy IPSET_TOKEN(MTYPE, _ahash_destroy) +#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup) #define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr) #define mtype_del_cidr IPSET_TOKEN(MTYPE, _del_cidr) #define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize) @@ -359,6 +346,19 @@ mtype_ahash_memsize(const struct htype *h, const struct htable *t, return memsize; } +/* Get the ith element from the array block n */ +#define ahash_data(n, i, dsize) \ + ((struct mtype_elem *)((n)->value + ((i) * (dsize)))) + +static void +mtype_ext_cleanup(struct ip_set *set, struct hbucket *n) +{ + int i; + + for (i = 0; i < n->pos; i++) + ip_set_ext_destroy(set, ahash_data(n, i, set->dsize)); +} + /* Flush a hash type of set: destroy all elements */ static void mtype_flush(struct ip_set *set) @@ -372,6 +372,8 @@ mtype_flush(struct ip_set *set) for (i = 0; i < jhash_size(t->htable_bits); i++) { n = hbucket(t, i); if (n->size) { + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set, n); n->size = n->pos = 0; /* FIXME: use slab cache */ kfree(n->value); @@ -383,6 +385,26 @@ mtype_flush(struct ip_set *set) h->elements = 0; } +/* Destroy the hashtable part of the set */ +static void +mtype_ahash_destroy(struct ip_set *set, struct htable *t) +{ + struct hbucket *n; + u32 i; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + if (n->size) { + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set, n); + /* FIXME: use slab cache */ + kfree(n->value); + } + } + + ip_set_free(t); +} + /* Destroy a hash type of set */ static void mtype_destroy(struct ip_set *set) @@ -392,7 +414,7 @@ mtype_destroy(struct ip_set *set) if (set->extensions & IPSET_EXT_TIMEOUT) del_timer_sync(&h->gc); - ahash_destroy(rcu_dereference_bh_nfnl(h->table)); + mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table)); #ifdef IP_SET_HASH_WITH_RBTREE rbtree_destroy(&h->rbtree); #endif @@ -430,10 +452,6 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } -/* Get the ith element from the array block n */ -#define ahash_data(n, i, dsize) \ - ((struct mtype_elem *)((n)->value + ((i) * (dsize)))) - /* Delete expired elements from the hashtable */ static void mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) @@ -456,6 +474,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) mtype_del_cidr(h, CIDR(data->cidr), nets_length, 0); #endif + ip_set_ext_destroy(set, data); if (j != n->pos - 1) /* Not last one */ memcpy(data, @@ -557,7 +576,7 @@ retry: mtype_data_reset_flags(data, &flags); #endif read_unlock_bh(&set->lock); - ahash_destroy(t); + mtype_ahash_destroy(set, t); if (ret == -EAGAIN) goto retry; return ret; @@ -578,7 +597,7 @@ retry: pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); - ahash_destroy(orig); + mtype_ahash_destroy(set, orig); return 0; } @@ -642,6 +661,7 @@ reuse_slot: mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0); mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif + ip_set_ext_destroy(set, data); } else { /* Use/create a new slot */ TUNE_AHASH_MAX(h, multi); @@ -707,6 +727,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, #ifdef IP_SET_HASH_WITH_NETS mtype_del_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); #endif + ip_set_ext_destroy(set, data); if (n->pos + AHASH_INIT_SIZE < n->size) { void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) * set->dsize, @@ -1033,7 +1054,7 @@ IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) rcu_assign_pointer(h->table, t); set->data = h; - if (set->family == NFPROTO_IPV4) { + if (set->family == NFPROTO_IPV4) { set->variant = &IPSET_TOKEN(HTYPE, 4_variant); set->dsize = ip_set_elem_len(set, tb, sizeof(struct IPSET_TOKEN(HTYPE, 4_elem))); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 7fd11c79aff4..e44986af1fc2 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -168,16 +168,19 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, struct set_elem *e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { - if (i == map->size - 1) + if (i == map->size - 1) { /* Last element replaced: e.g. add new,before,last */ ip_set_put_byindex(e->id); - else { + ip_set_ext_destroy(set, e); + } else { struct set_elem *x = list_set_elem(set, map, map->size - 1); /* Last element pushed off */ - if (x->id != IPSET_INVALID_ID) + if (x->id != IPSET_INVALID_ID) { ip_set_put_byindex(x->id); + ip_set_ext_destroy(set, x); + } memmove(list_set_elem(set, map, i + 1), e, set->dsize * (map->size - (i + 1))); } @@ -198,6 +201,7 @@ list_set_del(struct ip_set *set, u32 i) struct set_elem *e = list_set_elem(set, map, i); ip_set_put_byindex(e->id); + ip_set_ext_destroy(set, e); if (i < map->size - 1) memmove(e, list_set_elem(set, map, i + 1), @@ -266,14 +270,14 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, bool flag_exist = flags & IPSET_FLAG_EXIST; u32 i, ret = 0; + if (SET_WITH_TIMEOUT(set)) + set_cleanup_entries(set); + /* Check already added element */ for (i = 0; i < map->size; i++) { e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto insert; - else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, set))) - continue; else if (e->id != d->id) continue; @@ -286,6 +290,8 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* Can't re-add */ return -IPSET_ERR_EXIST; /* Update extensions */ + ip_set_ext_destroy(set, e); + if (SET_WITH_TIMEOUT(set)) ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) @@ -423,6 +429,7 @@ list_set_flush(struct ip_set *set) e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { ip_set_put_byindex(e->id); + ip_set_ext_destroy(set, e); e->id = IPSET_INVALID_ID; } } -- cgit v1.2.3 From b91b396d5e35eef9938a56e781cb0171a53907ca Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 9 Sep 2013 17:10:20 +0200 Subject: netfilter: ipset: list:set: make sure all elements are checked by the gc When an element timed out, the next one was skipped by the garbage collector, fixed. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_list_set.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index e44986af1fc2..30bf1dd11f04 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -218,13 +218,16 @@ set_cleanup_entries(struct ip_set *set) { struct list_set *map = set->data; struct set_elem *e; - u32 i; + u32 i = 0; - for (i = 0; i < map->size; i++) { + while (i < map->size) { e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID && ip_set_timeout_expired(ext_timeout(e, set))) list_set_del(set, i); + /* Check element moved to position i in next loop */ + else + i++; } } -- cgit v1.2.3 From d9628bbeca888fd1f9a9c57864dd90b7d25954e0 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 30 Sep 2013 09:15:44 +0200 Subject: netfilter: ipset: Kconfig: ipset needs NETFILTER_NETLINK Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index ba36c283d837..fd2c14b6c51b 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -1,7 +1,7 @@ menuconfig IP_SET tristate "IP set support" depends on INET && NETFILTER - depends on NETFILTER_NETLINK + select NETFILTER_NETLINK help This option adds IP set support to the kernel. In order to define and use the sets, you need the userspace utility -- cgit v1.2.3 From ea53ac5b630e813aec20c7cdcfe511daca70dee4 Mon Sep 17 00:00:00 2001 From: Oliver Smith Date: Fri, 20 Sep 2013 10:13:53 +0200 Subject: netfilter: ipset: Add hash:net,net module to kernel. This adds a new set that provides the ability to configure pairs of subnets. A small amount of additional handling code has been added to the generic hash header file - this code is conditionally activated by a preprocessor definition. Signed-off-by: Oliver Smith Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/Kconfig | 9 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_hash_gen.h | 58 +++- net/netfilter/ipset/ip_set_hash_netnet.c | 482 +++++++++++++++++++++++++++++++ 4 files changed, 541 insertions(+), 9 deletions(-) create mode 100644 net/netfilter/ipset/ip_set_hash_netnet.c (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index fd2c14b6c51b..9119f658a69b 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -99,6 +99,15 @@ config IP_SET_HASH_NET To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_NETNET + tristate "hash:net,net set support" + depends on IP_SET + help + This option adds the hash:net,net set type support, by which + one can store IPv4/IPv6 network address/prefix pairs in a set. + + To compile it as a module, choose M here. If unsure, say N. + config IP_SET_HASH_NETPORT tristate "hash:net,port set support" depends on IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 6e965ecd5444..43eef7a19e3c 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o +obj-$(CONFIG_IP_SET_HASH_NETNET) += ip_set_hash_netnet.o # list types obj-$(CONFIG_IP_SET_LIST_SET) += ip_set_list_set.o diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 3c26e5b946f5..6bd2eef4f5d0 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -142,11 +142,16 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) } #ifdef IP_SET_HASH_WITH_NETS +#if IPSET_NET_COUNT > 1 +#define __CIDR(cidr, i) (cidr[i]) +#else +#define __CIDR(cidr, i) (cidr) +#endif #ifdef IP_SET_HASH_WITH_NETS_PACKED /* When cidr is packed with nomatch, cidr - 1 is stored in the entry */ -#define CIDR(cidr) (cidr + 1) +#define CIDR(cidr, i) (__CIDR(cidr, i) + 1) #else -#define CIDR(cidr) (cidr) +#define CIDR(cidr, i) (__CIDR(cidr, i)) #endif #define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128) @@ -210,6 +215,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define mtype_do_data_match(d) 1 #endif #define mtype_data_set_flags IPSET_TOKEN(MTYPE, _data_set_flags) +#define mtype_data_reset_elem IPSET_TOKEN(MTYPE, _data_reset_elem) #define mtype_data_reset_flags IPSET_TOKEN(MTYPE, _data_reset_flags) #define mtype_data_netmask IPSET_TOKEN(MTYPE, _data_netmask) #define mtype_data_list IPSET_TOKEN(MTYPE, _data_list) @@ -461,6 +467,9 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) struct mtype_elem *data; u32 i; int j; +#ifdef IP_SET_HASH_WITH_NETS + u8 k; +#endif rcu_read_lock_bh(); t = rcu_dereference_bh(h->table); @@ -471,8 +480,9 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) if (ip_set_timeout_expired(ext_timeout(data, set))) { pr_debug("expired %u/%u\n", i, j); #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(data->cidr), - nets_length, 0); + for (k = 0; k < IPSET_NET_COUNT; k++) + mtype_del_cidr(h, CIDR(data->cidr, k), + nets_length, k); #endif ip_set_ext_destroy(set, data); if (j != n->pos - 1) @@ -658,8 +668,12 @@ reuse_slot: /* Fill out reused slot */ data = ahash_data(n, j, set->dsize); #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(data->cidr), NLEN(set->family), 0); - mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); + for (i = 0; i < IPSET_NET_COUNT; i++) { + mtype_del_cidr(h, CIDR(data->cidr, i), + NLEN(set->family), i); + mtype_add_cidr(h, CIDR(d->cidr, i), + NLEN(set->family), i); + } #endif ip_set_ext_destroy(set, data); } else { @@ -673,7 +687,9 @@ reuse_slot: } data = ahash_data(n, n->pos++, set->dsize); #ifdef IP_SET_HASH_WITH_NETS - mtype_add_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); + for (i = 0; i < IPSET_NET_COUNT; i++) + mtype_add_cidr(h, CIDR(d->cidr, i), NLEN(set->family), + i); #endif h->elements++; } @@ -704,6 +720,9 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct mtype_elem *data; struct hbucket *n; int i, ret = -IPSET_ERR_EXIST; +#ifdef IP_SET_HASH_WITH_NETS + u8 j; +#endif u32 key, multi = 0; rcu_read_lock_bh(); @@ -725,7 +744,9 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, n->pos--; h->elements--; #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(d->cidr), NLEN(set->family), 0); + for (j = 0; j < IPSET_NET_COUNT; j++) + mtype_del_cidr(h, CIDR(d->cidr, j), NLEN(set->family), + j); #endif ip_set_ext_destroy(set, data); if (n->pos + AHASH_INIT_SIZE < n->size) { @@ -772,13 +793,26 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, struct htable *t = rcu_dereference_bh(h->table); struct hbucket *n; struct mtype_elem *data; +#if IPSET_NET_COUNT == 2 + struct mtype_elem orig = *d; + int i, j = 0, k; +#else int i, j = 0; +#endif u32 key, multi = 0; u8 nets_length = NLEN(set->family); pr_debug("test by nets\n"); for (; j < nets_length && h->nets[j].nets[0] && !multi; j++) { +#if IPSET_NET_COUNT == 2 + mtype_data_reset_elem(d, &orig); + mtype_data_netmask(d, h->nets[j].cidr[0], false); + for (k = 0; k < nets_length && h->nets[k].nets[1] && !multi; + k++) { + mtype_data_netmask(d, h->nets[k].cidr[1], true); +#else mtype_data_netmask(d, h->nets[j].cidr[0]); +#endif key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { @@ -798,6 +832,9 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, return mtype_data_match(data, ext, mext, set, flags); } +#if IPSET_NET_COUNT == 2 + } +#endif } return 0; } @@ -821,7 +858,10 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, #ifdef IP_SET_HASH_WITH_NETS /* If we test an IP address and not a network address, * try all possible network sizes */ - if (CIDR(d->cidr) == SET_HOST_MASK(set->family)) { + for (i = 0; i < IPSET_NET_COUNT; i++) + if (CIDR(d->cidr, i) != SET_HOST_MASK(set->family)) + break; + if (i == IPSET_NET_COUNT) { ret = mtype_test_cidrs(set, d, ext, mext, flags); goto out; } diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c new file mode 100644 index 000000000000..771ce2b565a6 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -0,0 +1,482 @@ +/* Copyright (C) 2003-2013 Jozsef Kadlecsik + * Copyright (C) 2013 Oliver Smith + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:net type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 0 + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Oliver Smith "); +IP_SET_MODULE_DESC("hash:net,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); +MODULE_ALIAS("ip_set_hash:net,net"); + +/* Type specific function prefix */ +#define HTYPE hash_netnet +#define IP_SET_HASH_WITH_NETS +#define IPSET_NET_COUNT 2 + +/* IPv4 variants */ + +/* Member elements */ +struct hash_netnet4_elem { + union { + __be32 ip[2]; + __be64 ipcmp; + }; + u8 nomatch; + union { + u8 cidr[2]; + u16 ccmp; + }; +}; + +/* Common functions */ + +static inline bool +hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1, + const struct hash_netnet4_elem *ip2, + u32 *multi) +{ + return ip1->ipcmp == ip2->ipcmp && + ip2->ccmp == ip2->ccmp; +} + +static inline int +hash_netnet4_do_data_match(const struct hash_netnet4_elem *elem) +{ + return elem->nomatch ? -ENOTEMPTY : 1; +} + +static inline void +hash_netnet4_data_set_flags(struct hash_netnet4_elem *elem, u32 flags) +{ + elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; +} + +static inline void +hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags) +{ + swap(*flags, elem->nomatch); +} + +static inline void +hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem, + struct hash_netnet4_elem *orig) +{ + elem->ip[1] = orig->ip[1]; +} + +static inline void +hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner) +{ + if (inner) { + elem->ip[1] &= ip_set_netmask(cidr); + elem->cidr[1] = cidr; + } else { + elem->ip[0] &= ip_set_netmask(cidr); + elem->cidr[0] = cidr; + } +} + +static bool +hash_netnet4_data_list(struct sk_buff *skb, + const struct hash_netnet4_elem *data) +{ + u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; + + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip[0]) || + nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip[1]) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_netnet4_data_next(struct hash_netnet4_elem *next, + const struct hash_netnet4_elem *d) +{ + next->ipcmp = d->ipcmp; +} + +#define MTYPE hash_netnet4 +#define PF 4 +#define HOST_MASK 32 +#include "ip_set_hash_gen.h" + +static int +hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_netnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet4_elem e = { + .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK, + .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK, + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + if (adt == IPSET_TEST) + e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; + + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0]); + ip4addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1]); + e.ip[0] &= ip_set_netmask(e.cidr[0]); + e.ip[1] &= ip_set_netmask(e.cidr[1]); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + const struct hash_netnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet4_elem e = { .cidr[0] = HOST_MASK, + .cidr[1] = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, last; + u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2; + u8 cidr, cidr2; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || + ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) { + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!cidr || cidr > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + e.cidr[0] = cidr; + } + + if (tb[IPSET_ATTR_CIDR2]) { + cidr2 = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + if (!cidr2 || cidr2 > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + e.cidr[1] = cidr2; + } + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); + } + + if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] && + tb[IPSET_ATTR_IP2_TO])) { + e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0])); + e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1])); + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt, set) ? -ret : + ip_set_eexist(ret, flags) ? 0 : ret; + } + + ip_to = ip; + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip_to < ip) + swap(ip, ip_to); + if (ip + UINT_MAX == ip_to) + return -IPSET_ERR_HASH_RANGE; + } + + ip2_to = ip2_from; + if (tb[IPSET_ATTR_IP2_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); + if (ret) + return ret; + if (ip2_to < ip2_from) + swap(ip2_from, ip2_to); + if (ip2_from + UINT_MAX == ip2_to) + return -IPSET_ERR_HASH_RANGE; + + } + + if (retried) + ip = ntohl(h->next.ip[0]); + + while (!after(ip, ip_to)) { + e.ip[0] = htonl(ip); + last = ip_set_range_to_cidr(ip, ip_to, &cidr); + e.cidr[0] = cidr; + ip2 = (retried && + ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1]) + : ip2_from; + while (!after(ip2, ip2_to)) { + e.ip[1] = htonl(ip2); + last2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr2); + e.cidr[1] = cidr2; + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + ip2 = last2 + 1; + } + ip = last + 1; + } + return ret; +} + +/* IPv6 variants */ + +struct hash_netnet6_elem { + union nf_inet_addr ip[2]; + u8 nomatch; + union { + u8 cidr[2]; + u16 ccmp; + }; +}; + +/* Common functions */ + +static inline bool +hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1, + const struct hash_netnet6_elem *ip2, + u32 *multi) +{ + return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) && + ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) && + ip1->ccmp == ip2->ccmp; +} + +static inline int +hash_netnet6_do_data_match(const struct hash_netnet6_elem *elem) +{ + return elem->nomatch ? -ENOTEMPTY : 1; +} + +static inline void +hash_netnet6_data_set_flags(struct hash_netnet6_elem *elem, u32 flags) +{ + elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; +} + +static inline void +hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags) +{ + swap(*flags, elem->nomatch); +} + +static inline void +hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem, + struct hash_netnet6_elem *orig) +{ + elem->ip[1] = orig->ip[1]; +} + +static inline void +hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner) +{ + if (inner) { + ip6_netmask(&elem->ip[1], cidr); + elem->cidr[1] = cidr; + } else { + ip6_netmask(&elem->ip[0], cidr); + elem->cidr[0] = cidr; + } +} + +static bool +hash_netnet6_data_list(struct sk_buff *skb, + const struct hash_netnet6_elem *data) +{ + u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; + + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip[0].in6) || + nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip[1].in6) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_netnet6_data_next(struct hash_netnet4_elem *next, + const struct hash_netnet6_elem *d) +{ +} + +#undef MTYPE +#undef PF +#undef HOST_MASK + +#define MTYPE hash_netnet6 +#define PF 6 +#define HOST_MASK 128 +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" + +static int +hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_netnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet6_elem e = { + .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK, + .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + if (adt == IPSET_TEST) + e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK; + + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1].in6); + ip6_netmask(&e.ip[0], e.cidr[0]); + ip6_netmask(&e.ip[1], e.cidr[1]); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet6_elem e = { .cidr[0] = HOST_MASK, + .cidr[1] = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) || + ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (tb[IPSET_ATTR_CIDR2]) + e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + + if (!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] || + e.cidr[1] > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + + ip6_netmask(&e.ip[0], e.cidr[0]); + ip6_netmask(&e.ip[1], e.cidr[1]); + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); + } + + ret = adtfn(set, &e, &ext, &ext, flags); + + return ip_set_enomatch(ret, flags, adt, set) ? -ret : + ip_set_eexist(ret, flags) ? 0 : ret; +} + +static struct ip_set_type hash_netnet_type __read_mostly = { + .name = "hash:net,net", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_IP2 | IPSET_TYPE_NOMATCH, + .dimension = IPSET_DIM_TWO, + .family = NFPROTO_UNSPEC, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, + .create = hash_netnet_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_CIDR2] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_netnet_init(void) +{ + return ip_set_type_register(&hash_netnet_type); +} + +static void __exit +hash_netnet_fini(void) +{ + ip_set_type_unregister(&hash_netnet_type); +} + +module_init(hash_netnet_init); +module_exit(hash_netnet_fini); -- cgit v1.2.3 From 68b63f08d22f23161c43cd2417104aa213ff877f Mon Sep 17 00:00:00 2001 From: Oliver Smith Date: Sun, 22 Sep 2013 20:56:30 +0200 Subject: netfilter: ipset: Support comments for ipset entries in the core. This adds the core support for having comments on ipset entries. The comments are stored as standard null-terminated strings in dynamically allocated memory after being passed to the kernel. As a result of this, code has been added to the generic destroy function to iterate all extensions and call that extension's destroy task if the set has that extension activated, and if such a task is defined. Signed-off-by: Oliver Smith Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index f35afed3814f..3bf9a3d29dff 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -315,6 +315,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); +typedef void (*destroyer)(void *); /* ipset data extension types, in size order */ const struct ip_set_ext_type ip_set_extensions[] = { @@ -329,6 +330,13 @@ const struct ip_set_ext_type ip_set_extensions[] = { .len = sizeof(unsigned long), .align = __alignof__(unsigned long), }, + [IPSET_EXT_ID_COMMENT] = { + .type = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY, + .flag = IPSET_FLAG_WITH_COMMENT, + .len = sizeof(struct ip_set_comment), + .align = __alignof__(struct ip_set_comment), + .destroy = (destroyer) ip_set_comment_free, + }, }; EXPORT_SYMBOL_GPL(ip_set_extensions); @@ -380,6 +388,12 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], ext->packets = be64_to_cpu(nla_get_be64( tb[IPSET_ATTR_PACKETS])); } + if (tb[IPSET_ATTR_COMMENT]) { + if (!(set->extensions & IPSET_EXT_COMMENT)) + return -IPSET_ERR_COMMENT; + ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]); + } + return 0; } EXPORT_SYMBOL_GPL(ip_set_get_extensions); -- cgit v1.2.3 From b90cb8ba19dac9b98add5e64adb583fccbf63f94 Mon Sep 17 00:00:00 2001 From: Oliver Smith Date: Sun, 22 Sep 2013 20:56:32 +0200 Subject: netfilter: ipset: Support comments in bitmap-type ipsets. This provides kernel support for creating bitmap ipsets with comment support. As is the case for hashes, this incurs a penalty when flushing or destroying the entire ipset as the entries must first be walked in order to free the comment strings. This penalty is of course far less than the cost of listing an ipset to userspace. Any set created without support for comments will be flushed/destroyed as before. Signed-off-by: Oliver Smith Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_gen.h | 14 ++++++++------ net/netfilter/ipset/ip_set_bitmap_ip.c | 4 +++- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 4 +++- net/netfilter/ipset/ip_set_bitmap_port.c | 4 +++- 4 files changed, 17 insertions(+), 9 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 4515fe8b83dd..6167fc9d0efe 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -101,12 +101,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize + - set->dsize * map->elements)) || - (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) || - (SET_WITH_COUNTER(set) && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, - htonl(IPSET_FLAG_WITH_COUNTERS)))) + set->dsize * map->elements))) + goto nla_put_failure; + if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; ipset_nest_end(skb, nested); @@ -162,6 +159,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(x, set), ext); + if (SET_WITH_COMMENT(set)) + ip_set_init_comment(ext_comment(x, set), ext); return 0; } @@ -233,6 +232,9 @@ mtype_list(const struct ip_set *set, if (SET_WITH_COUNTER(set) && ip_set_put_counter(skb, ext_counter(x, set))) goto nla_put_failure; + if (SET_WITH_COMMENT(set) && + ip_set_put_comment(skb, ext_comment(x, set))) + goto nla_put_failure; ipset_nest_end(skb, nested); } ipset_nest_end(skb, adt); diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 94d985457c51..faac124e2645 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -26,7 +26,8 @@ #include #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 1 /* Counter support added */ +/* 1 Counter support added */ +#define IPSET_TYPE_REV_MAX 2 /* Comment support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -354,6 +355,7 @@ static struct ip_set_type bitmap_ip_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 654a97bedfe9..fb4d163dea82 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -26,7 +26,8 @@ #include #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 1 /* Counter support added */ +/* 1 Counter support added */ +#define IPSET_TYPE_REV_MAX 2 /* Comment support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -403,6 +404,7 @@ static struct ip_set_type bitmap_ipmac_type = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 1ef2f3186b80..407a63caee6b 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -21,7 +21,8 @@ #include #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 1 /* Counter support added */ +/* 1 Counter support added */ +#define IPSET_TYPE_REV_MAX 2 /* Comment support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -288,6 +289,7 @@ static struct ip_set_type bitmap_port_type = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; -- cgit v1.2.3 From 81b10bb4bd16ea975c007f9bb2c2676cef6ade10 Mon Sep 17 00:00:00 2001 From: Oliver Smith Date: Sun, 22 Sep 2013 20:56:33 +0200 Subject: netfilter: ipset: Support comments in the list-type ipset. This provides kernel support for creating list ipsets with the comment annotation extension. Signed-off-by: Oliver Smith Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_list_set.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 30bf1dd11f04..f9681dcf7476 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -16,7 +16,8 @@ #include #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 1 /* Counters support added */ +/* 1 Counters support added */ +#define IPSET_TYPE_REV_MAX 2 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -191,6 +192,8 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(e, set), ext); + if (SET_WITH_COMMENT(set) && ext->comment) + ip_set_init_comment(ext_comment(e, set), ext); return 0; } @@ -299,6 +302,8 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(e, set), ext); + if (SET_WITH_COMMENT(set)) + ip_set_init_comment(ext_comment(e, set), ext); /* Set is already added to the list */ ip_set_put_byindex(d->id); return 0; @@ -461,15 +466,12 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || - (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) || - (SET_WITH_COUNTER(set) && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, - htonl(IPSET_FLAG_WITH_COUNTERS))) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->size * set->dsize))) goto nla_put_failure; + if (unlikely(ip_set_put_flags(skb, set))) + goto nla_put_failure; ipset_nest_end(skb, nested); return 0; @@ -516,6 +518,9 @@ list_set_list(const struct ip_set *set, if (SET_WITH_COUNTER(set) && ip_set_put_counter(skb, ext_counter(e, set))) goto nla_put_failure; + if (SET_WITH_COMMENT(set) && + ip_set_put_comment(skb, ext_comment(e, set))) + goto nla_put_failure; ipset_nest_end(skb, nested); } finish: @@ -660,6 +665,7 @@ static struct ip_set_type list_set_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; -- cgit v1.2.3 From fda75c6d9e31a901e25b922e86c8fd505c899873 Mon Sep 17 00:00:00 2001 From: Oliver Smith Date: Sun, 22 Sep 2013 20:56:31 +0200 Subject: netfilter: ipset: Support comments in hash-type ipsets. This provides kernel support for creating ipsets with comment support. This does incur a penalty to flushing/destroying an ipset since all entries are walked in order to free the allocated strings, this penalty is of course less expensive than the operation of listing an ipset to userspace, so for general-purpose usage the overall impact is expected to be little to none. Signed-off-by: Oliver Smith Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_gen.h | 14 ++++++++------ net/netfilter/ipset/ip_set_hash_ip.c | 4 +++- net/netfilter/ipset/ip_set_hash_ipport.c | 4 +++- net/netfilter/ipset/ip_set_hash_ipportip.c | 4 +++- net/netfilter/ipset/ip_set_hash_ipportnet.c | 4 +++- net/netfilter/ipset/ip_set_hash_net.c | 4 +++- net/netfilter/ipset/ip_set_hash_netiface.c | 4 +++- net/netfilter/ipset/ip_set_hash_netnet.c | 1 + net/netfilter/ipset/ip_set_hash_netport.c | 4 +++- 9 files changed, 30 insertions(+), 13 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 6bd2eef4f5d0..6efb65569e88 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -701,6 +701,8 @@ reuse_slot: ip_set_timeout_set(ext_timeout(data, set), ext->timeout); if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(data, set), ext); + if (SET_WITH_COMMENT(set)) + ip_set_init_comment(ext_comment(data, set), ext); out: rcu_read_unlock_bh(); @@ -908,12 +910,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; #endif if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || - ((set->extensions & IPSET_EXT_TIMEOUT) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout))) || - ((set->extensions & IPSET_EXT_COUNTER) && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, - htonl(IPSET_FLAG_WITH_COUNTERS)))) + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) + goto nla_put_failure; + if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; ipset_nest_end(skb, nested); @@ -970,6 +969,9 @@ mtype_list(const struct ip_set *set, if (SET_WITH_COUNTER(set) && ip_set_put_counter(skb, ext_counter(e, set))) goto nla_put_failure; + if (SET_WITH_COMMENT(set) && + ip_set_put_comment(skb, ext_comment(e, set))) + goto nla_put_failure; ipset_nest_end(skb, nested); } } diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index a111ffe40b46..e65fc2423d56 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -24,7 +24,8 @@ #include #define IPSET_TYPE_REV_MIN 0 -#define IPSET_TYPE_REV_MAX 1 /* Counters support */ +/* 1 Counters support */ +#define IPSET_TYPE_REV_MAX 2 /* Comments support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -292,6 +293,7 @@ static struct ip_set_type hash_ip_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 5dc735c4dac2..525a595dd1fe 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -26,7 +26,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 SCTP and UDPLITE support added */ -#define IPSET_TYPE_REV_MAX 2 /* Counters support added */ +/* 2 Counters support added */ +#define IPSET_TYPE_REV_MAX 3 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -367,6 +368,7 @@ static struct ip_set_type hash_ipport_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 8c43dc7811cb..f5636631466e 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -26,7 +26,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 SCTP and UDPLITE support added */ -#define IPSET_TYPE_REV_MAX 2 /* Counters support added */ +/* 2 Counters support added */ +#define IPSET_TYPE_REV_MAX 3 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -379,6 +380,7 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 34890452366c..5d87fe8a41ff 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -28,7 +28,8 @@ /* 1 SCTP and UDPLITE support added */ /* 2 Range as input support for IPv4 added */ /* 3 nomatch flag support added */ -#define IPSET_TYPE_REV_MAX 4 /* Counters support added */ +/* 4 Counters support added */ +#define IPSET_TYPE_REV_MAX 5 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -538,6 +539,7 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index d5598557f4a9..8295cf4f9fdc 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -25,7 +25,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 Range as input support for IPv4 added */ /* 2 nomatch flag support added */ -#define IPSET_TYPE_REV_MAX 3 /* Counters support added */ +/* 3 Counters support added */ +#define IPSET_TYPE_REV_MAX 4 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -374,6 +375,7 @@ static struct ip_set_type hash_net_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 26703e9e5082..3f64a66bf5d9 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -26,7 +26,8 @@ #define IPSET_TYPE_REV_MIN 0 /* 1 nomatch flag support added */ /* 2 /0 support added */ -#define IPSET_TYPE_REV_MAX 3 /* Counters support added */ +/* 3 Counters support added */ +#define IPSET_TYPE_REV_MAX 4 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -606,6 +607,7 @@ static struct ip_set_type hash_netiface_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 771ce2b565a6..426032706ca9 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -462,6 +462,7 @@ static struct ip_set_type hash_netnet_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 45b6e91b0636..7097fb0141bf 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -27,7 +27,8 @@ /* 1 SCTP and UDPLITE support added */ /* 2 Range as input support for IPv4 added */ /* 3 nomatch flag support added */ -#define IPSET_TYPE_REV_MAX 4 /* Counters support added */ +/* 4 Counters support added */ +#define IPSET_TYPE_REV_MAX 5 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik "); @@ -486,6 +487,7 @@ static struct ip_set_type hash_netport_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; -- cgit v1.2.3 From 80571a9ea4ca9b6fee7ef0c13d3f31e774e0533b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 23 Sep 2013 17:45:21 +0200 Subject: netfilter: ipset: Fix hash resizing with comments The destroy function must take into account that resizing doesn't create new extensions so those cannot be destroyed at resize. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_gen.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 6efb65569e88..1cffeb977605 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -393,7 +393,7 @@ mtype_flush(struct ip_set *set) /* Destroy the hashtable part of the set */ static void -mtype_ahash_destroy(struct ip_set *set, struct htable *t) +mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) { struct hbucket *n; u32 i; @@ -401,7 +401,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t) for (i = 0; i < jhash_size(t->htable_bits); i++) { n = hbucket(t, i); if (n->size) { - if (set->extensions & IPSET_EXT_DESTROY) + if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) mtype_ext_cleanup(set, n); /* FIXME: use slab cache */ kfree(n->value); @@ -420,7 +420,7 @@ mtype_destroy(struct ip_set *set) if (set->extensions & IPSET_EXT_TIMEOUT) del_timer_sync(&h->gc); - mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table)); + mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table), true); #ifdef IP_SET_HASH_WITH_RBTREE rbtree_destroy(&h->rbtree); #endif @@ -586,7 +586,7 @@ retry: mtype_data_reset_flags(data, &flags); #endif read_unlock_bh(&set->lock); - mtype_ahash_destroy(set, t); + mtype_ahash_destroy(set, t, false); if (ret == -EAGAIN) goto retry; return ret; @@ -607,7 +607,7 @@ retry: pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); - mtype_ahash_destroy(set, orig); + mtype_ahash_destroy(set, orig, false); return 0; } -- cgit v1.2.3 From 8ec81f9a4db94fa5638c55793365b896dd9daecc Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 23 Sep 2013 21:28:06 +0200 Subject: netfilter: ipset: For set:list types, replaced elements must be zeroed out The new extensions require zero initialization for the new element to be added into a slot from where another element was pushed away. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_list_set.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index f9681dcf7476..e23f33c14435 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -184,6 +184,8 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, } memmove(list_set_elem(set, map, i + 1), e, set->dsize * (map->size - (i + 1))); + /* Extensions must be initialized to zero */ + memset(e, 0, set->dsize); } } @@ -192,7 +194,7 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(e, set), ext); - if (SET_WITH_COMMENT(set) && ext->comment) + if (SET_WITH_COMMENT(set)) ip_set_init_comment(ext_comment(e, set), ext); return 0; } -- cgit v1.2.3 From 3fd986b3d99e3847f1cce6fc36043d0f16508e1d Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 25 Sep 2013 17:44:35 +0200 Subject: netfilter: ipset: Use a common function at listing the extensions Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_gen.h | 29 ++++++++++------------------- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 11 ----------- net/netfilter/ipset/ip_set_hash_gen.h | 11 +---------- net/netfilter/ipset/ip_set_list_set.c | 11 +---------- 4 files changed, 12 insertions(+), 50 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 6167fc9d0efe..a13e15be7911 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -183,6 +183,14 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, return 0; } +#ifndef IP_SET_BITMAP_STORED_TIMEOUT +static inline bool +mtype_is_filled(const struct mtype_elem *x) +{ + return true; +} +#endif + static int mtype_list(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb) @@ -215,25 +223,8 @@ mtype_list(const struct ip_set *set, } if (mtype_do_list(skb, map, id, set->dsize)) goto nla_put_failure; - if (SET_WITH_TIMEOUT(set)) { -#ifdef IP_SET_BITMAP_STORED_TIMEOUT - if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_stored(map, id, - ext_timeout(x, set), - set->dsize)))) - goto nla_put_failure; -#else - if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get( - ext_timeout(x, set))))) - goto nla_put_failure; -#endif - } - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(x, set))) - goto nla_put_failure; - if (SET_WITH_COMMENT(set) && - ip_set_put_comment(skb, ext_comment(x, set))) + if (ip_set_put_extensions(skb, set, x, + mtype_is_filled((const struct mtype_elem *) x))) goto nla_put_failure; ipset_nest_end(skb, nested); } diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index fb4d163dea82..87a218f8ab5f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -176,17 +176,6 @@ bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e, return !test_and_clear_bit(e->id, map->members); } -static inline unsigned long -ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout, - size_t dsize) -{ - const struct bitmap_ipmac_elem *elem = - get_elem(map->extensions, id, dsize); - - return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) : - *timeout; -} - static inline int bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map, u32 id, size_t dsize) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 1cffeb977605..b4add206c603 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -961,16 +961,7 @@ mtype_list(const struct ip_set *set, } if (mtype_data_list(skb, e)) goto nla_put_failure; - if (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get( - ext_timeout(e, set))))) - goto nla_put_failure; - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, set))) - goto nla_put_failure; - if (SET_WITH_COMMENT(set) && - ip_set_put_comment(skb, ext_comment(e, set))) + if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; ipset_nest_end(skb, nested); } diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index e23f33c14435..ba4232e451f4 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -512,16 +512,7 @@ list_set_list(const struct ip_set *set, if (nla_put_string(skb, IPSET_ATTR_NAME, ip_set_name_byindex(e->id))) goto nla_put_failure; - if (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get( - ext_timeout(e, set))))) - goto nla_put_failure; - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, set))) - goto nla_put_failure; - if (SET_WITH_COMMENT(set) && - ip_set_put_comment(skb, ext_comment(e, set))) + if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; ipset_nest_end(skb, nested); } -- cgit v1.2.3 From 1785e8f473082aa60d62c7165856cf6484077b99 Mon Sep 17 00:00:00 2001 From: Vitaly Lavrov Date: Mon, 30 Sep 2013 17:07:02 +0200 Subject: netfiler: ipset: Add net namespace for ipset This patch adds netns support for ipset. Major changes were made in ip_set_core.c and ip_set.h. Global variables are moved to per net namespace. Added initialization code and the destruction of the network namespace ipset subsystem. In the prototypes of public functions ip_set_* added parameter "struct net*". The remaining corrections related to the change prototypes of public functions ip_set_*. The patch for git://git.netfilter.org/ipset.git commit 6a4ec96c0b8caac5c35474e40e319704d92ca347 Signed-off-by: Vitaly Lavrov Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_ip.c | 3 +- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 2 +- net/netfilter/ipset/ip_set_bitmap_port.c | 3 +- net/netfilter/ipset/ip_set_core.c | 288 +++++++++++++++++++----------- net/netfilter/ipset/ip_set_hash_gen.h | 3 +- net/netfilter/ipset/ip_set_list_set.c | 31 ++-- net/netfilter/xt_set.c | 40 ++--- 7 files changed, 232 insertions(+), 138 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index faac124e2645..6f1f9f494808 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -242,7 +242,8 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, } static int -bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], + u32 flags) { struct bitmap_ip *map; u32 first_ip = 0, last_ip = 0, hosts; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 87a218f8ab5f..740eabededd9 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -309,7 +309,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, } static int -bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], +bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], u32 flags) { u32 first_ip = 0, last_ip = 0; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 407a63caee6b..e7603c5b53d7 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -228,7 +228,8 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, } static int -bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], + u32 flags) { struct bitmap_port *map; u16 first_port, last_port; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 3bf9a3d29dff..dc9284bdd2dd 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include @@ -27,8 +29,17 @@ static LIST_HEAD(ip_set_type_list); /* all registered set types */ static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ -static struct ip_set * __rcu *ip_set_list; /* all individual sets */ -static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ +struct ip_set_net { + struct ip_set * __rcu *ip_set_list; /* all individual sets */ + ip_set_id_t ip_set_max; /* max number of sets */ + int is_deleted; /* deleted by ip_set_net_exit */ +}; +static int ip_set_net_id __read_mostly; + +static inline struct ip_set_net *ip_set_pernet(struct net *net) +{ + return net_generic(net, ip_set_net_id); +} #define IP_SET_INC 64 #define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0) @@ -45,8 +56,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); /* When the nfnl mutex is held: */ #define nfnl_dereference(p) \ rcu_dereference_protected(p, 1) -#define nfnl_set(id) \ - nfnl_dereference(ip_set_list)[id] +#define nfnl_set(inst, id) \ + nfnl_dereference((inst)->ip_set_list)[id] /* * The set types are implemented in modules and registered set types @@ -434,13 +445,14 @@ __ip_set_put(struct ip_set *set) */ static inline struct ip_set * -ip_set_rcu_get(ip_set_id_t index) +ip_set_rcu_get(struct net *net, ip_set_id_t index) { struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); rcu_read_lock(); /* ip_set_list itself needs to be protected */ - set = rcu_dereference(ip_set_list)[index]; + set = rcu_dereference(inst->ip_set_list)[index]; rcu_read_unlock(); return set; @@ -450,7 +462,8 @@ int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(index); + struct ip_set *set = ip_set_rcu_get( + dev_net(par->in ? par->in : par->out), index); int ret = 0; BUG_ON(set == NULL); @@ -488,7 +501,8 @@ int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(index); + struct ip_set *set = ip_set_rcu_get( + dev_net(par->in ? par->in : par->out), index); int ret; BUG_ON(set == NULL); @@ -510,7 +524,8 @@ int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(index); + struct ip_set *set = ip_set_rcu_get( + dev_net(par->in ? par->in : par->out), index); int ret = 0; BUG_ON(set == NULL); @@ -534,14 +549,15 @@ EXPORT_SYMBOL_GPL(ip_set_del); * */ ip_set_id_t -ip_set_get_byname(const char *name, struct ip_set **set) +ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) { ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; + struct ip_set_net *inst = ip_set_pernet(net); rcu_read_lock(); - for (i = 0; i < ip_set_max; i++) { - s = rcu_dereference(ip_set_list)[i]; + for (i = 0; i < inst->ip_set_max; i++) { + s = rcu_dereference(inst->ip_set_list)[i]; if (s != NULL && STREQ(s->name, name)) { __ip_set_get(s); index = i; @@ -561,17 +577,26 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname); * to be valid, after calling this function. * */ -void -ip_set_put_byindex(ip_set_id_t index) + +static inline void +__ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index) { struct ip_set *set; rcu_read_lock(); - set = rcu_dereference(ip_set_list)[index]; + set = rcu_dereference(inst->ip_set_list)[index]; if (set != NULL) __ip_set_put(set); rcu_read_unlock(); } + +void +ip_set_put_byindex(struct net *net, ip_set_id_t index) +{ + struct ip_set_net *inst = ip_set_pernet(net); + + __ip_set_put_byindex(inst, index); +} EXPORT_SYMBOL_GPL(ip_set_put_byindex); /* @@ -582,9 +607,9 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex); * */ const char * -ip_set_name_byindex(ip_set_id_t index) +ip_set_name_byindex(struct net *net, ip_set_id_t index) { - const struct ip_set *set = ip_set_rcu_get(index); + const struct ip_set *set = ip_set_rcu_get(net, index); BUG_ON(set == NULL); BUG_ON(set->ref == 0); @@ -606,14 +631,15 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex); * The nfnl mutex is used in the function. */ ip_set_id_t -ip_set_nfnl_get(const char *name) +ip_set_nfnl_get(struct net *net, const char *name) { ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; + struct ip_set_net *inst = ip_set_pernet(net); nfnl_lock(NFNL_SUBSYS_IPSET); - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL && STREQ(s->name, name)) { __ip_set_get(s); index = i; @@ -633,15 +659,16 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get); * The nfnl mutex is used in the function. */ ip_set_id_t -ip_set_nfnl_get_byindex(ip_set_id_t index) +ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) { struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); - if (index > ip_set_max) + if (index > inst->ip_set_max) return IPSET_INVALID_ID; nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(index); + set = nfnl_set(inst, index); if (set) __ip_set_get(set); else @@ -660,13 +687,17 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex); * The nfnl mutex is used in the function. */ void -ip_set_nfnl_put(ip_set_id_t index) +ip_set_nfnl_put(struct net *net, ip_set_id_t index) { struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); + nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(index); - if (set != NULL) - __ip_set_put(set); + if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */ + set = nfnl_set(inst, index); + if (set != NULL) + __ip_set_put(set); + } nfnl_unlock(NFNL_SUBSYS_IPSET); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); @@ -724,14 +755,14 @@ static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static struct ip_set * -find_set_and_id(const char *name, ip_set_id_t *id) +find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id) { struct ip_set *set = NULL; ip_set_id_t i; *id = IPSET_INVALID_ID; - for (i = 0; i < ip_set_max; i++) { - set = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + set = nfnl_set(inst, i); if (set != NULL && STREQ(set->name, name)) { *id = i; break; @@ -741,22 +772,23 @@ find_set_and_id(const char *name, ip_set_id_t *id) } static inline struct ip_set * -find_set(const char *name) +find_set(struct ip_set_net *inst, const char *name) { ip_set_id_t id; - return find_set_and_id(name, &id); + return find_set_and_id(inst, name, &id); } static int -find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set) +find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, + struct ip_set **set) { struct ip_set *s; ip_set_id_t i; *index = IPSET_INVALID_ID; - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s == NULL) { if (*index == IPSET_INVALID_ID) *index = i; @@ -785,6 +817,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct net *net = sock_net(ctnl); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *clash = NULL; ip_set_id_t index = IPSET_INVALID_ID; struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; @@ -843,7 +877,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, goto put_out; } - ret = set->type->create(set, tb, flags); + ret = set->type->create(net, set, tb, flags); if (ret != 0) goto put_out; @@ -854,7 +888,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * by the nfnl mutex. Find the first free index in ip_set_list * and check clashing. */ - ret = find_free_id(set->name, &index, &clash); + ret = find_free_id(inst, set->name, &index, &clash); if (ret == -EEXIST) { /* If this is the same set and requested, ignore error */ if ((flags & IPSET_FLAG_EXIST) && @@ -867,9 +901,9 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, goto cleanup; } else if (ret == -IPSET_ERR_MAX_SETS) { struct ip_set **list, **tmp; - ip_set_id_t i = ip_set_max + IP_SET_INC; + ip_set_id_t i = inst->ip_set_max + IP_SET_INC; - if (i < ip_set_max || i == IPSET_INVALID_ID) + if (i < inst->ip_set_max || i == IPSET_INVALID_ID) /* Wraparound */ goto cleanup; @@ -877,14 +911,14 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ - tmp = nfnl_dereference(ip_set_list); - memcpy(list, tmp, sizeof(struct ip_set *) * ip_set_max); - rcu_assign_pointer(ip_set_list, list); + tmp = nfnl_dereference(inst->ip_set_list); + memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max); + rcu_assign_pointer(inst->ip_set_list, list); /* Make sure all current packets have passed through */ synchronize_net(); /* Use new list */ - index = ip_set_max; - ip_set_max = i; + index = inst->ip_set_max; + inst->ip_set_max = i; kfree(tmp); ret = 0; } else if (ret) @@ -894,7 +928,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * Finally! Add our shiny new set to the list, and be done. */ pr_debug("create: '%s' created with index %u!\n", set->name, index); - nfnl_set(index) = set; + nfnl_set(inst, index) = set; return ret; @@ -917,12 +951,12 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static void -ip_set_destroy_set(ip_set_id_t index) +ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index) { - struct ip_set *set = nfnl_set(index); + struct ip_set *set = nfnl_set(inst, index); pr_debug("set: %s\n", set->name); - nfnl_set(index) = NULL; + nfnl_set(inst, index) = NULL; /* Must call it without holding any lock */ set->variant->destroy(set); @@ -935,6 +969,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *s; ip_set_id_t i; int ret = 0; @@ -954,21 +989,22 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, */ read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL && s->ref) { ret = -IPSET_ERR_BUSY; goto out; } } read_unlock_bh(&ip_set_ref_lock); - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL) - ip_set_destroy_set(i); + ip_set_destroy_set(inst, i); } } else { - s = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &i); + s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), + &i); if (s == NULL) { ret = -ENOENT; goto out; @@ -978,7 +1014,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, } read_unlock_bh(&ip_set_ref_lock); - ip_set_destroy_set(i); + ip_set_destroy_set(inst, i); } return 0; out: @@ -1003,6 +1039,7 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *s; ip_set_id_t i; @@ -1010,13 +1047,13 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, return -IPSET_ERR_PROTOCOL; if (!attr[IPSET_ATTR_SETNAME]) { - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL) ip_set_flush_set(s); } } else { - s = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (s == NULL) return -ENOENT; @@ -1042,6 +1079,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set, *s; const char *name2; ip_set_id_t i; @@ -1052,7 +1090,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_SETNAME2] == NULL)) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1063,8 +1101,8 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, } name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL && STREQ(s->name, name2)) { ret = -IPSET_ERR_EXIST_SETNAME2; goto out; @@ -1091,6 +1129,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *from, *to; ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; @@ -1100,11 +1139,13 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_SETNAME2] == NULL)) return -IPSET_ERR_PROTOCOL; - from = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &from_id); + from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), + &from_id); if (from == NULL) return -ENOENT; - to = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id); + to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]), + &to_id); if (to == NULL) return -IPSET_ERR_EXIST_SETNAME2; @@ -1121,8 +1162,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, write_lock_bh(&ip_set_ref_lock); swap(from->ref, to->ref); - nfnl_set(from_id) = to; - nfnl_set(to_id) = from; + nfnl_set(inst, from_id) = to; + nfnl_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); return 0; @@ -1141,9 +1182,10 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, static int ip_set_dump_done(struct netlink_callback *cb) { + struct ip_set_net *inst = (struct ip_set_net *)cb->data; if (cb->args[2]) { - pr_debug("release set %s\n", nfnl_set(cb->args[1])->name); - ip_set_put_byindex((ip_set_id_t) cb->args[1]); + pr_debug("release set %s\n", nfnl_set(inst, cb->args[1])->name); + __ip_set_put_byindex(inst, (ip_set_id_t) cb->args[1]); } return 0; } @@ -1169,6 +1211,7 @@ dump_init(struct netlink_callback *cb) struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; ip_set_id_t index; + struct ip_set_net *inst = (struct ip_set_net *)cb->data; /* Second pass, so parser can't fail */ nla_parse(cda, IPSET_ATTR_CMD_MAX, @@ -1182,7 +1225,7 @@ dump_init(struct netlink_callback *cb) if (cda[IPSET_ATTR_SETNAME]) { struct ip_set *set; - set = find_set_and_id(nla_data(cda[IPSET_ATTR_SETNAME]), + set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), &index); if (set == NULL) return -ENOENT; @@ -1210,6 +1253,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; u32 dump_type, dump_flags; int ret = 0; + struct ip_set_net *inst = (struct ip_set_net *)cb->data; if (!cb->args[0]) { ret = dump_init(cb); @@ -1223,18 +1267,18 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) } } - if (cb->args[1] >= ip_set_max) + if (cb->args[1] >= inst->ip_set_max) goto out; dump_type = DUMP_TYPE(cb->args[0]); dump_flags = DUMP_FLAGS(cb->args[0]); - max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max; + max = dump_type == DUMP_ONE ? cb->args[1] + 1 : inst->ip_set_max; dump_last: pr_debug("args[0]: %u %u args[1]: %ld\n", dump_type, dump_flags, cb->args[1]); for (; cb->args[1] < max; cb->args[1]++) { index = (ip_set_id_t) cb->args[1]; - set = nfnl_set(index); + set = nfnl_set(inst, index); if (set == NULL) { if (dump_type == DUMP_ONE) { ret = -ENOENT; @@ -1312,8 +1356,8 @@ next_set: release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[2]) { - pr_debug("release set %s\n", nfnl_set(index)->name); - ip_set_put_byindex(index); + pr_debug("release set %s\n", nfnl_set(inst, index)->name); + __ip_set_put_byindex(inst, index); cb->args[2] = 0; } out: @@ -1331,6 +1375,8 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; @@ -1338,6 +1384,7 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, struct netlink_dump_control c = { .dump = ip_set_dump_start, .done = ip_set_dump_done, + .data = (void *)inst }; return netlink_dump_start(ctnl, skb, nlh, &c); } @@ -1416,6 +1463,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; const struct nlattr *nla; @@ -1434,7 +1482,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_LINENO] == NULL)))) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1470,6 +1518,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; const struct nlattr *nla; @@ -1488,7 +1537,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_LINENO] == NULL)))) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1524,6 +1573,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; int ret = 0; @@ -1534,7 +1584,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, !flag_nested(attr[IPSET_ATTR_DATA]))) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1559,6 +1609,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); const struct ip_set *set; struct sk_buff *skb2; struct nlmsghdr *nlh2; @@ -1568,7 +1619,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_SETNAME] == NULL)) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1793,8 +1844,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) unsigned int *op; void *data; int copylen = *len, ret = 0; + struct net *net = sock_net(sk); + struct ip_set_net *inst = ip_set_pernet(net); - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (optval != SO_IP_SET) return -EBADF; @@ -1843,7 +1896,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; nfnl_lock(NFNL_SUBSYS_IPSET); - find_set_and_id(req_get->set.name, &id); + find_set_and_id(inst, req_get->set.name, &id); req_get->set.index = id; nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; @@ -1858,10 +1911,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; nfnl_lock(NFNL_SUBSYS_IPSET); - find_set_and_id(req_get->set.name, &id); + find_set_and_id(inst, req_get->set.name, &id); req_get->set.index = id; if (id != IPSET_INVALID_ID) - req_get->family = nfnl_set(id)->family; + req_get->family = nfnl_set(inst, id)->family; nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } @@ -1870,12 +1923,12 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) struct ip_set *set; if (*len != sizeof(struct ip_set_req_get_set) || - req_get->set.index >= ip_set_max) { + req_get->set.index >= inst->ip_set_max) { ret = -EINVAL; goto done; } nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(req_get->set.index); + set = nfnl_set(inst, req_get->set.index); strncpy(req_get->set.name, set ? set->name : "", IPSET_MAXNAMELEN); nfnl_unlock(NFNL_SUBSYS_IPSET); @@ -1904,49 +1957,82 @@ static struct nf_sockopt_ops so_set __read_mostly = { .owner = THIS_MODULE, }; -static int __init -ip_set_init(void) +static int __net_init +ip_set_net_init(struct net *net) { + struct ip_set_net *inst = ip_set_pernet(net); + struct ip_set **list; - int ret; - if (max_sets) - ip_set_max = max_sets; - if (ip_set_max >= IPSET_INVALID_ID) - ip_set_max = IPSET_INVALID_ID - 1; + inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX; + if (inst->ip_set_max >= IPSET_INVALID_ID) + inst->ip_set_max = IPSET_INVALID_ID - 1; - list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL); + list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL); if (!list) return -ENOMEM; + inst->is_deleted = 0; + rcu_assign_pointer(inst->ip_set_list, list); + pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL); + return 0; +} + +static void __net_exit +ip_set_net_exit(struct net *net) +{ + struct ip_set_net *inst = ip_set_pernet(net); + + struct ip_set *set = NULL; + ip_set_id_t i; + + inst->is_deleted = 1; /* flag for ip_set_nfnl_put */ + + for (i = 0; i < inst->ip_set_max; i++) { + set = nfnl_set(inst, i); + if (set != NULL) + ip_set_destroy_set(inst, i); + } + kfree(rcu_dereference_protected(inst->ip_set_list, 1)); +} + +static struct pernet_operations ip_set_net_ops = { + .init = ip_set_net_init, + .exit = ip_set_net_exit, + .id = &ip_set_net_id, + .size = sizeof(struct ip_set_net) +}; + - rcu_assign_pointer(ip_set_list, list); - ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); +static int __init +ip_set_init(void) +{ + int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); if (ret != 0) { pr_err("ip_set: cannot register with nfnetlink.\n"); - kfree(list); return ret; } ret = nf_register_sockopt(&so_set); if (ret != 0) { pr_err("SO_SET registry failed: %d\n", ret); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - kfree(list); return ret; } - - pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL); + ret = register_pernet_subsys(&ip_set_net_ops); + if (ret) { + pr_err("ip_set: cannot register pernet_subsys.\n"); + nf_unregister_sockopt(&so_set); + nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + return ret; + } return 0; } static void __exit ip_set_fini(void) { - struct ip_set **list = rcu_dereference_protected(ip_set_list, 1); - - /* There can't be any existing set */ + unregister_pernet_subsys(&ip_set_net_ops); nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - kfree(list); pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index b4add206c603..6a80dbd30df7 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -1011,7 +1011,8 @@ static const struct ip_set_type_variant mtype_variant = { #ifdef IP_SET_EMIT_CREATE static int -IPSET_TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) +IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, + struct nlattr *tb[], u32 flags) { u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; u8 hbits; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index ba4232e451f4..ec6f6d15dded 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -39,6 +39,7 @@ struct set_adt_elem { struct list_set { u32 size; /* size of set list array */ struct timer_list gc; /* garbage collection */ + struct net *net; /* namespace */ struct set_elem members[0]; /* the set members */ }; @@ -171,7 +172,7 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, if (e->id != IPSET_INVALID_ID) { if (i == map->size - 1) { /* Last element replaced: e.g. add new,before,last */ - ip_set_put_byindex(e->id); + ip_set_put_byindex(map->net, e->id); ip_set_ext_destroy(set, e); } else { struct set_elem *x = list_set_elem(set, map, @@ -179,7 +180,7 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, /* Last element pushed off */ if (x->id != IPSET_INVALID_ID) { - ip_set_put_byindex(x->id); + ip_set_put_byindex(map->net, x->id); ip_set_ext_destroy(set, x); } memmove(list_set_elem(set, map, i + 1), e, @@ -205,7 +206,7 @@ list_set_del(struct ip_set *set, u32 i) struct list_set *map = set->data; struct set_elem *e = list_set_elem(set, map, i); - ip_set_put_byindex(e->id); + ip_set_put_byindex(map->net, e->id); ip_set_ext_destroy(set, e); if (i < map->size - 1) @@ -307,7 +308,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_COMMENT(set)) ip_set_init_comment(ext_comment(e, set), ext); /* Set is already added to the list */ - ip_set_put_byindex(d->id); + ip_set_put_byindex(map->net, d->id); return 0; } insert: @@ -366,6 +367,7 @@ static int list_set_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { + struct list_set *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct set_adt_elem e = { .refid = IPSET_INVALID_ID }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); @@ -385,7 +387,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - e.id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s); + e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s); if (e.id == IPSET_INVALID_ID) return -IPSET_ERR_NAME; /* "Loop detection" */ @@ -405,7 +407,8 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], } if (tb[IPSET_ATTR_NAMEREF]) { - e.refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]), + e.refid = ip_set_get_byname(map->net, + nla_data(tb[IPSET_ATTR_NAMEREF]), &s); if (e.refid == IPSET_INVALID_ID) { ret = -IPSET_ERR_NAMEREF; @@ -421,9 +424,9 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], finish: if (e.refid != IPSET_INVALID_ID) - ip_set_put_byindex(e.refid); + ip_set_put_byindex(map->net, e.refid); if (adt != IPSET_ADD || ret) - ip_set_put_byindex(e.id); + ip_set_put_byindex(map->net, e.id); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -438,7 +441,7 @@ list_set_flush(struct ip_set *set) for (i = 0; i < map->size; i++) { e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { - ip_set_put_byindex(e->id); + ip_set_put_byindex(map->net, e->id); ip_set_ext_destroy(set, e); e->id = IPSET_INVALID_ID; } @@ -510,7 +513,7 @@ list_set_list(const struct ip_set *set, goto nla_put_failure; } if (nla_put_string(skb, IPSET_ATTR_NAME, - ip_set_name_byindex(e->id))) + ip_set_name_byindex(map->net, e->id))) goto nla_put_failure; if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; @@ -587,7 +590,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) /* Create list:set type of sets */ static bool -init_list_set(struct ip_set *set, u32 size) +init_list_set(struct net *net, struct ip_set *set, u32 size) { struct list_set *map; struct set_elem *e; @@ -598,6 +601,7 @@ init_list_set(struct ip_set *set, u32 size) return false; map->size = size; + map->net = net; set->data = map; for (i = 0; i < size; i++) { @@ -609,7 +613,8 @@ init_list_set(struct ip_set *set, u32 size) } static int -list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[], + u32 flags) { u32 size = IP_SET_LIST_DEFAULT_SIZE; @@ -625,7 +630,7 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) set->variant = &set_variant; set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem)); - if (!init_list_set(set, size)) + if (!init_list_set(net, set, size)) return -ENOMEM; if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 2095488684ff..e7c4e0e01ff5 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -81,7 +81,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) struct xt_set_info_match_v0 *info = par->matchinfo; ip_set_id_t index; - index = ip_set_nfnl_get_byindex(info->match_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find set indentified by id %u to match\n", @@ -91,7 +91,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) { pr_warning("Protocol error: set match dimension " "is over the limit!\n"); - ip_set_nfnl_put(info->match_set.index); + ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -106,7 +106,7 @@ set_match_v0_destroy(const struct xt_mtdtor_param *par) { struct xt_set_info_match_v0 *info = par->matchinfo; - ip_set_nfnl_put(info->match_set.index); + ip_set_nfnl_put(par->net, info->match_set.index); } /* Revision 1 match */ @@ -131,7 +131,7 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par) struct xt_set_info_match_v1 *info = par->matchinfo; ip_set_id_t index; - index = ip_set_nfnl_get_byindex(info->match_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find set indentified by id %u to match\n", @@ -141,7 +141,7 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par) if (info->match_set.dim > IPSET_DIM_MAX) { pr_warning("Protocol error: set match dimension " "is over the limit!\n"); - ip_set_nfnl_put(info->match_set.index); + ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -153,7 +153,7 @@ set_match_v1_destroy(const struct xt_mtdtor_param *par) { struct xt_set_info_match_v1 *info = par->matchinfo; - ip_set_nfnl_put(info->match_set.index); + ip_set_nfnl_put(par->net, info->match_set.index); } /* Revision 3 match */ @@ -228,7 +228,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) ip_set_id_t index; if (info->add_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->add_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find add_set index %u as target\n", info->add_set.index); @@ -237,12 +237,12 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) } if (info->del_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->del_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find del_set index %u as target\n", info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; } } @@ -251,9 +251,9 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) pr_warning("Protocol error: SET target dimension " "is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); return -ERANGE; } @@ -270,9 +270,9 @@ set_target_v0_destroy(const struct xt_tgdtor_param *par) const struct xt_set_info_target_v0 *info = par->targinfo; if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); } /* Revision 1 target */ @@ -301,7 +301,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) ip_set_id_t index; if (info->add_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->add_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find add_set index %u as target\n", info->add_set.index); @@ -310,12 +310,12 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) } if (info->del_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->del_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find del_set index %u as target\n", info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; } } @@ -324,9 +324,9 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) pr_warning("Protocol error: SET target dimension " "is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); return -ERANGE; } @@ -339,9 +339,9 @@ set_target_v1_destroy(const struct xt_tgdtor_param *par) const struct xt_set_info_target_v1 *info = par->targinfo; if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); } /* Revision 2 target */ -- cgit v1.2.3 From 7c3ad056ef79fd10f5f111c807ccbd9fa9068c7f Mon Sep 17 00:00:00 2001 From: Oliver Smith Date: Sat, 28 Sep 2013 20:33:23 +0200 Subject: netfilter: ipset: Add hash:net,port,net module to kernel. This adds a new set that provides similar functionality to ip,port,net but permits arbitrary size subnets for both the first and last parameter. Signed-off-by: Oliver Smith Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/Kconfig | 9 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_hash_netportnet.c | 588 +++++++++++++++++++++++++++ 3 files changed, 598 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_hash_netportnet.c (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 9119f658a69b..a2d6263b6c64 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -90,6 +90,15 @@ config IP_SET_HASH_IPPORTNET To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_NETPORTNET + tristate "hash:net,port,net set support" + depends on IP_SET + help + This option adds the hash:net,port,net set type support, by which + one can store two IPv4/IPv6 subnets, and a protocol/port in a set. + + To compile it as a module, choose M here. If unsure, say N. + config IP_SET_HASH_NET tristate "hash:net set support" depends on IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 43eef7a19e3c..44b2d38476fa 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o obj-$(CONFIG_IP_SET_HASH_NETNET) += ip_set_hash_netnet.o +obj-$(CONFIG_IP_SET_HASH_NETPORTNET) += ip_set_hash_netportnet.o # list types obj-$(CONFIG_IP_SET_LIST_SET) += ip_set_list_set.o diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c new file mode 100644 index 000000000000..363fab933d48 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -0,0 +1,588 @@ +/* Copyright (C) 2003-2013 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:ip,port,net type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 0 /* Comments support added */ + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Oliver Smith "); +IP_SET_MODULE_DESC("hash:net,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); +MODULE_ALIAS("ip_set_hash:net,port,net"); + +/* Type specific function prefix */ +#define HTYPE hash_netportnet +#define IP_SET_HASH_WITH_PROTO +#define IP_SET_HASH_WITH_NETS +#define IPSET_NET_COUNT 2 + +/* IPv4 variant */ + +/* Member elements */ +struct hash_netportnet4_elem { + union { + __be32 ip[2]; + __be64 ipcmp; + }; + __be16 port; + union { + u8 cidr[2]; + u16 ccmp; + }; + u8 nomatch:1; + u8 proto; +}; + +/* Common functions */ + +static inline bool +hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1, + const struct hash_netportnet4_elem *ip2, + u32 *multi) +{ + return ip1->ipcmp == ip2->ipcmp && + ip1->ccmp == ip2->ccmp && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline int +hash_netportnet4_do_data_match(const struct hash_netportnet4_elem *elem) +{ + return elem->nomatch ? -ENOTEMPTY : 1; +} + +static inline void +hash_netportnet4_data_set_flags(struct hash_netportnet4_elem *elem, u32 flags) +{ + elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags) +{ + swap(*flags, elem->nomatch); +} + +static inline void +hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem, + struct hash_netportnet4_elem *orig) +{ + elem->ip[1] = orig->ip[1]; +} + +static inline void +hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem, + u8 cidr, bool inner) +{ + if (inner) { + elem->ip[1] &= ip_set_netmask(cidr); + elem->cidr[1] = cidr; + } else { + elem->ip[0] &= ip_set_netmask(cidr); + elem->cidr[0] = cidr; + } +} + +static bool +hash_netportnet4_data_list(struct sk_buff *skb, + const struct hash_netportnet4_elem *data) +{ + u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; + + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip[0]) || + nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip[1]) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_netportnet4_data_next(struct hash_netportnet4_elem *next, + const struct hash_netportnet4_elem *d) +{ + next->ipcmp = d->ipcmp; + next->port = d->port; +} + +#define MTYPE hash_netportnet4 +#define PF 4 +#define HOST_MASK 32 +#include "ip_set_hash_gen.h" + +static int +hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_netportnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet4_elem e = { + .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK), + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + if (adt == IPSET_TEST) + e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; + + if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, + &e.port, &e.proto)) + return -EINVAL; + + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0]); + ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip[1]); + e.ip[0] &= ip_set_netmask(e.cidr[0]); + e.ip[1] &= ip_set_netmask(e.cidr[1]); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + const struct hash_netportnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet4_elem e = { .cidr[0] = HOST_MASK, + .cidr[1] = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to; + u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; + bool with_ports = false; + u8 cidr, cidr2; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || + ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) { + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!cidr || cidr > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + e.cidr[0] = cidr; + } + + if (tb[IPSET_ATTR_CIDR2]) { + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + if (!cidr || cidr > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + e.cidr[1] = cidr; + } + + if (tb[IPSET_ATTR_PORT]) + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(e.proto); + + if (e.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + if (!(with_ports || e.proto == IPPROTO_ICMP)) + e.port = 0; + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); + } + + with_ports = with_ports && tb[IPSET_ATTR_PORT_TO]; + if (adt == IPSET_TEST || + !(tb[IPSET_ATTR_IP_TO] || with_ports || tb[IPSET_ATTR_IP2_TO])) { + e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0])); + e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1])); + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt, set) ? -ret : + ip_set_eexist(ret, flags) ? 0 : ret; + } + + ip_to = ip; + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip > ip_to) + swap(ip, ip_to); + if (unlikely(ip + UINT_MAX == ip_to)) + return -IPSET_ERR_HASH_RANGE; + } + + port_to = port = ntohs(e.port); + if (tb[IPSET_ATTR_PORT_TO]) { + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + } + + ip2_to = ip2_from; + if (tb[IPSET_ATTR_IP2_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); + if (ret) + return ret; + if (ip2_from > ip2_to) + swap(ip2_from, ip2_to); + if (unlikely(ip2_from + UINT_MAX == ip2_to)) + return -IPSET_ERR_HASH_RANGE; + } + + if (retried) + ip = ntohl(h->next.ip[0]); + + while (!after(ip, ip_to)) { + e.ip[0] = htonl(ip); + ip_last = ip_set_range_to_cidr(ip, ip_to, &cidr); + e.cidr[0] = cidr; + p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port) + : port; + for (; p <= port_to; p++) { + e.port = htons(p); + ip2 = (retried && ip == ntohl(h->next.ip[0]) && + p == ntohs(h->next.port)) ? ntohl(h->next.ip[1]) + : ip2_from; + while (!after(ip2, ip2_to)) { + e.ip[1] = htonl(ip2); + ip2_last = ip_set_range_to_cidr(ip2, ip2_to, + &cidr2); + e.cidr[1] = cidr2; + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + ip2 = ip2_last + 1; + } + } + ip = ip_last + 1; + } + return ret; +} + +/* IPv6 variant */ + +struct hash_netportnet6_elem { + union nf_inet_addr ip[2]; + __be16 port; + union { + u8 cidr[2]; + u16 ccmp; + }; + u8 nomatch:1; + u8 proto; +}; + +/* Common functions */ + +static inline bool +hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1, + const struct hash_netportnet6_elem *ip2, + u32 *multi) +{ + return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) && + ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) && + ip1->ccmp == ip2->ccmp && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline int +hash_netportnet6_do_data_match(const struct hash_netportnet6_elem *elem) +{ + return elem->nomatch ? -ENOTEMPTY : 1; +} + +static inline void +hash_netportnet6_data_set_flags(struct hash_netportnet6_elem *elem, u32 flags) +{ + elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags) +{ + swap(*flags, elem->nomatch); +} + +static inline void +hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem, + struct hash_netportnet6_elem *orig) +{ + elem->ip[1] = orig->ip[1]; +} + +static inline void +hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem, + u8 cidr, bool inner) +{ + if (inner) { + ip6_netmask(&elem->ip[1], cidr); + elem->cidr[1] = cidr; + } else { + ip6_netmask(&elem->ip[0], cidr); + elem->cidr[0] = cidr; + } +} + +static bool +hash_netportnet6_data_list(struct sk_buff *skb, + const struct hash_netportnet6_elem *data) +{ + u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; + + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip[0].in6) || + nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip[1].in6) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_netportnet6_data_next(struct hash_netportnet4_elem *next, + const struct hash_netportnet6_elem *d) +{ + next->port = d->port; +} + +#undef MTYPE +#undef PF +#undef HOST_MASK + +#define MTYPE hash_netportnet6 +#define PF 6 +#define HOST_MASK 128 +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" + +static int +hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_netportnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet6_elem e = { + .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK), + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + if (adt == IPSET_TEST) + e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK; + + if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, + &e.port, &e.proto)) + return -EINVAL; + + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip[1].in6); + ip6_netmask(&e.ip[0], e.cidr[0]); + ip6_netmask(&e.ip[1], e.cidr[1]); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + const struct hash_netportnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet6_elem e = { .cidr[0] = HOST_MASK, + .cidr[1] = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 port, port_to; + bool with_ports = false; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) || + ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (tb[IPSET_ATTR_CIDR2]) + e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + + if (unlikely(!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] || + e.cidr[1] > HOST_MASK)) + return -IPSET_ERR_INVALID_CIDR; + + ip6_netmask(&e.ip[0], e.cidr[0]); + ip6_netmask(&e.ip[1], e.cidr[1]); + + if (tb[IPSET_ATTR_PORT]) + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(e.proto); + + if (e.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + if (!(with_ports || e.proto == IPPROTO_ICMPV6)) + e.port = 0; + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); + } + + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt, set) ? -ret : + ip_set_eexist(ret, flags) ? 0 : ret; + } + + port = ntohs(e.port); + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + + if (retried) + port = ntohs(h->next.port); + for (; port <= port_to; port++) { + e.port = htons(port); + ret = adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static struct ip_set_type hash_netportnet_type __read_mostly = { + .name = "hash:net,port,net", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2 | + IPSET_TYPE_NOMATCH, + .dimension = IPSET_DIM_THREE, + .family = NFPROTO_UNSPEC, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, + .create = hash_netportnet_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_CIDR2] = { .type = NLA_U8 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_netportnet_init(void) +{ + return ip_set_type_register(&hash_netportnet_type); +} + +static void __exit +hash_netportnet_fini(void) +{ + ip_set_type_unregister(&hash_netportnet_type); +} + +module_init(hash_netportnet_init); +module_exit(hash_netportnet_fini); -- cgit v1.2.3 From 7433268783eda21a04bf963f9992547a848d44a4 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 23 Sep 2013 19:20:55 +0800 Subject: netfilter: nfnetlink_queue: use proper net namespace to allocate skb Use proper net struct to allocate skb, otherwise netlink mmap will have no effect. Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index ae2e5c11d01a..21258cf70091 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -298,7 +298,7 @@ nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, } static struct sk_buff * -nfqnl_build_packet_message(struct nfqnl_instance *queue, +nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry, __be32 **packet_id_ptr) { @@ -372,7 +372,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (queue->flags & NFQA_CFG_F_CONNTRACK) ct = nfqnl_ct_get(entskb, &size, &ctinfo); - skb = nfnetlink_alloc_skb(&init_net, size, queue->peer_portid, + skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, GFP_ATOMIC); if (!skb) return NULL; @@ -525,7 +525,7 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, __be32 *packet_id_ptr; int failopen = 0; - nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr); + nskb = nfqnl_build_packet_message(net, queue, entry, &packet_id_ptr); if (nskb == NULL) { err = -ENOMEM; goto err_out; -- cgit v1.2.3 From afff14f6089ac50f05f29e489c5b2a067077d560 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 23 Sep 2013 19:20:56 +0800 Subject: netfilter: nfnetlink_log: use proper net to allocate skb Use proper net struct to allocate skb, otherwise netlink mmap will be of no effect. Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index d92cc317bf8b..3c4b69e5fe17 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -319,7 +319,8 @@ nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags) } static struct sk_buff * -nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size) +nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, + unsigned int pkt_size) { struct sk_buff *skb; unsigned int n; @@ -328,13 +329,13 @@ nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size) * message. WARNING: has to be <= 128k due to slab restrictions */ n = max(inst_size, pkt_size); - skb = nfnetlink_alloc_skb(&init_net, n, peer_portid, GFP_ATOMIC); + skb = nfnetlink_alloc_skb(net, n, peer_portid, GFP_ATOMIC); if (!skb) { if (n > pkt_size) { /* try to allocate only as much as we need for current * packet */ - skb = nfnetlink_alloc_skb(&init_net, pkt_size, + skb = nfnetlink_alloc_skb(net, pkt_size, peer_portid, GFP_ATOMIC); if (!skb) pr_err("nfnetlink_log: can't even alloc %u bytes\n", @@ -702,8 +703,8 @@ nfulnl_log_packet(struct net *net, } if (!inst->skb) { - inst->skb = nfulnl_alloc_skb(inst->peer_portid, inst->nlbufsiz, - size); + inst->skb = nfulnl_alloc_skb(net, inst->peer_portid, + inst->nlbufsiz, size); if (!inst->skb) goto alloc_failure; } -- cgit v1.2.3 From 180cf72f56fab2810e00497c087c7126bfe53c85 Mon Sep 17 00:00:00 2001 From: "holger@eitzenberger.org" Date: Mon, 30 Sep 2013 17:07:28 +0200 Subject: netfilter: nf_ct_sip: consolidate NAT hook functions There are currently seven different NAT hooks used in both nf_conntrack_sip and nf_nat_sip, each of the hooks is exported in nf_conntrack_sip, then set from the nf_nat_sip NAT helper. And because each of them is exported there is quite some overhead introduced due of this. By introducing nf_nat_sip_hooks I am able to reduce both text/data somewhat. For nf_conntrack_sip e. g. I get text data bss dec old 15243 5256 32 20531 new 15010 5192 32 20234 Signed-off-by: Holger Eitzenberger Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 127 +++++++++++---------------------------- net/netfilter/nf_nat_sip.c | 35 +++++------ 2 files changed, 49 insertions(+), 113 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 5ed8c441dffd..466410eaa482 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -52,66 +52,8 @@ module_param(sip_direct_media, int, 0600); MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling " "endpoints only (default 1)"); -unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, const char **dptr, - unsigned int *datalen) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sip_hook); - -void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, unsigned int protoff, - s16 off) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook); - -unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - struct nf_conntrack_expect *exp, - unsigned int matchoff, - unsigned int matchlen) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook); - -unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int sdpoff, - enum sdp_header_types type, - enum sdp_header_types term, - const union nf_inet_addr *addr) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook); - -unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int matchoff, - unsigned int matchlen, - u_int16_t port) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook); - -unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int sdpoff, - const union nf_inet_addr *addr) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook); - -unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - struct nf_conntrack_expect *rtp_exp, - struct nf_conntrack_expect *rtcp_exp, - unsigned int mediaoff, - unsigned int medialen, - union nf_inet_addr *rtp_addr) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_media_hook); +const struct nf_nat_sip_hooks *nf_nat_sip_hooks; +EXPORT_SYMBOL_GPL(nf_nat_sip_hooks); static int string_len(const struct nf_conn *ct, const char *dptr, const char *limit, int *shift) @@ -914,8 +856,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, int direct_rtp = 0, skip_expect = 0, ret = NF_DROP; u_int16_t base_port; __be16 rtp_port, rtcp_port; - typeof(nf_nat_sdp_port_hook) nf_nat_sdp_port; - typeof(nf_nat_sdp_media_hook) nf_nat_sdp_media; + const struct nf_nat_sip_hooks *hooks; saddr = NULL; if (sip_direct_media) { @@ -972,9 +913,9 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, rtcp_port = htons(base_port + 1); if (direct_rtp) { - nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook); - if (nf_nat_sdp_port && - !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen, + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && + !hooks->sdp_port(skb, protoff, dataoff, dptr, datalen, mediaoff, medialen, ntohs(rtp_port))) goto err1; } @@ -996,10 +937,10 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, nf_ct_expect_init(rtcp_exp, class, nf_ct_l3num(ct), saddr, daddr, IPPROTO_UDP, NULL, &rtcp_port); - nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook); - if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp) - ret = nf_nat_sdp_media(skb, protoff, dataoff, dptr, datalen, - rtp_exp, rtcp_exp, + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && ct->status & IPS_NAT_MASK && !direct_rtp) + ret = hooks->sdp_media(skb, protoff, dataoff, dptr, + datalen, rtp_exp, rtcp_exp, mediaoff, medialen, daddr); else { if (nf_ct_expect_related(rtp_exp) == 0) { @@ -1053,13 +994,12 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, unsigned int caddr_len, maddr_len; unsigned int i; union nf_inet_addr caddr, maddr, rtp_addr; + const struct nf_nat_sip_hooks *hooks; unsigned int port; const struct sdp_media_type *t; int ret = NF_ACCEPT; - typeof(nf_nat_sdp_addr_hook) nf_nat_sdp_addr; - typeof(nf_nat_sdp_session_hook) nf_nat_sdp_session; - nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook); + hooks = rcu_dereference(nf_nat_sip_hooks); /* Find beginning of session description */ if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen, @@ -1127,10 +1067,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, } /* Update media connection address if present */ - if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { - ret = nf_nat_sdp_addr(skb, protoff, dataoff, + if (maddr_len && hooks && ct->status & IPS_NAT_MASK) { + ret = hooks->sdp_addr(skb, protoff, dataoff, dptr, datalen, mediaoff, - SDP_HDR_CONNECTION, SDP_HDR_MEDIA, + SDP_HDR_CONNECTION, + SDP_HDR_MEDIA, &rtp_addr); if (ret != NF_ACCEPT) { nf_ct_helper_log(skb, ct, "cannot mangle SDP"); @@ -1141,10 +1082,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, } /* Update session connection and owner addresses */ - nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook); - if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp_session(skb, protoff, dataoff, - dptr, datalen, sdpoff, &rtp_addr); + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && ct->status & IPS_NAT_MASK) + ret = hooks->sdp_session(skb, protoff, dataoff, + dptr, datalen, sdpoff, + &rtp_addr); return ret; } @@ -1244,11 +1186,11 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, unsigned int matchoff, matchlen; struct nf_conntrack_expect *exp; union nf_inet_addr *saddr, daddr; + const struct nf_nat_sip_hooks *hooks; __be16 port; u8 proto; unsigned int expires = 0; int ret; - typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect; /* Expected connections can not register again. */ if (ct->status & IPS_EXPECTED) @@ -1311,10 +1253,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, exp->helper = nfct_help(ct)->helper; exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; - nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook); - if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK) - ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen, - exp, matchoff, matchlen); + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && ct->status & IPS_NAT_MASK) + ret = hooks->expect(skb, protoff, dataoff, dptr, datalen, + exp, matchoff, matchlen); else { if (nf_ct_expect_related(exp) != 0) { nf_ct_helper_log(skb, ct, "cannot add expectation"); @@ -1517,7 +1459,7 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, unsigned int protoff, unsigned int dataoff, const char **dptr, unsigned int *datalen) { - typeof(nf_nat_sip_hook) nf_nat_sip; + const struct nf_nat_sip_hooks *hooks; int ret; if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) @@ -1526,9 +1468,9 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, ret = process_sip_response(skb, protoff, dataoff, dptr, datalen); if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { - nf_nat_sip = rcu_dereference(nf_nat_sip_hook); - if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff, - dptr, datalen)) { + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && !hooks->msg(skb, protoff, dataoff, + dptr, datalen)) { nf_ct_helper_log(skb, ct, "cannot NAT SIP message"); ret = NF_DROP; } @@ -1548,7 +1490,6 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, s16 diff, tdiff = 0; int ret = NF_ACCEPT; bool term; - typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) @@ -1612,9 +1553,11 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, } if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { - nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook); - if (nf_nat_sip_seq_adjust) - nf_nat_sip_seq_adjust(skb, protoff, tdiff); + const struct nf_nat_sip_hooks *hooks; + + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks) + hooks->seq_adjust(skb, protoff, tdiff); } return ret; diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index f9790405b7ff..b4d691db955e 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -625,33 +625,26 @@ static struct nf_ct_helper_expectfn sip_nat = { static void __exit nf_nat_sip_fini(void) { - RCU_INIT_POINTER(nf_nat_sip_hook, NULL); - RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL); - RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL); + RCU_INIT_POINTER(nf_nat_sip_hooks, NULL); + nf_ct_helper_expectfn_unregister(&sip_nat); synchronize_rcu(); } +static const struct nf_nat_sip_hooks sip_hooks = { + .msg = nf_nat_sip, + .seq_adjust = nf_nat_sip_seq_adjust, + .expect = nf_nat_sip_expect, + .sdp_addr = nf_nat_sdp_addr, + .sdp_port = nf_nat_sdp_port, + .sdp_session = nf_nat_sdp_session, + .sdp_media = nf_nat_sdp_media, +}; + static int __init nf_nat_sip_init(void) { - BUG_ON(nf_nat_sip_hook != NULL); - BUG_ON(nf_nat_sip_seq_adjust_hook != NULL); - BUG_ON(nf_nat_sip_expect_hook != NULL); - BUG_ON(nf_nat_sdp_addr_hook != NULL); - BUG_ON(nf_nat_sdp_port_hook != NULL); - BUG_ON(nf_nat_sdp_session_hook != NULL); - BUG_ON(nf_nat_sdp_media_hook != NULL); - RCU_INIT_POINTER(nf_nat_sip_hook, nf_nat_sip); - RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, nf_nat_sip_seq_adjust); - RCU_INIT_POINTER(nf_nat_sip_expect_hook, nf_nat_sip_expect); - RCU_INIT_POINTER(nf_nat_sdp_addr_hook, nf_nat_sdp_addr); - RCU_INIT_POINTER(nf_nat_sdp_port_hook, nf_nat_sdp_port); - RCU_INIT_POINTER(nf_nat_sdp_session_hook, nf_nat_sdp_session); - RCU_INIT_POINTER(nf_nat_sdp_media_hook, nf_nat_sdp_media); + BUG_ON(nf_nat_sip_hooks != NULL); + RCU_INIT_POINTER(nf_nat_sip_hooks, &sip_hooks); nf_ct_helper_expectfn_register(&sip_nat); return 0; } -- cgit v1.2.3 From 91cb498e6a34b429a032f8cfbb57dde28cd20e0c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 4 Sep 2013 20:57:48 +0200 Subject: netfilter: cttimeout: allow to set/get default protocol timeouts Default timeouts are currently set via proc/sysctl interface, the typical pattern is a file name like: /proc/sys/net/netfilter/nf_conntrack_PROTOCOL_timeout_STATE This results in one entry per default protocol state timeout. This patch simplifies this by allowing to set default protocol timeouts via cttimeout netlink interface. This should allow us to get rid of the existing proc/sysctl code in the midterm. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cttimeout.c | 161 ++++++++++++++++++++++++++++++++++-- 1 file changed, 153 insertions(+), 8 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 50580494148d..476accd17145 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -49,10 +49,8 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { }; static int -ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, - struct nf_conntrack_l4proto *l4proto, - struct net *net, - const struct nlattr *attr) +ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto, + struct net *net, const struct nlattr *attr) { int ret = 0; @@ -64,8 +62,7 @@ ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, if (ret < 0) return ret; - ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, - &timeout->data); + ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts); } return ret; } @@ -123,7 +120,8 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, goto err_proto_put; } - ret = ctnl_timeout_parse_policy(matching, l4proto, net, + ret = ctnl_timeout_parse_policy(&matching->data, + l4proto, net, cda[CTA_TIMEOUT_DATA]); return ret; } @@ -138,7 +136,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, goto err_proto_put; } - ret = ctnl_timeout_parse_policy(timeout, l4proto, net, + ret = ctnl_timeout_parse_policy(&timeout->data, l4proto, net, cda[CTA_TIMEOUT_DATA]); if (ret < 0) goto err; @@ -342,6 +340,147 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, return ret; } +static int +cttimeout_default_set(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + __u16 l3num; + __u8 l4num; + struct nf_conntrack_l4proto *l4proto; + struct net *net = sock_net(skb->sk); + unsigned int *timeouts; + int ret; + + if (!cda[CTA_TIMEOUT_L3PROTO] || + !cda[CTA_TIMEOUT_L4PROTO] || + !cda[CTA_TIMEOUT_DATA]) + return -EINVAL; + + l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); + l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); + l4proto = nf_ct_l4proto_find_get(l3num, l4num); + + /* This protocol is not supported, skip. */ + if (l4proto->l4proto != l4num) { + ret = -EOPNOTSUPP; + goto err; + } + + timeouts = l4proto->get_timeouts(net); + + ret = ctnl_timeout_parse_policy(timeouts, l4proto, net, + cda[CTA_TIMEOUT_DATA]); + if (ret < 0) + goto err; + + nf_ct_l4proto_put(l4proto); + return 0; +err: + nf_ct_l4proto_put(l4proto); + return ret; +} + +static int +cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, + u32 seq, u32 type, int event, + struct nf_conntrack_l4proto *l4proto) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = portid ? NLM_F_MULTI : 0; + + event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l4proto->l3proto)) || + nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto)) + goto nla_put_failure; + + if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) { + struct nlattr *nest_parms; + unsigned int *timeouts = l4proto->get_timeouts(net); + int ret; + + nest_parms = nla_nest_start(skb, + CTA_TIMEOUT_DATA | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts); + if (ret < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms); + } + + nlmsg_end(skb, nlh); + return skb->len; + +nlmsg_failure: +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int cttimeout_default_get(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + __u16 l3num; + __u8 l4num; + struct nf_conntrack_l4proto *l4proto; + struct net *net = sock_net(skb->sk); + struct sk_buff *skb2; + int ret, err; + + if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO]) + return -EINVAL; + + l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); + l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); + l4proto = nf_ct_l4proto_find_get(l3num, l4num); + + /* This protocol is not supported, skip. */ + if (l4proto->l4proto != l4num) { + err = -EOPNOTSUPP; + goto err; + } + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + err = -ENOMEM; + goto err; + } + + ret = cttimeout_default_fill_info(net, skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + IPCTNL_MSG_TIMEOUT_DEFAULT_SET, + l4proto); + if (ret <= 0) { + kfree_skb(skb2); + err = -ENOMEM; + goto err; + } + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); + if (ret > 0) + ret = 0; + + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? -ENOBUFS : ret; +err: + nf_ct_l4proto_put(l4proto); + return err; +} + #ifdef CONFIG_NF_CONNTRACK_TIMEOUT static struct ctnl_timeout *ctnl_timeout_find_get(const char *name) { @@ -384,6 +523,12 @@ static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = { [IPCTNL_MSG_TIMEOUT_DELETE] = { .call = cttimeout_del_timeout, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, + [IPCTNL_MSG_TIMEOUT_DEFAULT_SET]= { .call = cttimeout_default_set, + .attr_count = CTA_TIMEOUT_MAX, + .policy = cttimeout_nla_policy }, + [IPCTNL_MSG_TIMEOUT_DEFAULT_GET]= { .call = cttimeout_default_get, + .attr_count = CTA_TIMEOUT_MAX, + .policy = cttimeout_nla_policy }, }; static const struct nfnetlink_subsystem cttimeout_subsys = { -- cgit v1.2.3 From efe4208f47f907b86f528788da711e8ab9dea44d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Oct 2013 15:42:29 -0700 Subject: ipv6: make lookups simpler and faster TCP listener refactoring, part 4 : To speed up inet lookups, we moved IPv4 addresses from inet to struct sock_common Now is time to do the same for IPv6, because it permits us to have fast lookups for all kind of sockets, including upcoming SYN_RECV. Getting IPv6 addresses in TCP lookups currently requires two extra cache lines, plus a dereference (and memory stall). inet6_sk(sk) does the dereference of inet_sk(__sk)->pinet6 This patch is way bigger than its IPv4 counter part, because for IPv4, we could add aliases (inet_daddr, inet_rcv_saddr), while on IPv6, it's not doable easily. inet6_sk(sk)->daddr becomes sk->sk_v6_daddr inet6_sk(sk)->rcv_saddr becomes sk->sk_v6_rcv_saddr And timewait socket also have tw->tw_v6_daddr & tw->tw_v6_rcv_saddr at the same offset. We get rid of INET6_TW_MATCH() as INET6_MATCH() is now the generic macro. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/netfilter/xt_TPROXY.c | 2 +- net/netfilter/xt_socket.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 5d8a3a3cd5a7..ef8a926752a9 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -200,7 +200,7 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, in->ifindex); if (sk) { int connected = (sk->sk_state == TCP_ESTABLISHED); - int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr); + int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr); /* NOTE: we return listeners even if bound to * 0.0.0.0, those are filtered out in diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 06df2b9110f5..3dd0e374bc2b 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -370,7 +370,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) */ wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && sk->sk_state != TCP_TIME_WAIT && - ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)); + ipv6_addr_any(&sk->sk_v6_rcv_saddr)); /* Ignore non-transparent sockets, if XT_SOCKET_TRANSPARENT is used */ -- cgit v1.2.3 From 795aa6ef6a1aba99050735eadd0c2341b789b53b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Oct 2013 09:21:55 +0200 Subject: netfilter: pass hook ops to hookfn Pass the hook ops to the hookfn to allow for generic hook functions. This change is required by nf_tables. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/core.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 42 ++++++++++++++++++++--------------------- 2 files changed, 22 insertions(+), 22 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 593b16ea45e0..1fbab0cdd302 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -146,7 +146,7 @@ unsigned int nf_iterate(struct list_head *head, /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = (*elemp)->hook(hook, skb, indev, outdev, okfn); + verdict = (*elemp)->hook(*elemp, skb, indev, outdev, okfn); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 74fd00c27210..34fda62f40f6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1239,11 +1239,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET); + return ip_vs_out(ops->hooknum, skb, AF_INET); } /* @@ -1251,11 +1251,11 @@ ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET); + return ip_vs_out(ops->hooknum, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1266,11 +1266,11 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET6); + return ip_vs_out(ops->hooknum, skb, AF_INET6); } /* @@ -1278,11 +1278,11 @@ ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_out(hooknum, skb, AF_INET6); + return ip_vs_out(ops->hooknum, skb, AF_INET6); } #endif @@ -1733,12 +1733,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET); + return ip_vs_in(ops->hooknum, skb, AF_INET); } /* @@ -1746,11 +1746,11 @@ ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET); + return ip_vs_in(ops->hooknum, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1760,7 +1760,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, * Copy info from first fragment, to the rest of them. */ static unsigned int -ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -1792,12 +1792,12 @@ ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET6); + return ip_vs_in(ops->hooknum, skb, AF_INET6); } /* @@ -1805,11 +1805,11 @@ ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip_vs_in(hooknum, skb, AF_INET6); + return ip_vs_in(ops->hooknum, skb, AF_INET6); } #endif @@ -1825,7 +1825,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb, * and send them to ip_vs_in_icmp. */ static unsigned int -ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, +ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { @@ -1842,12 +1842,12 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp(skb, &r, hooknum); + return ip_vs_in_icmp(skb, &r, ops->hooknum); } #ifdef CONFIG_IP_VS_IPV6 static unsigned int -ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, +ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { @@ -1866,7 +1866,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr); + return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr); } #endif -- cgit v1.2.3 From f59cb0453cd885736daa11ae2445982c5ab2fc83 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Oct 2013 10:57:04 +0200 Subject: netfilter: nf_nat: move alloc_null_binding to nf_nat_core.c Similar to nat_decode_session, alloc_null_binding is needed for both ip_tables and nf_tables, so move it to nf_nat_core.c. This change is required by nf_tables. This is an adapted version of the original patch from Patrick McHardy. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 6f0f4f7f68a5..63a815402211 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -432,6 +432,26 @@ nf_nat_setup_info(struct nf_conn *ct, } EXPORT_SYMBOL(nf_nat_setup_info); +unsigned int +nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +{ + /* Force range to this IP; let proto decide mapping for + * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + * Use reply in case it's already been mangled (eg local packet). + */ + union nf_inet_addr ip = + (HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 : + ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3); + struct nf_nat_range range = { + .flags = NF_NAT_RANGE_MAP_IPS, + .min_addr = ip, + .max_addr = ip, + }; + return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); +} +EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding); + /* Do packet manipulations according to nf_nat_setup_info. */ unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, -- cgit v1.2.3 From 96518518cc417bb0a8c80b9fb736202e28acdf96 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 14 Oct 2013 11:00:02 +0200 Subject: netfilter: add nftables This patch adds nftables which is the intended successor of iptables. This packet filtering framework reuses the existing netfilter hooks, the connection tracking system, the NAT subsystem, the transparent proxying engine, the logging infrastructure and the userspace packet queueing facilities. In a nutshell, nftables provides a pseudo-state machine with 4 general purpose registers of 128 bits and 1 specific purpose register to store verdicts. This pseudo-machine comes with an extensible instruction set, a.k.a. "expressions" in the nftables jargon. The expressions included in this patch provide the basic functionality, they are: * bitwise: to perform bitwise operations. * byteorder: to change from host/network endianess. * cmp: to compare data with the content of the registers. * counter: to enable counters on rules. * ct: to store conntrack keys into register. * exthdr: to match IPv6 extension headers. * immediate: to load data into registers. * limit: to limit matching based on packet rate. * log: to log packets. * meta: to match metainformation that usually comes with the skbuff. * nat: to perform Network Address Translation. * payload: to fetch data from the packet payload and store it into registers. * reject (IPv4 only): to explicitly close connection, eg. TCP RST. Using this instruction-set, the userspace utility 'nft' can transform the rules expressed in human-readable text representation (using a new syntax, inspired by tcpdump) to nftables bytecode. nftables also inherits the table, chain and rule objects from iptables, but in a more configurable way, and it also includes the original datatype-agnostic set infrastructure with mapping support. This set infrastructure is enhanced in the follow up patch (netfilter: nf_tables: add netlink set API). This patch includes the following components: * the netlink API: net/netfilter/nf_tables_api.c and include/uapi/netfilter/nf_tables.h * the packet filter core: net/netfilter/nf_tables_core.c * the expressions (described above): net/netfilter/nft_*.c * the filter tables: arp, IPv4, IPv6 and bridge: net/ipv4/netfilter/nf_tables_ipv4.c net/ipv6/netfilter/nf_tables_ipv6.c net/ipv4/netfilter/nf_tables_arp.c net/bridge/netfilter/nf_tables_bridge.c * the NAT table (IPv4 only): net/ipv4/netfilter/nf_table_nat_ipv4.c * the route table (similar to mangle): net/ipv4/netfilter/nf_table_route_ipv4.c net/ipv6/netfilter/nf_table_route_ipv6.c * internal definitions under: include/net/netfilter/nf_tables.h include/net/netfilter/nf_tables_core.h * It also includes an skeleton expression: net/netfilter/nft_expr_template.c and the preliminary implementation of the meta target net/netfilter/nft_meta_target.c It also includes a change in struct nf_hook_ops to add a new pointer to store private data to the hook, that is used to store the rule list per chain. This patch is based on the patch from Patrick McHardy, plus merged accumulated cleanups, fixes and small enhancements to the nftables code that has been done since 2009, which are: From Patrick McHardy: * nf_tables: adjust netlink handler function signatures * nf_tables: only retry table lookup after successful table module load * nf_tables: fix event notification echo and avoid unnecessary messages * nft_ct: add l3proto support * nf_tables: pass expression context to nft_validate_data_load() * nf_tables: remove redundant definition * nft_ct: fix maxattr initialization * nf_tables: fix invalid event type in nf_tables_getrule() * nf_tables: simplify nft_data_init() usage * nf_tables: build in more core modules * nf_tables: fix double lookup expression unregistation * nf_tables: move expression initialization to nf_tables_core.c * nf_tables: build in payload module * nf_tables: use NFPROTO constants * nf_tables: rename pid variables to portid * nf_tables: save 48 bits per rule * nf_tables: introduce chain rename * nf_tables: check for duplicate names on chain rename * nf_tables: remove ability to specify handles for new rules * nf_tables: return error for rule change request * nf_tables: return error for NLM_F_REPLACE without rule handle * nf_tables: include NLM_F_APPEND/NLM_F_REPLACE flags in rule notification * nf_tables: fix NLM_F_MULTI usage in netlink notifications * nf_tables: include NLM_F_APPEND in rule dumps From Pablo Neira Ayuso: * nf_tables: fix stack overflow in nf_tables_newrule * nf_tables: nft_ct: fix compilation warning * nf_tables: nft_ct: fix crash with invalid packets * nft_log: group and qthreshold are 2^16 * nf_tables: nft_meta: fix socket uid,gid handling * nft_counter: allow to restore counters * nf_tables: fix module autoload * nf_tables: allow to remove all rules placed in one chain * nf_tables: use 64-bits rule handle instead of 16-bits * nf_tables: fix chain after rule deletion * nf_tables: improve deletion performance * nf_tables: add missing code in route chain type * nf_tables: rise maximum number of expressions from 12 to 128 * nf_tables: don't delete table if in use * nf_tables: fix basechain release From Tomasz Bursztyka: * nf_tables: Add support for changing users chain's name * nf_tables: Change chain's name to be fixed sized * nf_tables: Add support for replacing a rule by another one * nf_tables: Update uapi nftables netlink header documentation From Florian Westphal: * nft_log: group is u16, snaplen u32 From Phil Oester: * nf_tables: operational limit match Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 37 + net/netfilter/Makefile | 16 + net/netfilter/nf_tables_api.c | 1760 +++++++++++++++++++++++++++++++++++++ net/netfilter/nf_tables_core.c | 152 ++++ net/netfilter/nft_bitwise.c | 140 +++ net/netfilter/nft_byteorder.c | 167 ++++ net/netfilter/nft_cmp.c | 146 +++ net/netfilter/nft_counter.c | 107 +++ net/netfilter/nft_ct.c | 252 ++++++ net/netfilter/nft_expr_template.c | 88 ++ net/netfilter/nft_exthdr.c | 127 +++ net/netfilter/nft_hash.c | 348 ++++++++ net/netfilter/nft_immediate.c | 113 +++ net/netfilter/nft_limit.c | 113 +++ net/netfilter/nft_log.c | 140 +++ net/netfilter/nft_meta.c | 222 +++++ net/netfilter/nft_meta_target.c | 117 +++ net/netfilter/nft_payload.c | 137 +++ net/netfilter/nft_set.c | 381 ++++++++ 19 files changed, 4563 insertions(+) create mode 100644 net/netfilter/nf_tables_api.c create mode 100644 net/netfilter/nf_tables_core.c create mode 100644 net/netfilter/nft_bitwise.c create mode 100644 net/netfilter/nft_byteorder.c create mode 100644 net/netfilter/nft_cmp.c create mode 100644 net/netfilter/nft_counter.c create mode 100644 net/netfilter/nft_ct.c create mode 100644 net/netfilter/nft_expr_template.c create mode 100644 net/netfilter/nft_exthdr.c create mode 100644 net/netfilter/nft_hash.c create mode 100644 net/netfilter/nft_immediate.c create mode 100644 net/netfilter/nft_limit.c create mode 100644 net/netfilter/nft_log.c create mode 100644 net/netfilter/nft_meta.c create mode 100644 net/netfilter/nft_meta_target.c create mode 100644 net/netfilter/nft_payload.c create mode 100644 net/netfilter/nft_set.c (limited to 'net/netfilter') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 6e839b6dff2b..c271e1af93b5 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -413,6 +413,43 @@ config NETFILTER_SYNPROXY endif # NF_CONNTRACK +config NF_TABLES + depends on NETFILTER_NETLINK + tristate "Netfilter nf_tables support" + +config NFT_EXTHDR + depends on NF_TABLES + tristate "Netfilter nf_tables IPv6 exthdr module" + +config NFT_META + depends on NF_TABLES + tristate "Netfilter nf_tables meta module" + +config NFT_CT + depends on NF_TABLES + depends on NF_CONNTRACK + tristate "Netfilter nf_tables conntrack module" + +config NFT_SET + depends on NF_TABLES + tristate "Netfilter nf_tables set module" + +config NFT_HASH + depends on NF_TABLES + tristate "Netfilter nf_tables hash module" + +config NFT_COUNTER + depends on NF_TABLES + tristate "Netfilter nf_tables counter module" + +config NFT_LOG + depends on NF_TABLES + tristate "Netfilter nf_tables log module" + +config NFT_LIMIT + depends on NF_TABLES + tristate "Netfilter nf_tables limit module" + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index c3a0a12907f6..1ca3f3932826 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -64,6 +64,22 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o # SYNPROXY obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o +# nf_tables +nf_tables-objs += nf_tables_core.o nf_tables_api.o +nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o +nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o + +obj-$(CONFIG_NF_TABLES) += nf_tables.o +obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o +obj-$(CONFIG_NFT_META) += nft_meta.o +obj-$(CONFIG_NFT_CT) += nft_ct.o +obj-$(CONFIG_NFT_LIMIT) += nft_limit.o +#nf_tables-objs += nft_meta_target.o +obj-$(CONFIG_NFT_SET) += nft_set.o +obj-$(CONFIG_NFT_HASH) += nft_hash.o +obj-$(CONFIG_NFT_COUNTER) += nft_counter.o +obj-$(CONFIG_NFT_LOG) += nft_log.o + # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c new file mode 100644 index 000000000000..7d59c89c6c75 --- /dev/null +++ b/net/netfilter/nf_tables_api.c @@ -0,0 +1,1760 @@ +/* + * Copyright (c) 2007, 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static LIST_HEAD(nf_tables_afinfo); +static LIST_HEAD(nf_tables_expressions); + +/** + * nft_register_afinfo - register nf_tables address family info + * + * @afi: address family info to register + * + * Register the address family for use with nf_tables. Returns zero on + * success or a negative errno code otherwise. + */ +int nft_register_afinfo(struct nft_af_info *afi) +{ + INIT_LIST_HEAD(&afi->tables); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_tail(&afi->list, &nf_tables_afinfo); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return 0; +} +EXPORT_SYMBOL_GPL(nft_register_afinfo); + +/** + * nft_unregister_afinfo - unregister nf_tables address family info + * + * @afi: address family info to unregister + * + * Unregister the address family for use with nf_tables. + */ +void nft_unregister_afinfo(struct nft_af_info *afi) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del(&afi->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_afinfo); + +static struct nft_af_info *nft_afinfo_lookup(int family) +{ + struct nft_af_info *afi; + + list_for_each_entry(afi, &nf_tables_afinfo, list) { + if (afi->family == family) + return afi; + } + return NULL; +} + +static struct nft_af_info *nf_tables_afinfo_lookup(int family, bool autoload) +{ + struct nft_af_info *afi; + + afi = nft_afinfo_lookup(family); + if (afi != NULL) + return afi; +#ifdef CONFIG_MODULES + if (autoload) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-afinfo-%u", family); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + afi = nft_afinfo_lookup(family); + if (afi != NULL) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-EAFNOSUPPORT); +} + +/* + * Tables + */ + +static struct nft_table *nft_table_lookup(const struct nft_af_info *afi, + const struct nlattr *nla) +{ + struct nft_table *table; + + list_for_each_entry(table, &afi->tables, list) { + if (!nla_strcmp(nla, table->name)) + return table; + } + return NULL; +} + +static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi, + const struct nlattr *nla, + bool autoload) +{ + struct nft_table *table; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + table = nft_table_lookup(afi, nla); + if (table != NULL) + return table; + +#ifdef CONFIG_MODULES + if (autoload) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-table-%u-%*.s", afi->family, + nla_len(nla)-1, (const char *)nla_data(nla)); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (nft_table_lookup(afi, nla)) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-ENOENT); +} + +static inline u64 nf_tables_alloc_handle(struct nft_table *table) +{ + return ++table->hgenerator; +} + +static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { + [NFTA_TABLE_NAME] = { .type = NLA_STRING }, +}; + +static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, + int event, u32 flags, int family, + const struct nft_table *table) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_TABLE_NAME, table->name)) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_table_notify(const struct sk_buff *oskb, + const struct nlmsghdr *nlh, + const struct nft_table *table, + int event, int family) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + u32 seq = nlh ? nlh->nlmsg_seq : 0; + struct net *net = oskb ? sock_net(oskb->sk) : &init_net; + bool report; + int err; + + report = nlh ? nlmsg_report(nlh) : false; + if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_table_info(skb, portid, seq, event, 0, + family, table); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static int nf_tables_dump_tables(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + unsigned int idx = 0, s_idx = cb->args[0]; + int family = nfmsg->nfgen_family; + + list_for_each_entry(afi, &nf_tables_afinfo, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry(table, &afi->tables, list) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (nf_tables_fill_table_info(skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWTABLE, + NLM_F_MULTI, + afi->family, table) < 0) + goto done; +cont: + idx++; + } + } +done: + cb->args[0] = idx; + return skb->len; +} + +static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + struct sk_buff *skb2; + int family = nfmsg->nfgen_family; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_tables, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0, + family, table); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nlattr *name; + struct nft_af_info *afi; + struct nft_table *table; + int family = nfmsg->nfgen_family; + + afi = nf_tables_afinfo_lookup(family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + name = nla[NFTA_TABLE_NAME]; + table = nf_tables_table_lookup(afi, name, false); + if (IS_ERR(table)) { + if (PTR_ERR(table) != -ENOENT) + return PTR_ERR(table); + table = NULL; + } + + if (table != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + return 0; + } + + table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL); + if (table == NULL) + return -ENOMEM; + + nla_strlcpy(table->name, name, nla_len(name)); + INIT_LIST_HEAD(&table->chains); + + list_add_tail(&table->list, &afi->tables); + nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); + return 0; +} + +static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + struct nft_af_info *afi; + struct nft_table *table; + int family = nfmsg->nfgen_family; + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + if (table->flags & NFT_TABLE_BUILTIN) + return -EOPNOTSUPP; + + if (table->use) + return -EBUSY; + + list_del(&table->list); + nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family); + kfree(table); + return 0; +} + +static struct nft_table *__nf_tables_table_lookup(const struct nft_af_info *afi, + const char *name) +{ + struct nft_table *table; + + list_for_each_entry(table, &afi->tables, list) { + if (!strcmp(name, table->name)) + return table; + } + + return ERR_PTR(-ENOENT); +} + +static int nf_tables_chain_notify(const struct sk_buff *oskb, + const struct nlmsghdr *nlh, + const struct nft_table *table, + const struct nft_chain *chain, + int event, int family); + +/** + * nft_register_table - register a built-in table + * + * @table: the table to register + * @family: protocol family to register table with + * + * Register a built-in table for use with nf_tables. Returns zero on + * success or a negative errno code otherwise. + */ +int nft_register_table(struct nft_table *table, int family) +{ + struct nft_af_info *afi; + struct nft_table *t; + struct nft_chain *chain; + int err; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); +again: + afi = nf_tables_afinfo_lookup(family, true); + if (IS_ERR(afi)) { + err = PTR_ERR(afi); + if (err == -EAGAIN) + goto again; + goto err; + } + + t = __nf_tables_table_lookup(afi, table->name); + if (IS_ERR(t)) { + err = PTR_ERR(t); + if (err != -ENOENT) + goto err; + t = NULL; + } + + if (t != NULL) { + err = -EEXIST; + goto err; + } + + table->flags |= NFT_TABLE_BUILTIN; + list_add_tail(&table->list, &afi->tables); + nf_tables_table_notify(NULL, NULL, table, NFT_MSG_NEWTABLE, family); + list_for_each_entry(chain, &table->chains, list) + nf_tables_chain_notify(NULL, NULL, table, chain, + NFT_MSG_NEWCHAIN, family); + err = 0; +err: + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return err; +} +EXPORT_SYMBOL_GPL(nft_register_table); + +/** + * nft_unregister_table - unregister a built-in table + * + * @table: the table to unregister + * @family: protocol family to unregister table with + * + * Unregister a built-in table for use with nf_tables. + */ +void nft_unregister_table(struct nft_table *table, int family) +{ + struct nft_chain *chain; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del(&table->list); + list_for_each_entry(chain, &table->chains, list) + nf_tables_chain_notify(NULL, NULL, table, chain, + NFT_MSG_DELCHAIN, family); + nf_tables_table_notify(NULL, NULL, table, NFT_MSG_DELTABLE, family); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_table); + +/* + * Chains + */ + +static struct nft_chain * +nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle) +{ + struct nft_chain *chain; + + list_for_each_entry(chain, &table->chains, list) { + if (chain->handle == handle) + return chain; + } + + return ERR_PTR(-ENOENT); +} + +static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table, + const struct nlattr *nla) +{ + struct nft_chain *chain; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + list_for_each_entry(chain, &table->chains, list) { + if (!nla_strcmp(nla, chain->name)) + return chain; + } + + return ERR_PTR(-ENOENT); +} + +static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { + [NFTA_CHAIN_TABLE] = { .type = NLA_STRING }, + [NFTA_CHAIN_HANDLE] = { .type = NLA_U64 }, + [NFTA_CHAIN_NAME] = { .type = NLA_STRING, + .len = NFT_CHAIN_MAXNAMELEN - 1 }, + [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { + [NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 }, + [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 }, +}; + +static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, + int event, u32 flags, int family, + const struct nft_table *table, + const struct nft_chain *chain) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name)) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle))) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name)) + goto nla_put_failure; + + if (chain->flags & NFT_BASE_CHAIN) { + const struct nf_hook_ops *ops = &nft_base_chain(chain)->ops; + struct nlattr *nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); + if (nest == NULL) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority))) + goto nla_put_failure; + nla_nest_end(skb, nest); + } + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_chain_notify(const struct sk_buff *oskb, + const struct nlmsghdr *nlh, + const struct nft_table *table, + const struct nft_chain *chain, + int event, int family) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + struct net *net = oskb ? sock_net(oskb->sk) : &init_net; + u32 seq = nlh ? nlh->nlmsg_seq : 0; + bool report; + int err; + + report = nlh ? nlmsg_report(nlh) : false; + if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family, + table, chain); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static int nf_tables_dump_chains(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + unsigned int idx = 0, s_idx = cb->args[0]; + int family = nfmsg->nfgen_family; + + list_for_each_entry(afi, &nf_tables_afinfo, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry(chain, &table->chains, list) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWCHAIN, + NLM_F_MULTI, + afi->family, table, chain) < 0) + goto done; +cont: + idx++; + } + } + } +done: + cb->args[0] = idx; + return skb->len; +} + + +static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + struct sk_buff *skb2; + int family = nfmsg->nfgen_family; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_chains, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0, + family, table, chain); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nlattr * uninitialized_var(name); + const struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + struct nft_base_chain *basechain; + struct nlattr *ha[NFTA_HOOK_MAX + 1]; + int family = nfmsg->nfgen_family; + u64 handle = 0; + int err; + bool create; + + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; + + afi = nf_tables_afinfo_lookup(family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], create); + if (IS_ERR(table)) + return PTR_ERR(table); + + if (table->use == UINT_MAX) + return -EOVERFLOW; + + chain = NULL; + name = nla[NFTA_CHAIN_NAME]; + + if (nla[NFTA_CHAIN_HANDLE]) { + handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE])); + chain = nf_tables_chain_lookup_byhandle(table, handle); + if (IS_ERR(chain)) + return PTR_ERR(chain); + } else { + chain = nf_tables_chain_lookup(table, name); + if (IS_ERR(chain)) { + if (PTR_ERR(chain) != -ENOENT) + return PTR_ERR(chain); + chain = NULL; + } + } + + if (chain != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + + if (nla[NFTA_CHAIN_HANDLE] && name && + !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]))) + return -EEXIST; + + if (nla[NFTA_CHAIN_HANDLE] && name) + nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); + + goto notify; + } + + if (nla[NFTA_CHAIN_HOOK]) { + struct nf_hook_ops *ops; + + err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], + nft_hook_policy); + if (err < 0) + return err; + if (ha[NFTA_HOOK_HOOKNUM] == NULL || + ha[NFTA_HOOK_PRIORITY] == NULL) + return -EINVAL; + if (ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])) >= afi->nhooks) + return -EINVAL; + + basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); + if (basechain == NULL) + return -ENOMEM; + chain = &basechain->chain; + + ops = &basechain->ops; + ops->pf = family; + ops->owner = afi->owner; + ops->hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); + ops->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); + ops->priv = chain; + ops->hook = nft_do_chain; + if (afi->hooks[ops->hooknum]) + ops->hook = afi->hooks[ops->hooknum]; + + chain->flags |= NFT_BASE_CHAIN; + } else { + chain = kzalloc(sizeof(*chain), GFP_KERNEL); + if (chain == NULL) + return -ENOMEM; + } + + INIT_LIST_HEAD(&chain->rules); + chain->handle = nf_tables_alloc_handle(table); + nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); + + list_add_tail(&chain->list, &table->chains); + table->use++; +notify: + nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN, + family); + return 0; +} + +static void nf_tables_rcu_chain_destroy(struct rcu_head *head) +{ + struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head); + + BUG_ON(chain->use > 0); + + if (chain->flags & NFT_BASE_CHAIN) + kfree(nft_base_chain(chain)); + else + kfree(chain); +} + +static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + int family = nfmsg->nfgen_family; + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + if (chain->flags & NFT_CHAIN_BUILTIN) + return -EOPNOTSUPP; + + if (!list_empty(&chain->rules)) + return -EBUSY; + + list_del(&chain->list); + table->use--; + + if (chain->flags & NFT_BASE_CHAIN) + nf_unregister_hook(&nft_base_chain(chain)->ops); + + nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN, + family); + + /* Make sure all rule references are gone before this is released */ + call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy); + return 0; +} + +static void nft_ctx_init(struct nft_ctx *ctx, + const struct nft_af_info *afi, + const struct nft_table *table, + const struct nft_chain *chain) +{ + ctx->afi = afi; + ctx->table = table; + ctx->chain = chain; +} + +/* + * Expressions + */ + +/** + * nft_register_expr - register nf_tables expr operations + * @ops: expr operations + * + * Registers the expr operations for use with nf_tables. Returns zero on + * success or a negative errno code otherwise. + */ +int nft_register_expr(struct nft_expr_ops *ops) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_tail(&ops->list, &nf_tables_expressions); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return 0; +} +EXPORT_SYMBOL_GPL(nft_register_expr); + +/** + * nft_unregister_expr - unregister nf_tables expr operations + * @ops: expr operations + * + * Unregisters the expr operations for use with nf_tables. + */ +void nft_unregister_expr(struct nft_expr_ops *ops) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del(&ops->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_expr); + +static const struct nft_expr_ops *__nft_expr_ops_get(struct nlattr *nla) +{ + const struct nft_expr_ops *ops; + + list_for_each_entry(ops, &nf_tables_expressions, list) { + if (!nla_strcmp(nla, ops->name)) + return ops; + } + return NULL; +} + +static const struct nft_expr_ops *nft_expr_ops_get(struct nlattr *nla) +{ + const struct nft_expr_ops *ops; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + ops = __nft_expr_ops_get(nla); + if (ops != NULL && try_module_get(ops->owner)) + return ops; + +#ifdef CONFIG_MODULES + if (ops == NULL) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-expr-%.*s", + nla_len(nla), (char *)nla_data(nla)); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (__nft_expr_ops_get(nla)) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-ENOENT); +} + +static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = { + [NFTA_EXPR_NAME] = { .type = NLA_STRING }, + [NFTA_EXPR_DATA] = { .type = NLA_NESTED }, +}; + +static int nf_tables_fill_expr_info(struct sk_buff *skb, + const struct nft_expr *expr) +{ + if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->name)) + goto nla_put_failure; + + if (expr->ops->dump) { + struct nlattr *data = nla_nest_start(skb, NFTA_EXPR_DATA); + if (data == NULL) + goto nla_put_failure; + if (expr->ops->dump(skb, expr) < 0) + goto nla_put_failure; + nla_nest_end(skb, data); + } + + return skb->len; + +nla_put_failure: + return -1; +}; + +struct nft_expr_info { + const struct nft_expr_ops *ops; + struct nlattr *tb[NFTA_EXPR_MAX + 1]; +}; + +static int nf_tables_expr_parse(const struct nlattr *nla, + struct nft_expr_info *info) +{ + const struct nft_expr_ops *ops; + int err; + + err = nla_parse_nested(info->tb, NFTA_EXPR_MAX, nla, nft_expr_policy); + if (err < 0) + return err; + + ops = nft_expr_ops_get(info->tb[NFTA_EXPR_NAME]); + if (IS_ERR(ops)) + return PTR_ERR(ops); + info->ops = ops; + return 0; +} + +static int nf_tables_newexpr(const struct nft_ctx *ctx, + struct nft_expr_info *info, + struct nft_expr *expr) +{ + const struct nft_expr_ops *ops = info->ops; + int err; + + expr->ops = ops; + if (ops->init) { + struct nlattr *ma[ops->maxattr + 1]; + + if (info->tb[NFTA_EXPR_DATA]) { + err = nla_parse_nested(ma, ops->maxattr, + info->tb[NFTA_EXPR_DATA], + ops->policy); + if (err < 0) + goto err1; + } else + memset(ma, 0, sizeof(ma[0]) * (ops->maxattr + 1)); + + err = ops->init(ctx, expr, (const struct nlattr **)ma); + if (err < 0) + goto err1; + } + + info->ops = NULL; + return 0; + +err1: + expr->ops = NULL; + return err; +} + +static void nf_tables_expr_destroy(struct nft_expr *expr) +{ + if (expr->ops->destroy) + expr->ops->destroy(expr); + module_put(expr->ops->owner); +} + +/* + * Rules + */ + +static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain, + u64 handle) +{ + struct nft_rule *rule; + + // FIXME: this sucks + list_for_each_entry(rule, &chain->rules, list) { + if (handle == rule->handle) + return rule; + } + + return ERR_PTR(-ENOENT); +} + +static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain, + const struct nlattr *nla) +{ + if (nla == NULL) + return ERR_PTR(-EINVAL); + + return __nf_tables_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla))); +} + +static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { + [NFTA_RULE_TABLE] = { .type = NLA_STRING }, + [NFTA_RULE_CHAIN] = { .type = NLA_STRING, + .len = NFT_CHAIN_MAXNAMELEN - 1 }, + [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, + [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, +}; + +static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, + int event, u32 flags, int family, + const struct nft_table *table, + const struct nft_chain *chain, + const struct nft_rule *rule) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + const struct nft_expr *expr, *next; + struct nlattr *list; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_RULE_TABLE, table->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name)) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle))) + goto nla_put_failure; + + list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS); + if (list == NULL) + goto nla_put_failure; + nft_rule_for_each_expr(expr, next, rule) { + struct nlattr *elem = nla_nest_start(skb, NFTA_LIST_ELEM); + if (elem == NULL) + goto nla_put_failure; + if (nf_tables_fill_expr_info(skb, expr) < 0) + goto nla_put_failure; + nla_nest_end(skb, elem); + } + nla_nest_end(skb, list); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_rule_notify(const struct sk_buff *oskb, + const struct nlmsghdr *nlh, + const struct nft_table *table, + const struct nft_chain *chain, + const struct nft_rule *rule, + int event, u32 flags, int family) +{ + struct sk_buff *skb; + u32 portid = NETLINK_CB(oskb).portid; + struct net *net = oskb ? sock_net(oskb->sk) : &init_net; + u32 seq = nlh->nlmsg_seq; + bool report; + int err; + + report = nlmsg_report(nlh); + if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_rule_info(skb, portid, seq, event, flags, + family, table, chain, rule); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static int nf_tables_dump_rules(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + const struct nft_rule *rule; + unsigned int idx = 0, s_idx = cb->args[0]; + int family = nfmsg->nfgen_family; + + list_for_each_entry(afi, &nf_tables_afinfo, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry(chain, &table->chains, list) { + list_for_each_entry(rule, &chain->rules, list) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWRULE, + NLM_F_MULTI | NLM_F_APPEND, + afi->family, table, chain, rule) < 0) + goto done; +cont: + idx++; + } + } + } + } +done: + cb->args[0] = idx; + return skb->len; +} + +static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + const struct nft_rule *rule; + struct sk_buff *skb2; + int family = nfmsg->nfgen_family; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_rules, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, + family, table, chain, rule); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static void nf_tables_rcu_rule_destroy(struct rcu_head *head) +{ + struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head); + struct nft_expr *expr; + + /* + * Careful: some expressions might not be initialized in case this + * is called on error from nf_tables_newrule(). + */ + expr = nft_expr_first(rule); + while (expr->ops && expr != nft_expr_last(rule)) { + nf_tables_expr_destroy(expr); + expr = nft_expr_next(expr); + } + kfree(rule); +} + +static void nf_tables_rule_destroy(struct nft_rule *rule) +{ + call_rcu(&rule->rcu_head, nf_tables_rcu_rule_destroy); +} + +#define NFT_RULE_MAXEXPRS 128 + +static struct nft_expr_info *info; + +static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + struct nft_rule *rule, *old_rule = NULL; + struct nft_expr *expr; + struct nft_ctx ctx; + struct nlattr *tmp; + unsigned int size, i, n; + int err, rem; + bool create; + u64 handle; + + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; + + afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, create); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], create); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + if (nla[NFTA_RULE_HANDLE]) { + handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE])); + rule = __nf_tables_rule_lookup(chain, handle); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + old_rule = rule; + else + return -EOPNOTSUPP; + } else { + if (!create || nlh->nlmsg_flags & NLM_F_REPLACE) + return -EINVAL; + handle = nf_tables_alloc_handle(table); + } + + n = 0; + size = 0; + if (nla[NFTA_RULE_EXPRESSIONS]) { + nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) { + err = -EINVAL; + if (nla_type(tmp) != NFTA_LIST_ELEM) + goto err1; + if (n == NFT_RULE_MAXEXPRS) + goto err1; + err = nf_tables_expr_parse(tmp, &info[n]); + if (err < 0) + goto err1; + size += info[n].ops->size; + n++; + } + } + + err = -ENOMEM; + rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL); + if (rule == NULL) + goto err1; + + rule->handle = handle; + rule->dlen = size; + + nft_ctx_init(&ctx, afi, table, chain); + expr = nft_expr_first(rule); + for (i = 0; i < n; i++) { + err = nf_tables_newexpr(&ctx, &info[i], expr); + if (err < 0) + goto err2; + expr = nft_expr_next(expr); + } + + /* Register hook when first rule is inserted into a base chain */ + if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) { + err = nf_register_hook(&nft_base_chain(chain)->ops); + if (err < 0) + goto err2; + } + + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + list_replace_rcu(&old_rule->list, &rule->list); + nf_tables_rule_destroy(old_rule); + } else if (nlh->nlmsg_flags & NLM_F_APPEND) + list_add_tail_rcu(&rule->list, &chain->rules); + else + list_add_rcu(&rule->list, &chain->rules); + + nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE, + nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), + nfmsg->nfgen_family); + return 0; + +err2: + nf_tables_rule_destroy(rule); +err1: + for (i = 0; i < n; i++) { + if (info[i].ops != NULL) + module_put(info[i].ops->owner); + } + return err; +} + +static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + struct nft_chain *chain; + struct nft_rule *rule, *tmp; + int family = nfmsg->nfgen_family; + + afi = nf_tables_afinfo_lookup(family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + + if (nla[NFTA_RULE_HANDLE]) { + rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + /* List removal must be visible before destroying expressions */ + list_del_rcu(&rule->list); + + nf_tables_rule_notify(skb, nlh, table, chain, rule, + NFT_MSG_DELRULE, 0, family); + nf_tables_rule_destroy(rule); + } else { + /* Remove all rules in this chain */ + list_for_each_entry_safe(rule, tmp, &chain->rules, list) { + list_del_rcu(&rule->list); + + nf_tables_rule_notify(skb, nlh, table, chain, rule, + NFT_MSG_DELRULE, 0, family); + nf_tables_rule_destroy(rule); + } + } + + /* Unregister hook when last rule from base chain is deleted */ + if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) + nf_unregister_hook(&nft_base_chain(chain)->ops); + + return 0; +} + +static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { + [NFT_MSG_NEWTABLE] = { + .call = nf_tables_newtable, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_table_policy, + }, + [NFT_MSG_GETTABLE] = { + .call = nf_tables_gettable, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_table_policy, + }, + [NFT_MSG_DELTABLE] = { + .call = nf_tables_deltable, + .attr_count = NFTA_TABLE_MAX, + .policy = nft_table_policy, + }, + [NFT_MSG_NEWCHAIN] = { + .call = nf_tables_newchain, + .attr_count = NFTA_CHAIN_MAX, + .policy = nft_chain_policy, + }, + [NFT_MSG_GETCHAIN] = { + .call = nf_tables_getchain, + .attr_count = NFTA_CHAIN_MAX, + .policy = nft_chain_policy, + }, + [NFT_MSG_DELCHAIN] = { + .call = nf_tables_delchain, + .attr_count = NFTA_CHAIN_MAX, + .policy = nft_chain_policy, + }, + [NFT_MSG_NEWRULE] = { + .call = nf_tables_newrule, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, + [NFT_MSG_GETRULE] = { + .call = nf_tables_getrule, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, + [NFT_MSG_DELRULE] = { + .call = nf_tables_delrule, + .attr_count = NFTA_RULE_MAX, + .policy = nft_rule_policy, + }, +}; + +static const struct nfnetlink_subsystem nf_tables_subsys = { + .name = "nf_tables", + .subsys_id = NFNL_SUBSYS_NFTABLES, + .cb_count = NFT_MSG_MAX, + .cb = nf_tables_cb, +}; + +/** + * nft_validate_input_register - validate an expressions' input register + * + * @reg: the register number + * + * Validate that the input register is one of the general purpose + * registers. + */ +int nft_validate_input_register(enum nft_registers reg) +{ + if (reg <= NFT_REG_VERDICT) + return -EINVAL; + if (reg > NFT_REG_MAX) + return -ERANGE; + return 0; +} +EXPORT_SYMBOL_GPL(nft_validate_input_register); + +/** + * nft_validate_output_register - validate an expressions' output register + * + * @reg: the register number + * + * Validate that the output register is one of the general purpose + * registers or the verdict register. + */ +int nft_validate_output_register(enum nft_registers reg) +{ + if (reg < NFT_REG_VERDICT) + return -EINVAL; + if (reg > NFT_REG_MAX) + return -ERANGE; + return 0; +} +EXPORT_SYMBOL_GPL(nft_validate_output_register); + +/** + * nft_validate_data_load - validate an expressions' data load + * + * @ctx: context of the expression performing the load + * @reg: the destination register number + * @data: the data to load + * @type: the data type + * + * Validate that a data load uses the appropriate data type for + * the destination register. A value of NULL for the data means + * that its runtime gathered data, which is always of type + * NFT_DATA_VALUE. + */ +int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type) +{ + switch (reg) { + case NFT_REG_VERDICT: + if (data == NULL || type != NFT_DATA_VERDICT) + return -EINVAL; + // FIXME: do loop detection + return 0; + default: + if (data != NULL && type != NFT_DATA_VALUE) + return -EINVAL; + return 0; + } +} +EXPORT_SYMBOL_GPL(nft_validate_data_load); + +static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = { + [NFTA_VERDICT_CODE] = { .type = NLA_U32 }, + [NFTA_VERDICT_CHAIN] = { .type = NLA_STRING, + .len = NFT_CHAIN_MAXNAMELEN - 1 }, +}; + +static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla) +{ + struct nlattr *tb[NFTA_VERDICT_MAX + 1]; + struct nft_chain *chain; + int err; + + err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy); + if (err < 0) + return err; + + if (!tb[NFTA_VERDICT_CODE]) + return -EINVAL; + data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); + + switch (data->verdict) { + case NF_ACCEPT: + case NF_DROP: + case NF_QUEUE: + case NFT_CONTINUE: + case NFT_BREAK: + case NFT_RETURN: + desc->len = sizeof(data->verdict); + break; + case NFT_JUMP: + case NFT_GOTO: + if (!tb[NFTA_VERDICT_CHAIN]) + return -EINVAL; + chain = nf_tables_chain_lookup(ctx->table, + tb[NFTA_VERDICT_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + if (chain->flags & NFT_BASE_CHAIN) + return -EOPNOTSUPP; + + if (ctx->chain->level + 1 > chain->level) { + if (ctx->chain->level + 1 == 16) + return -EMLINK; + chain->level = ctx->chain->level + 1; + } + chain->use++; + data->chain = chain; + desc->len = sizeof(data); + break; + default: + return -EINVAL; + } + + desc->type = NFT_DATA_VERDICT; + return 0; +} + +static void nft_verdict_uninit(const struct nft_data *data) +{ + switch (data->verdict) { + case NFT_JUMP: + case NFT_GOTO: + data->chain->use--; + break; + } +} + +static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_DATA_VERDICT); + if (!nest) + goto nla_put_failure; + + if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict))) + goto nla_put_failure; + + switch (data->verdict) { + case NFT_JUMP: + case NFT_GOTO: + if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name)) + goto nla_put_failure; + } + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + +static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla) +{ + unsigned int len; + + len = nla_len(nla); + if (len == 0) + return -EINVAL; + if (len > sizeof(data->data)) + return -EOVERFLOW; + + nla_memcpy(data->data, nla, sizeof(data->data)); + desc->type = NFT_DATA_VALUE; + desc->len = len; + return 0; +} + +static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data, + unsigned int len) +{ + return nla_put(skb, NFTA_DATA_VALUE, len, data->data); +} + +static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { + [NFTA_DATA_VALUE] = { .type = NLA_BINARY, + .len = FIELD_SIZEOF(struct nft_data, data) }, + [NFTA_DATA_VERDICT] = { .type = NLA_NESTED }, +}; + +/** + * nft_data_init - parse nf_tables data netlink attributes + * + * @ctx: context of the expression using the data + * @data: destination struct nft_data + * @desc: data description + * @nla: netlink attribute containing data + * + * Parse the netlink data attributes and initialize a struct nft_data. + * The type and length of data are returned in the data description. + * + * The caller can indicate that it only wants to accept data of type + * NFT_DATA_VALUE by passing NULL for the ctx argument. + */ +int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla) +{ + struct nlattr *tb[NFTA_DATA_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy); + if (err < 0) + return err; + + if (tb[NFTA_DATA_VALUE]) + return nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]); + if (tb[NFTA_DATA_VERDICT] && ctx != NULL) + return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]); + return -EINVAL; +} +EXPORT_SYMBOL_GPL(nft_data_init); + +/** + * nft_data_uninit - release a nft_data item + * + * @data: struct nft_data to release + * @type: type of data + * + * Release a nft_data item. NFT_DATA_VALUE types can be silently discarded, + * all others need to be released by calling this function. + */ +void nft_data_uninit(const struct nft_data *data, enum nft_data_types type) +{ + switch (type) { + case NFT_DATA_VALUE: + return; + case NFT_DATA_VERDICT: + return nft_verdict_uninit(data); + default: + WARN_ON(1); + } +} +EXPORT_SYMBOL_GPL(nft_data_uninit); + +int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, + enum nft_data_types type, unsigned int len) +{ + struct nlattr *nest; + int err; + + nest = nla_nest_start(skb, attr); + if (nest == NULL) + return -1; + + switch (type) { + case NFT_DATA_VALUE: + err = nft_value_dump(skb, data, len); + break; + case NFT_DATA_VERDICT: + err = nft_verdict_dump(skb, data); + break; + default: + err = -EINVAL; + WARN_ON(1); + } + + nla_nest_end(skb, nest); + return err; +} +EXPORT_SYMBOL_GPL(nft_data_dump); + +static int __init nf_tables_module_init(void) +{ + int err; + + info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS, + GFP_KERNEL); + if (info == NULL) { + err = -ENOMEM; + goto err1; + } + + err = nf_tables_core_module_init(); + if (err < 0) + goto err2; + + err = nfnetlink_subsys_register(&nf_tables_subsys); + if (err < 0) + goto err3; + + pr_info("nf_tables: (c) 2007-2009 Patrick McHardy \n"); + return 0; +err3: + nf_tables_core_module_exit(); +err2: + kfree(info); +err1: + return err; +} + +static void __exit nf_tables_module_exit(void) +{ + nfnetlink_subsys_unregister(&nf_tables_subsys); + nf_tables_core_module_exit(); + kfree(info); +} + +module_init(nf_tables_module_init); +module_exit(nf_tables_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c new file mode 100644 index 000000000000..bc7fb85d4002 --- /dev/null +++ b/net/netfilter/nf_tables_core.c @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NFT_JUMP_STACK_SIZE 16 + +unsigned int nft_do_chain(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + const struct nft_chain *chain = ops->priv; + const struct nft_rule *rule; + const struct nft_expr *expr, *last; + struct nft_data data[NFT_REG_MAX + 1]; + const struct nft_pktinfo pkt = { + .skb = skb, + .in = in, + .out = out, + .hooknum = ops->hooknum, + }; + unsigned int stackptr = 0; + struct { + const struct nft_chain *chain; + const struct nft_rule *rule; + } jumpstack[NFT_JUMP_STACK_SIZE]; + +do_chain: + rule = list_entry(&chain->rules, struct nft_rule, list); +next_rule: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + list_for_each_entry_continue_rcu(rule, &chain->rules, list) { + nft_rule_for_each_expr(expr, last, rule) { + expr->ops->eval(expr, data, &pkt); + if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE) + break; + } + + switch (data[NFT_REG_VERDICT].verdict) { + case NFT_BREAK: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + /* fall through */ + case NFT_CONTINUE: + continue; + } + break; + } + + switch (data[NFT_REG_VERDICT].verdict) { + case NF_ACCEPT: + case NF_DROP: + case NF_QUEUE: + return data[NFT_REG_VERDICT].verdict; + case NFT_JUMP: + BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); + jumpstack[stackptr].chain = chain; + jumpstack[stackptr].rule = rule; + stackptr++; + /* fall through */ + case NFT_GOTO: + chain = data[NFT_REG_VERDICT].chain; + goto do_chain; + case NFT_RETURN: + case NFT_CONTINUE: + break; + default: + WARN_ON(1); + } + + if (stackptr > 0) { + stackptr--; + chain = jumpstack[stackptr].chain; + rule = jumpstack[stackptr].rule; + goto next_rule; + } + + return NF_ACCEPT; +} +EXPORT_SYMBOL_GPL(nft_do_chain); + +int __init nf_tables_core_module_init(void) +{ + int err; + + err = nft_immediate_module_init(); + if (err < 0) + goto err1; + + err = nft_cmp_module_init(); + if (err < 0) + goto err2; + + err = nft_lookup_module_init(); + if (err < 0) + goto err3; + + err = nft_bitwise_module_init(); + if (err < 0) + goto err4; + + err = nft_byteorder_module_init(); + if (err < 0) + goto err5; + + err = nft_payload_module_init(); + if (err < 0) + goto err6; + + return 0; + +err6: + nft_byteorder_module_exit(); +err5: + nft_bitwise_module_exit(); +err4: + nft_lookup_module_exit(); +err3: + nft_cmp_module_exit(); +err2: + nft_immediate_module_exit(); +err1: + return err; +} + +void nf_tables_core_module_exit(void) +{ + nft_payload_module_exit(); + nft_byteorder_module_exit(); + nft_bitwise_module_exit(); + nft_lookup_module_exit(); + nft_cmp_module_exit(); + nft_immediate_module_exit(); +} diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c new file mode 100644 index 000000000000..0f7501506367 --- /dev/null +++ b/net/netfilter/nft_bitwise.c @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_bitwise { + enum nft_registers sreg:8; + enum nft_registers dreg:8; + u8 len; + struct nft_data mask; + struct nft_data xor; +}; + +static void nft_bitwise_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_bitwise *priv = nft_expr_priv(expr); + const struct nft_data *src = &data[priv->sreg]; + struct nft_data *dst = &data[priv->dreg]; + unsigned int i; + + for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) { + dst->data[i] = (src->data[i] & priv->mask.data[i]) ^ + priv->xor.data[i]; + } +} + +static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { + [NFTA_BITWISE_SREG] = { .type = NLA_U32 }, + [NFTA_BITWISE_DREG] = { .type = NLA_U32 }, + [NFTA_BITWISE_LEN] = { .type = NLA_U32 }, + [NFTA_BITWISE_MASK] = { .type = NLA_NESTED }, + [NFTA_BITWISE_XOR] = { .type = NLA_NESTED }, +}; + +static int nft_bitwise_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_bitwise *priv = nft_expr_priv(expr); + struct nft_data_desc d1, d2; + int err; + + if (tb[NFTA_BITWISE_SREG] == NULL || + tb[NFTA_BITWISE_DREG] == NULL || + tb[NFTA_BITWISE_LEN] == NULL || + tb[NFTA_BITWISE_MASK] == NULL || + tb[NFTA_BITWISE_XOR] == NULL) + return -EINVAL; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN])); + + err = nft_data_init(NULL, &priv->mask, &d1, tb[NFTA_BITWISE_MASK]); + if (err < 0) + return err; + if (d1.len != priv->len) + return -EINVAL; + + err = nft_data_init(NULL, &priv->xor, &d2, tb[NFTA_BITWISE_XOR]); + if (err < 0) + return err; + if (d2.len != priv->len) + return -EINVAL; + + return 0; +} + +static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_bitwise *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(priv->len))) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_BITWISE_XOR, &priv->xor, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_bitwise_ops __read_mostly = { + .name = "bitwise", + .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), + .owner = THIS_MODULE, + .eval = nft_bitwise_eval, + .init = nft_bitwise_init, + .dump = nft_bitwise_dump, + .policy = nft_bitwise_policy, + .maxattr = NFTA_BITWISE_MAX, +}; + +int __init nft_bitwise_module_init(void) +{ + return nft_register_expr(&nft_bitwise_ops); +} + +void nft_bitwise_module_exit(void) +{ + nft_unregister_expr(&nft_bitwise_ops); +} diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c new file mode 100644 index 000000000000..8b0657a4d17b --- /dev/null +++ b/net/netfilter/nft_byteorder.c @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_byteorder { + enum nft_registers sreg:8; + enum nft_registers dreg:8; + enum nft_byteorder_ops op:8; + u8 len; + u8 size; +}; + +static void nft_byteorder_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_byteorder *priv = nft_expr_priv(expr); + struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg]; + union { u32 u32; u16 u16; } *s, *d; + unsigned int i; + + s = (void *)src->data; + d = (void *)dst->data; + + switch (priv->size) { + case 4: + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + for (i = 0; i < priv->len / 4; i++) + d[i].u32 = ntohl((__force __be32)s[i].u32); + break; + case NFT_BYTEORDER_HTON: + for (i = 0; i < priv->len / 4; i++) + d[i].u32 = (__force __u32)htonl(s[i].u32); + break; + } + break; + case 2: + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + for (i = 0; i < priv->len / 2; i++) + d[i].u16 = ntohs((__force __be16)s[i].u16); + break; + case NFT_BYTEORDER_HTON: + for (i = 0; i < priv->len / 2; i++) + d[i].u16 = (__force __u16)htons(s[i].u16); + break; + } + break; + } +} + +static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = { + [NFTA_BYTEORDER_SREG] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_DREG] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_OP] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_LEN] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_SIZE] = { .type = NLA_U32 }, +}; + +static int nft_byteorder_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_byteorder *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_BYTEORDER_SREG] == NULL || + tb[NFTA_BYTEORDER_DREG] == NULL || + tb[NFTA_BYTEORDER_LEN] == NULL || + tb[NFTA_BYTEORDER_SIZE] == NULL || + tb[NFTA_BYTEORDER_OP] == NULL) + return -EINVAL; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP])); + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + case NFT_BYTEORDER_HTON: + break; + default: + return -EINVAL; + } + + priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN])); + if (priv->len == 0 || priv->len > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE])); + switch (priv->size) { + case 2: + case 4: + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_byteorder *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_byteorder_ops __read_mostly = { + .name = "byteorder", + .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), + .owner = THIS_MODULE, + .eval = nft_byteorder_eval, + .init = nft_byteorder_init, + .dump = nft_byteorder_dump, + .policy = nft_byteorder_policy, + .maxattr = NFTA_BYTEORDER_MAX, +}; + +int __init nft_byteorder_module_init(void) +{ + return nft_register_expr(&nft_byteorder_ops); +} + +void nft_byteorder_module_exit(void) +{ + nft_unregister_expr(&nft_byteorder_ops); +} diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c new file mode 100644 index 000000000000..e734d670120a --- /dev/null +++ b/net/netfilter/nft_cmp.c @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_cmp_expr { + struct nft_data data; + enum nft_registers sreg:8; + u8 len; + enum nft_cmp_ops op:8; +}; + +static void nft_cmp_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_cmp_expr *priv = nft_expr_priv(expr); + int d; + + d = nft_data_cmp(&data[priv->sreg], &priv->data, priv->len); + switch (priv->op) { + case NFT_CMP_EQ: + if (d != 0) + goto mismatch; + break; + case NFT_CMP_NEQ: + if (d == 0) + goto mismatch; + break; + case NFT_CMP_LT: + if (d == 0) + goto mismatch; + case NFT_CMP_LTE: + if (d > 0) + goto mismatch; + break; + case NFT_CMP_GT: + if (d == 0) + goto mismatch; + case NFT_CMP_GTE: + if (d < 0) + goto mismatch; + break; + } + return; + +mismatch: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = { + [NFTA_CMP_SREG] = { .type = NLA_U32 }, + [NFTA_CMP_OP] = { .type = NLA_U32 }, + [NFTA_CMP_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_cmp_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc; + int err; + + if (tb[NFTA_CMP_SREG] == NULL || + tb[NFTA_CMP_OP] == NULL || + tb[NFTA_CMP_DATA] == NULL) + return -EINVAL; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); + switch (priv->op) { + case NFT_CMP_EQ: + case NFT_CMP_NEQ: + case NFT_CMP_LT: + case NFT_CMP_LTE: + case NFT_CMP_GT: + case NFT_CMP_GTE: + break; + default: + return -EINVAL; + } + + err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); + if (err < 0) + return err; + + priv->len = desc.len; + return 0; +} + +static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_cmp_expr *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op))) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_cmp_ops __read_mostly = { + .name = "cmp", + .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)), + .owner = THIS_MODULE, + .eval = nft_cmp_eval, + .init = nft_cmp_init, + .dump = nft_cmp_dump, + .policy = nft_cmp_policy, + .maxattr = NFTA_CMP_MAX, +}; + +int __init nft_cmp_module_init(void) +{ + return nft_register_expr(&nft_cmp_ops); +} + +void nft_cmp_module_exit(void) +{ + nft_unregister_expr(&nft_cmp_ops); +} diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c new file mode 100644 index 000000000000..33c5d36819bb --- /dev/null +++ b/net/netfilter/nft_counter.c @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_counter { + seqlock_t lock; + u64 bytes; + u64 packets; +}; + +static void nft_counter_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_counter *priv = nft_expr_priv(expr); + + write_seqlock_bh(&priv->lock); + priv->bytes += pkt->skb->len; + priv->packets++; + write_sequnlock_bh(&priv->lock); +} + +static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_counter *priv = nft_expr_priv(expr); + unsigned int seq; + u64 bytes; + u64 packets; + + do { + seq = read_seqbegin(&priv->lock); + bytes = priv->bytes; + packets = priv->packets; + } while (read_seqretry(&priv->lock, seq)); + + if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes))) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { + [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 }, + [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, +}; + +static int nft_counter_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_counter *priv = nft_expr_priv(expr); + + if (tb[NFTA_COUNTER_PACKETS]) + priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + if (tb[NFTA_COUNTER_BYTES]) + priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); + + seqlock_init(&priv->lock); + return 0; +} + +static struct nft_expr_ops nft_counter_ops __read_mostly = { + .name = "counter", + .size = NFT_EXPR_SIZE(sizeof(struct nft_counter)), + .policy = nft_counter_policy, + .maxattr = NFTA_COUNTER_MAX, + .owner = THIS_MODULE, + .eval = nft_counter_eval, + .init = nft_counter_init, + .dump = nft_counter_dump, +}; + +static int __init nft_counter_module_init(void) +{ + return nft_register_expr(&nft_counter_ops); +} + +static void __exit nft_counter_module_exit(void) +{ + nft_unregister_expr(&nft_counter_ops); +} + +module_init(nft_counter_module_init); +module_exit(nft_counter_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("counter"); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c new file mode 100644 index 000000000000..a1756d678226 --- /dev/null +++ b/net/netfilter/nft_ct.c @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_ct { + enum nft_ct_keys key:8; + enum ip_conntrack_dir dir:8; + enum nft_registers dreg:8; + uint8_t family; +}; + +static void nft_ct_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_ct *priv = nft_expr_priv(expr); + struct nft_data *dest = &data[priv->dreg]; + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + const struct nf_conn_help *help; + const struct nf_conntrack_tuple *tuple; + const struct nf_conntrack_helper *helper; + long diff; + unsigned int state; + + ct = nf_ct_get(pkt->skb, &ctinfo); + + switch (priv->key) { + case NFT_CT_STATE: + if (ct == NULL) + state = NF_CT_STATE_INVALID_BIT; + else if (nf_ct_is_untracked(ct)) + state = NF_CT_STATE_UNTRACKED_BIT; + else + state = NF_CT_STATE_BIT(ctinfo); + dest->data[0] = state; + return; + } + + if (ct == NULL) + goto err; + + switch (priv->key) { + case NFT_CT_DIRECTION: + dest->data[0] = CTINFO2DIR(ctinfo); + return; + case NFT_CT_STATUS: + dest->data[0] = ct->status; + return; +#ifdef CONFIG_NF_CONNTRACK_MARK + case NFT_CT_MARK: + dest->data[0] = ct->mark; + return; +#endif +#ifdef CONFIG_NF_CONNTRACK_SECMARK + case NFT_CT_SECMARK: + dest->data[0] = ct->secmark; + return; +#endif + case NFT_CT_EXPIRATION: + diff = (long)jiffies - (long)ct->timeout.expires; + if (diff < 0) + diff = 0; + dest->data[0] = jiffies_to_msecs(diff); + return; + case NFT_CT_HELPER: + if (ct->master == NULL) + goto err; + help = nfct_help(ct->master); + if (help == NULL) + goto err; + helper = rcu_dereference(help->helper); + if (helper == NULL) + goto err; + if (strlen(helper->name) >= sizeof(dest->data)) + goto err; + strncpy((char *)dest->data, helper->name, sizeof(dest->data)); + return; + } + + tuple = &ct->tuplehash[priv->dir].tuple; + switch (priv->key) { + case NFT_CT_L3PROTOCOL: + dest->data[0] = nf_ct_l3num(ct); + return; + case NFT_CT_SRC: + memcpy(dest->data, tuple->src.u3.all, + nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); + return; + case NFT_CT_DST: + memcpy(dest->data, tuple->dst.u3.all, + nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); + return; + case NFT_CT_PROTOCOL: + dest->data[0] = nf_ct_protonum(ct); + return; + case NFT_CT_PROTO_SRC: + dest->data[0] = (__force __u16)tuple->src.u.all; + return; + case NFT_CT_PROTO_DST: + dest->data[0] = (__force __u16)tuple->dst.u.all; + return; + } + return; +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { + [NFTA_CT_DREG] = { .type = NLA_U32 }, + [NFTA_CT_KEY] = { .type = NLA_U32 }, + [NFTA_CT_DIRECTION] = { .type = NLA_U8 }, +}; + +static int nft_ct_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_ct *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_CT_DREG] == NULL || + tb[NFTA_CT_KEY] == NULL) + return -EINVAL; + + priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); + if (tb[NFTA_CT_DIRECTION] != NULL) { + priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]); + switch (priv->dir) { + case IP_CT_DIR_ORIGINAL: + case IP_CT_DIR_REPLY: + break; + default: + return -EINVAL; + } + } + + switch (priv->key) { + case NFT_CT_STATE: + case NFT_CT_DIRECTION: + case NFT_CT_STATUS: +#ifdef CONFIG_NF_CONNTRACK_MARK + case NFT_CT_MARK: +#endif +#ifdef CONFIG_NF_CONNTRACK_SECMARK + case NFT_CT_SECMARK: +#endif + case NFT_CT_EXPIRATION: + case NFT_CT_HELPER: + if (tb[NFTA_CT_DIRECTION] != NULL) + return -EINVAL; + break; + case NFT_CT_PROTOCOL: + case NFT_CT_SRC: + case NFT_CT_DST: + case NFT_CT_PROTO_SRC: + case NFT_CT_PROTO_DST: + if (tb[NFTA_CT_DIRECTION] == NULL) + return -EINVAL; + break; + default: + return -EOPNOTSUPP; + } + + err = nf_ct_l3proto_try_module_get(ctx->afi->family); + if (err < 0) + return err; + priv->family = ctx->afi->family; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + goto err1; + + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + goto err1; + return 0; + +err1: + nf_ct_l3proto_module_put(ctx->afi->family); + return err; +} + +static void nft_ct_destroy(const struct nft_expr *expr) +{ + struct nft_ct *priv = nft_expr_priv(expr); + + nf_ct_l3proto_module_put(priv->family); +} + +static int nft_ct_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_ct *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key))) + goto nla_put_failure; + if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_ct_ops __read_mostly = { + .name = "ct", + .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), + .owner = THIS_MODULE, + .eval = nft_ct_eval, + .init = nft_ct_init, + .destroy = nft_ct_destroy, + .dump = nft_ct_dump, + .policy = nft_ct_policy, + .maxattr = NFTA_CT_MAX, +}; + +static int __init nft_ct_module_init(void) +{ + return nft_register_expr(&nft_ct_ops); +} + +static void __exit nft_ct_module_exit(void) +{ + nft_unregister_expr(&nft_ct_ops); +} + +module_init(nft_ct_module_init); +module_exit(nft_ct_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("ct"); diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c new file mode 100644 index 000000000000..9fc8eb308193 --- /dev/null +++ b/net/netfilter/nft_expr_template.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include + +struct nft_template { + +}; + +static void nft_template_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_template *priv = nft_expr_priv(expr); + +} + +static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = { + [NFTA_TEMPLATE_ATTR] = { .type = NLA_U32 }, +}; + +static int nft_template_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr *tb[]) +{ + struct nft_template *priv = nft_expr_priv(expr); + + return 0; +} + +static void nft_template_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_template *priv = nft_expr_priv(expr); + +} + +static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_template *priv = nft_expr_priv(expr); + + NLA_PUT_BE32(skb, NFTA_TEMPLATE_ATTR, priv->field); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops template_ops __read_mostly = { + .name = "template", + .size = NFT_EXPR_SIZE(sizeof(struct nft_template)), + .owner = THIS_MODULE, + .eval = nft_template_eval, + .init = nft_template_init, + .destroy = nft_template_destroy, + .dump = nft_template_dump, + .policy = nft_template_policy, + .maxattr = NFTA_TEMPLATE_MAX, +}; + +static int __init nft_template_module_init(void) +{ + return nft_register_expr(&template_ops); +} + +static void __exit nft_template_module_exit(void) +{ + nft_unregister_expr(&template_ops); +} + +module_init(nft_template_module_init); +module_exit(nft_template_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("template"); diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c new file mode 100644 index 000000000000..21c6a6b7b662 --- /dev/null +++ b/net/netfilter/nft_exthdr.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +// FIXME: +#include + +struct nft_exthdr { + u8 type; + u8 offset; + u8 len; + enum nft_registers dreg:8; +}; + +static void nft_exthdr_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_exthdr *priv = nft_expr_priv(expr); + struct nft_data *dest = &data[priv->dreg]; + unsigned int offset; + int err; + + err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL); + if (err < 0) + goto err; + offset += priv->offset; + + if (skb_copy_bits(pkt->skb, offset, dest->data, priv->len) < 0) + goto err; + return; +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { + [NFTA_EXTHDR_DREG] = { .type = NLA_U32 }, + [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 }, + [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, + [NFTA_EXTHDR_LEN] = { .type = NLA_U32 }, +}; + +static int nft_exthdr_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_exthdr *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_EXTHDR_DREG] == NULL || + tb[NFTA_EXTHDR_TYPE] == NULL || + tb[NFTA_EXTHDR_OFFSET] == NULL || + tb[NFTA_EXTHDR_LEN] == NULL) + return -EINVAL; + + priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); + priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN])); + if (priv->len == 0 || + priv->len > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); +} + +static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_exthdr *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops exthdr_ops __read_mostly = { + .name = "exthdr", + .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), + .owner = THIS_MODULE, + .eval = nft_exthdr_eval, + .init = nft_exthdr_init, + .dump = nft_exthdr_dump, + .policy = nft_exthdr_policy, + .maxattr = NFTA_EXTHDR_MAX, +}; + +static int __init nft_exthdr_module_init(void) +{ + return nft_register_expr(&exthdr_ops); +} + +static void __exit nft_exthdr_module_exit(void) +{ + nft_unregister_expr(&exthdr_ops); +} + +module_init(nft_exthdr_module_init); +module_exit(nft_exthdr_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("exthdr"); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c new file mode 100644 index 000000000000..67cc502881f1 --- /dev/null +++ b/net/netfilter/nft_hash.c @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_hash { + struct hlist_head *hash; + unsigned int hsize; + enum nft_registers sreg:8; + enum nft_registers dreg:8; + u8 klen; + u8 dlen; + u16 flags; +}; + +struct nft_hash_elem { + struct hlist_node hnode; + struct nft_data key; + struct nft_data data[]; +}; + +static u32 nft_hash_rnd __read_mostly; +static bool nft_hash_rnd_initted __read_mostly; + +static unsigned int nft_hash_data(const struct nft_data *data, + unsigned int hsize, unsigned int len) +{ + unsigned int h; + + // FIXME: can we reasonably guarantee the upper bits are fixed? + h = jhash2(data->data, len >> 2, nft_hash_rnd); + return ((u64)h * hsize) >> 32; +} + +static void nft_hash_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + const struct nft_hash_elem *elem; + const struct nft_data *key = &data[priv->sreg]; + unsigned int h; + + h = nft_hash_data(key, priv->hsize, priv->klen); + hlist_for_each_entry(elem, &priv->hash[h], hnode) { + if (nft_data_cmp(&elem->key, key, priv->klen)) + continue; + if (priv->flags & NFT_HASH_MAP) + nft_data_copy(&data[priv->dreg], elem->data); + return; + } + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static void nft_hash_elem_destroy(const struct nft_expr *expr, + struct nft_hash_elem *elem) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + + nft_data_uninit(&elem->key, NFT_DATA_VALUE); + if (priv->flags & NFT_HASH_MAP) + nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg)); + kfree(elem); +} + +static const struct nla_policy nft_he_policy[NFTA_HE_MAX + 1] = { + [NFTA_HE_KEY] = { .type = NLA_NESTED }, + [NFTA_HE_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_hash_elem_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr *nla, + struct nft_hash_elem **new) +{ + struct nft_hash *priv = nft_expr_priv(expr); + struct nlattr *tb[NFTA_HE_MAX + 1]; + struct nft_hash_elem *elem; + struct nft_data_desc d1, d2; + unsigned int size; + int err; + + err = nla_parse_nested(tb, NFTA_HE_MAX, nla, nft_he_policy); + if (err < 0) + return err; + + if (tb[NFTA_HE_KEY] == NULL) + return -EINVAL; + size = sizeof(*elem); + + if (priv->flags & NFT_HASH_MAP) { + if (tb[NFTA_HE_DATA] == NULL) + return -EINVAL; + size += sizeof(elem->data[0]); + } else { + if (tb[NFTA_HE_DATA] != NULL) + return -EINVAL; + } + + elem = kzalloc(size, GFP_KERNEL); + if (elem == NULL) + return -ENOMEM; + + err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_HE_KEY]); + if (err < 0) + goto err1; + err = -EINVAL; + if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen) + goto err2; + + if (tb[NFTA_HE_DATA] != NULL) { + err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_HE_DATA]); + if (err < 0) + goto err2; + err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type); + if (err < 0) + goto err3; + } + + *new = elem; + return 0; + +err3: + nft_data_uninit(elem->data, d2.type); +err2: + nft_data_uninit(&elem->key, d1.type); +err1: + kfree(elem); + return err; +} + +static int nft_hash_elem_dump(struct sk_buff *skb, const struct nft_expr *expr, + const struct nft_hash_elem *elem) + +{ + const struct nft_hash *priv = nft_expr_priv(expr); + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_LIST_ELEM); + if (nest == NULL) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_HE_KEY, &elem->key, + NFT_DATA_VALUE, priv->klen) < 0) + goto nla_put_failure; + + if (priv->flags & NFT_HASH_MAP) { + if (nft_data_dump(skb, NFTA_HE_DATA, elem->data, + NFT_DATA_VALUE, priv->dlen) < 0) + goto nla_put_failure; + } + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + +static void nft_hash_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + const struct hlist_node *next; + struct nft_hash_elem *elem; + unsigned int i; + + for (i = 0; i < priv->hsize; i++) { + hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) { + hlist_del(&elem->hnode); + nft_hash_elem_destroy(expr, elem); + } + } + kfree(priv->hash); +} + +static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { + [NFTA_HASH_FLAGS] = { .type = NLA_U32 }, + [NFTA_HASH_SREG] = { .type = NLA_U32 }, + [NFTA_HASH_DREG] = { .type = NLA_U32 }, + [NFTA_HASH_KLEN] = { .type = NLA_U32 }, + [NFTA_HASH_ELEMENTS] = { .type = NLA_NESTED }, +}; + +static int nft_hash_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_hash *priv = nft_expr_priv(expr); + struct nft_hash_elem *elem, *uninitialized_var(new); + const struct nlattr *nla; + unsigned int cnt, i; + unsigned int h; + int err, rem; + + if (unlikely(!nft_hash_rnd_initted)) { + get_random_bytes(&nft_hash_rnd, 4); + nft_hash_rnd_initted = true; + } + + if (tb[NFTA_HASH_SREG] == NULL || + tb[NFTA_HASH_KLEN] == NULL || + tb[NFTA_HASH_ELEMENTS] == NULL) + return -EINVAL; + + if (tb[NFTA_HASH_FLAGS] != NULL) { + priv->flags = ntohl(nla_get_be32(tb[NFTA_HASH_FLAGS])); + if (priv->flags & ~NFT_HASH_MAP) + return -EINVAL; + } + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_HASH_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + if (tb[NFTA_HASH_DREG] != NULL) { + if (!(priv->flags & NFT_HASH_MAP)) + return -EINVAL; + priv->dreg = ntohl(nla_get_be32(tb[NFTA_HASH_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + } + + priv->klen = ntohl(nla_get_be32(tb[NFTA_HASH_KLEN])); + if (priv->klen == 0) + return -EINVAL; + + cnt = 0; + nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) { + if (nla_type(nla) != NFTA_LIST_ELEM) + return -EINVAL; + cnt++; + } + + /* Aim for a load factor of 0.75 */ + cnt = cnt * 4 / 3; + + priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL); + if (priv->hash == NULL) + return -ENOMEM; + priv->hsize = cnt; + + for (i = 0; i < cnt; i++) + INIT_HLIST_HEAD(&priv->hash[i]); + + err = -ENOMEM; + nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) { + err = nft_hash_elem_init(ctx, expr, nla, &new); + if (err < 0) + goto err1; + + h = nft_hash_data(&new->key, priv->hsize, priv->klen); + hlist_for_each_entry(elem, &priv->hash[h], hnode) { + if (nft_data_cmp(&elem->key, &new->key, priv->klen)) + continue; + nft_hash_elem_destroy(expr, new); + err = -EEXIST; + goto err1; + } + hlist_add_head(&new->hnode, &priv->hash[h]); + } + return 0; + +err1: + nft_hash_destroy(ctx, expr); + return err; +} + +static int nft_hash_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + const struct nft_hash_elem *elem; + struct nlattr *list; + unsigned int i; + + if (priv->flags) + if (nla_put_be32(skb, NFTA_HASH_FLAGS, htonl(priv->flags))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (priv->flags & NFT_HASH_MAP) + if (nla_put_be32(skb, NFTA_HASH_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_KLEN, htonl(priv->klen))) + goto nla_put_failure; + + list = nla_nest_start(skb, NFTA_HASH_ELEMENTS); + if (list == NULL) + goto nla_put_failure; + + for (i = 0; i < priv->hsize; i++) { + hlist_for_each_entry(elem, &priv->hash[i], hnode) { + if (nft_hash_elem_dump(skb, expr, elem) < 0) + goto nla_put_failure; + } + } + + nla_nest_end(skb, list); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_hash_ops __read_mostly = { + .name = "hash", + .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)), + .owner = THIS_MODULE, + .eval = nft_hash_eval, + .init = nft_hash_init, + .destroy = nft_hash_destroy, + .dump = nft_hash_dump, + .policy = nft_hash_policy, + .maxattr = NFTA_HASH_MAX, +}; + +static int __init nft_hash_module_init(void) +{ + return nft_register_expr(&nft_hash_ops); +} + +static void __exit nft_hash_module_exit(void) +{ + nft_unregister_expr(&nft_hash_ops); +} + +module_init(nft_hash_module_init); +module_exit(nft_hash_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("hash"); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c new file mode 100644 index 000000000000..3bf42c3cc49a --- /dev/null +++ b/net/netfilter/nft_immediate.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_immediate_expr { + struct nft_data data; + enum nft_registers dreg:8; + u8 dlen; +}; + +static void nft_immediate_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + nft_data_copy(&data[priv->dreg], &priv->data); +} + +static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = { + [NFTA_IMMEDIATE_DREG] = { .type = NLA_U32 }, + [NFTA_IMMEDIATE_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_immediate_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_immediate_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc; + int err; + + if (tb[NFTA_IMMEDIATE_DREG] == NULL || + tb[NFTA_IMMEDIATE_DATA] == NULL) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]); + if (err < 0) + return err; + priv->dlen = desc.len; + + err = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type); + if (err < 0) + goto err1; + + return 0; + +err1: + nft_data_uninit(&priv->data, desc.type); + return err; +} + +static void nft_immediate_destroy(const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg)); +} + +static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(priv->dreg))) + goto nla_put_failure; + + return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data, + nft_dreg_to_type(priv->dreg), priv->dlen); + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_imm_ops __read_mostly = { + .name = "immediate", + .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), + .owner = THIS_MODULE, + .eval = nft_immediate_eval, + .init = nft_immediate_init, + .destroy = nft_immediate_destroy, + .dump = nft_immediate_dump, + .policy = nft_immediate_policy, + .maxattr = NFTA_IMMEDIATE_MAX, +}; + +int __init nft_immediate_module_init(void) +{ + return nft_register_expr(&nft_imm_ops); +} + +void nft_immediate_module_exit(void) +{ + nft_unregister_expr(&nft_imm_ops); +} diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c new file mode 100644 index 000000000000..e0e3fc8aebc3 --- /dev/null +++ b/net/netfilter/nft_limit.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_SPINLOCK(limit_lock); + +struct nft_limit { + u64 tokens; + u64 rate; + u64 unit; + unsigned long stamp; +}; + +static void nft_limit_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_limit *priv = nft_expr_priv(expr); + + spin_lock_bh(&limit_lock); + if (time_after_eq(jiffies, priv->stamp)) { + priv->tokens = priv->rate; + priv->stamp = jiffies + priv->unit * HZ; + } + + if (priv->tokens >= 1) { + priv->tokens--; + spin_unlock_bh(&limit_lock); + return; + } + spin_unlock_bh(&limit_lock); + + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { + [NFTA_LIMIT_RATE] = { .type = NLA_U64 }, + [NFTA_LIMIT_UNIT] = { .type = NLA_U64 }, +}; + +static int nft_limit_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_limit *priv = nft_expr_priv(expr); + + if (tb[NFTA_LIMIT_RATE] == NULL || + tb[NFTA_LIMIT_UNIT] == NULL) + return -EINVAL; + + priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); + priv->unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); + priv->stamp = jiffies + priv->unit * HZ; + priv->tokens = priv->rate; + return 0; +} + +static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_limit *priv = nft_expr_priv(expr); + + if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate))) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_limit_ops __read_mostly = { + .name = "limit", + .size = NFT_EXPR_SIZE(sizeof(struct nft_limit)), + .owner = THIS_MODULE, + .eval = nft_limit_eval, + .init = nft_limit_init, + .dump = nft_limit_dump, + .policy = nft_limit_policy, + .maxattr = NFTA_LIMIT_MAX, +}; + +static int __init nft_limit_module_init(void) +{ + return nft_register_expr(&nft_limit_ops); +} + +static void __exit nft_limit_module_exit(void) +{ + nft_unregister_expr(&nft_limit_ops); +} + +module_init(nft_limit_module_init); +module_exit(nft_limit_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("limit"); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c new file mode 100644 index 000000000000..da495c3b1e7e --- /dev/null +++ b/net/netfilter/nft_log.c @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const char *nft_log_null_prefix = ""; + +struct nft_log { + struct nf_loginfo loginfo; + char *prefix; + int family; +}; + +static void nft_log_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_log *priv = nft_expr_priv(expr); + struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); + + nf_log_packet(net, priv->family, pkt->hooknum, pkt->skb, pkt->in, + pkt->out, &priv->loginfo, "%s", priv->prefix); +} + +static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { + [NFTA_LOG_GROUP] = { .type = NLA_U16 }, + [NFTA_LOG_PREFIX] = { .type = NLA_STRING }, + [NFTA_LOG_SNAPLEN] = { .type = NLA_U32 }, + [NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 }, +}; + +static int nft_log_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_log *priv = nft_expr_priv(expr); + struct nf_loginfo *li = &priv->loginfo; + const struct nlattr *nla; + + priv->family = ctx->afi->family; + + nla = tb[NFTA_LOG_PREFIX]; + if (nla != NULL) { + priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL); + if (priv->prefix == NULL) + return -ENOMEM; + nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1); + } else + priv->prefix = (char *)nft_log_null_prefix; + + li->type = NF_LOG_TYPE_ULOG; + if (tb[NFTA_LOG_GROUP] != NULL) + li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP])); + + if (tb[NFTA_LOG_SNAPLEN] != NULL) + li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN])); + if (tb[NFTA_LOG_QTHRESHOLD] != NULL) { + li->u.ulog.qthreshold = + ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD])); + } + + return 0; +} + +static void nft_log_destroy(const struct nft_expr *expr) +{ + struct nft_log *priv = nft_expr_priv(expr); + + if (priv->prefix != nft_log_null_prefix) + kfree(priv->prefix); +} + +static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_log *priv = nft_expr_priv(expr); + const struct nf_loginfo *li = &priv->loginfo; + + if (priv->prefix != nft_log_null_prefix) + if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix)) + goto nla_put_failure; + if (li->u.ulog.group) + if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group))) + goto nla_put_failure; + if (li->u.ulog.copy_len) + if (nla_put_be32(skb, NFTA_LOG_SNAPLEN, + htonl(li->u.ulog.copy_len))) + goto nla_put_failure; + if (li->u.ulog.qthreshold) + if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD, + htons(li->u.ulog.qthreshold))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_log_ops __read_mostly = { + .name = "log", + .size = NFT_EXPR_SIZE(sizeof(struct nft_log)), + .owner = THIS_MODULE, + .eval = nft_log_eval, + .init = nft_log_init, + .destroy = nft_log_destroy, + .dump = nft_log_dump, + .policy = nft_log_policy, + .maxattr = NFTA_LOG_MAX, +}; + +static int __init nft_log_module_init(void) +{ + return nft_register_expr(&nft_log_ops); +} + +static void __exit nft_log_module_exit(void) +{ + nft_unregister_expr(&nft_log_ops); +} + +module_init(nft_log_module_init); +module_exit(nft_log_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("log"); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c new file mode 100644 index 000000000000..96735aa2f039 --- /dev/null +++ b/net/netfilter/nft_meta.c @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for TCP_TIME_WAIT */ +#include + +struct nft_meta { + enum nft_meta_keys key:8; + enum nft_registers dreg:8; +}; + +static void nft_meta_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + const struct net_device *in = pkt->in, *out = pkt->out; + struct nft_data *dest = &data[priv->dreg]; + + switch (priv->key) { + case NFT_META_LEN: + dest->data[0] = skb->len; + break; + case NFT_META_PROTOCOL: + *(__be16 *)dest->data = skb->protocol; + break; + case NFT_META_PRIORITY: + dest->data[0] = skb->priority; + break; + case NFT_META_MARK: + dest->data[0] = skb->mark; + break; + case NFT_META_IIF: + if (in == NULL) + goto err; + dest->data[0] = in->ifindex; + break; + case NFT_META_OIF: + if (out == NULL) + goto err; + dest->data[0] = out->ifindex; + break; + case NFT_META_IIFNAME: + if (in == NULL) + goto err; + strncpy((char *)dest->data, in->name, sizeof(dest->data)); + break; + case NFT_META_OIFNAME: + if (out == NULL) + goto err; + strncpy((char *)dest->data, out->name, sizeof(dest->data)); + break; + case NFT_META_IIFTYPE: + if (in == NULL) + goto err; + *(u16 *)dest->data = in->type; + break; + case NFT_META_OIFTYPE: + if (out == NULL) + goto err; + *(u16 *)dest->data = out->type; + break; + case NFT_META_SKUID: + if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) + goto err; + + read_lock_bh(&skb->sk->sk_callback_lock); + if (skb->sk->sk_socket == NULL || + skb->sk->sk_socket->file == NULL) { + read_unlock_bh(&skb->sk->sk_callback_lock); + goto err; + } + + dest->data[0] = + from_kuid_munged(&init_user_ns, + skb->sk->sk_socket->file->f_cred->fsuid); + read_unlock_bh(&skb->sk->sk_callback_lock); + break; + case NFT_META_SKGID: + if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT) + goto err; + + read_lock_bh(&skb->sk->sk_callback_lock); + if (skb->sk->sk_socket == NULL || + skb->sk->sk_socket->file == NULL) { + read_unlock_bh(&skb->sk->sk_callback_lock); + goto err; + } + dest->data[0] = + from_kgid_munged(&init_user_ns, + skb->sk->sk_socket->file->f_cred->fsgid); + read_unlock_bh(&skb->sk->sk_callback_lock); + break; +#ifdef CONFIG_NET_CLS_ROUTE + case NFT_META_RTCLASSID: { + const struct dst_entry *dst = skb_dst(skb); + + if (dst == NULL) + goto err; + dest->data[0] = dst->tclassid; + break; + } +#endif +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: + dest->data[0] = skb->secmark; + break; +#endif + default: + WARN_ON(1); + goto err; + } + return; + +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { + [NFTA_META_DREG] = { .type = NLA_U32 }, + [NFTA_META_KEY] = { .type = NLA_U32 }, +}; + +static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_META_DREG] == NULL || + tb[NFTA_META_KEY] == NULL) + return -EINVAL; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_LEN: + case NFT_META_PROTOCOL: + case NFT_META_PRIORITY: + case NFT_META_MARK: + case NFT_META_IIF: + case NFT_META_OIF: + case NFT_META_IIFNAME: + case NFT_META_OIFNAME: + case NFT_META_IIFTYPE: + case NFT_META_OIFTYPE: + case NFT_META_SKUID: + case NFT_META_SKGID: +#ifdef CONFIG_NET_CLS_ROUTE + case NFT_META_RTCLASSID: +#endif +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: +#endif + break; + default: + return -EOPNOTSUPP; + } + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); +} + +static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_meta_ops __read_mostly = { + .name = "meta", + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .owner = THIS_MODULE, + .eval = nft_meta_eval, + .init = nft_meta_init, + .dump = nft_meta_dump, + .policy = nft_meta_policy, + .maxattr = NFTA_META_MAX, +}; + +static int __init nft_meta_module_init(void) +{ + return nft_register_expr(&nft_meta_ops); +} + +static void __exit nft_meta_module_exit(void) +{ + nft_unregister_expr(&nft_meta_ops); +} + +module_init(nft_meta_module_init); +module_exit(nft_meta_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("meta"); diff --git a/net/netfilter/nft_meta_target.c b/net/netfilter/nft_meta_target.c new file mode 100644 index 000000000000..71177df75ffb --- /dev/null +++ b/net/netfilter/nft_meta_target.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_meta { + enum nft_meta_keys key; +}; + +static void nft_meta_eval(const struct nft_expr *expr, + struct nft_data *nfres, + struct nft_data *data, + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *meta = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + u32 val = data->data[0]; + + switch (meta->key) { + case NFT_META_MARK: + skb->mark = val; + break; + case NFT_META_PRIORITY: + skb->priority = val; + break; + case NFT_META_NFTRACE: + skb->nf_trace = val; + break; +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: + skb->secmark = val; + break; +#endif + default: + WARN_ON(1); + } +} + +static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { + [NFTA_META_KEY] = { .type = NLA_U32 }, +}; + +static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[]) +{ + struct nft_meta *meta = nft_expr_priv(expr); + + if (tb[NFTA_META_KEY] == NULL) + return -EINVAL; + + meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (meta->key) { + case NFT_META_MARK: + case NFT_META_PRIORITY: + case NFT_META_NFTRACE: +#ifdef CONFIG_NETWORK_SECMARK + case NFT_META_SECMARK: +#endif + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_meta *meta = nft_expr_priv(expr); + + NLA_PUT_BE32(skb, NFTA_META_KEY, htonl(meta->key)); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops meta_target __read_mostly = { + .name = "meta", + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .owner = THIS_MODULE, + .eval = nft_meta_eval, + .init = nft_meta_init, + .dump = nft_meta_dump, + .policy = nft_meta_policy, + .maxattr = NFTA_META_MAX, +}; + +static int __init nft_meta_target_init(void) +{ + return nft_register_expr(&meta_target); +} + +static void __exit nft_meta_target_exit(void) +{ + nft_unregister_expr(&meta_target); +} + +module_init(nft_meta_target_init); +module_exit(nft_meta_target_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("meta"); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c new file mode 100644 index 000000000000..329f134b3f89 --- /dev/null +++ b/net/netfilter/nft_payload.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_payload { + enum nft_payload_bases base:8; + u8 offset; + u8 len; + enum nft_registers dreg:8; +}; + +static void nft_payload_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_payload *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + struct nft_data *dest = &data[priv->dreg]; + int offset; + + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: + if (!skb_mac_header_was_set(skb)) + goto err; + offset = skb_mac_header(skb) - skb->data; + break; + case NFT_PAYLOAD_NETWORK_HEADER: + offset = skb_network_offset(skb); + break; + case NFT_PAYLOAD_TRANSPORT_HEADER: + offset = skb_transport_offset(skb); + break; + default: + BUG(); + } + offset += priv->offset; + + if (skb_copy_bits(skb, offset, dest->data, priv->len) < 0) + goto err; + return; +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { + [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, +}; + +static int nft_payload_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_payload *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_PAYLOAD_DREG] == NULL || + tb[NFTA_PAYLOAD_BASE] == NULL || + tb[NFTA_PAYLOAD_OFFSET] == NULL || + tb[NFTA_PAYLOAD_LEN] == NULL) + return -EINVAL; + + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: + case NFT_PAYLOAD_NETWORK_HEADER: + case NFT_PAYLOAD_TRANSPORT_HEADER: + break; + default: + return -EOPNOTSUPP; + } + + priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + if (priv->len == 0 || + priv->len > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); +} + +static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_payload *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(priv->dreg)) || + nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) || + nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) || + nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_payload_ops __read_mostly = { + .name = "payload", + .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), + .owner = THIS_MODULE, + .eval = nft_payload_eval, + .init = nft_payload_init, + .dump = nft_payload_dump, + .policy = nft_payload_policy, + .maxattr = NFTA_PAYLOAD_MAX, +}; + +int __init nft_payload_module_init(void) +{ + return nft_register_expr(&nft_payload_ops); +} + +void nft_payload_module_exit(void) +{ + nft_unregister_expr(&nft_payload_ops); +} diff --git a/net/netfilter/nft_set.c b/net/netfilter/nft_set.c new file mode 100644 index 000000000000..7b7c8354c327 --- /dev/null +++ b/net/netfilter/nft_set.c @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_set { + struct rb_root root; + enum nft_registers sreg:8; + enum nft_registers dreg:8; + u8 klen; + u8 dlen; + u16 flags; +}; + +struct nft_set_elem { + struct rb_node node; + enum nft_set_elem_flags flags; + struct nft_data key; + struct nft_data data[]; +}; + +static void nft_set_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_set *priv = nft_expr_priv(expr); + const struct rb_node *parent = priv->root.rb_node; + const struct nft_set_elem *elem, *interval = NULL; + const struct nft_data *key = &data[priv->sreg]; + int d; + + while (parent != NULL) { + elem = rb_entry(parent, struct nft_set_elem, node); + + d = nft_data_cmp(&elem->key, key, priv->klen); + if (d < 0) { + parent = parent->rb_left; + interval = elem; + } else if (d > 0) + parent = parent->rb_right; + else { +found: + if (elem->flags & NFT_SE_INTERVAL_END) + goto out; + if (priv->flags & NFT_SET_MAP) + nft_data_copy(&data[priv->dreg], elem->data); + return; + } + } + + if (priv->flags & NFT_SET_INTERVAL && interval != NULL) { + elem = interval; + goto found; + } +out: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static void nft_set_elem_destroy(const struct nft_expr *expr, + struct nft_set_elem *elem) +{ + const struct nft_set *priv = nft_expr_priv(expr); + + nft_data_uninit(&elem->key, NFT_DATA_VALUE); + if (priv->flags & NFT_SET_MAP) + nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg)); + kfree(elem); +} + +static const struct nla_policy nft_se_policy[NFTA_SE_MAX + 1] = { + [NFTA_SE_KEY] = { .type = NLA_NESTED }, + [NFTA_SE_DATA] = { .type = NLA_NESTED }, + [NFTA_SE_FLAGS] = { .type = NLA_U32 }, +}; + +static int nft_set_elem_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr *nla, + struct nft_set_elem **new) +{ + struct nft_set *priv = nft_expr_priv(expr); + struct nlattr *tb[NFTA_SE_MAX + 1]; + struct nft_set_elem *elem; + struct nft_data_desc d1, d2; + enum nft_set_elem_flags flags = 0; + unsigned int size; + int err; + + err = nla_parse_nested(tb, NFTA_SE_MAX, nla, nft_se_policy); + if (err < 0) + return err; + + if (tb[NFTA_SE_KEY] == NULL) + return -EINVAL; + + if (tb[NFTA_SE_FLAGS] != NULL) { + flags = ntohl(nla_get_be32(tb[NFTA_SE_FLAGS])); + if (flags & ~NFT_SE_INTERVAL_END) + return -EINVAL; + } + + size = sizeof(*elem); + if (priv->flags & NFT_SET_MAP) { + if (tb[NFTA_SE_DATA] == NULL && !(flags & NFT_SE_INTERVAL_END)) + return -EINVAL; + size += sizeof(elem->data[0]); + } else { + if (tb[NFTA_SE_DATA] != NULL) + return -EINVAL; + } + + elem = kzalloc(size, GFP_KERNEL); + if (elem == NULL) + return -ENOMEM; + elem->flags = flags; + + err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_SE_KEY]); + if (err < 0) + goto err1; + err = -EINVAL; + if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen) + goto err2; + + if (tb[NFTA_SE_DATA] != NULL) { + err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_SE_DATA]); + if (err < 0) + goto err2; + err = -EINVAL; + if (priv->dreg != NFT_REG_VERDICT && d2.len != priv->dlen) + goto err2; + err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type); + if (err < 0) + goto err3; + } + + *new = elem; + return 0; + +err3: + nft_data_uninit(elem->data, d2.type); +err2: + nft_data_uninit(&elem->key, d1.type); +err1: + kfree(elem); + return err; +} + +static int nft_set_elem_dump(struct sk_buff *skb, const struct nft_expr *expr, + const struct nft_set_elem *elem) + +{ + const struct nft_set *priv = nft_expr_priv(expr); + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_LIST_ELEM); + if (nest == NULL) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_SE_KEY, &elem->key, + NFT_DATA_VALUE, priv->klen) < 0) + goto nla_put_failure; + + if (priv->flags & NFT_SET_MAP && !(elem->flags & NFT_SE_INTERVAL_END)) { + if (nft_data_dump(skb, NFTA_SE_DATA, elem->data, + nft_dreg_to_type(priv->dreg), priv->dlen) < 0) + goto nla_put_failure; + } + + if (elem->flags){ + if (nla_put_be32(skb, NFTA_SE_FLAGS, htonl(elem->flags))) + goto nla_put_failure; + } + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + +static void nft_set_destroy(const struct nft_expr *expr) +{ + struct nft_set *priv = nft_expr_priv(expr); + struct nft_set_elem *elem; + struct rb_node *node; + + while ((node = priv->root.rb_node) != NULL) { + rb_erase(node, &priv->root); + elem = rb_entry(node, struct nft_set_elem, node); + nft_set_elem_destroy(expr, elem); + } +} + +static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { + [NFTA_SET_FLAGS] = { .type = NLA_U32 }, + [NFTA_SET_SREG] = { .type = NLA_U32 }, + [NFTA_SET_DREG] = { .type = NLA_U32 }, + [NFTA_SET_KLEN] = { .type = NLA_U32 }, + [NFTA_SET_DLEN] = { .type = NLA_U32 }, + [NFTA_SET_ELEMENTS] = { .type = NLA_NESTED }, +}; + +static int nft_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_set *priv = nft_expr_priv(expr); + struct nft_set_elem *elem, *uninitialized_var(new); + struct rb_node *parent, **p; + const struct nlattr *nla; + int err, rem, d; + + if (tb[NFTA_SET_SREG] == NULL || + tb[NFTA_SET_KLEN] == NULL || + tb[NFTA_SET_ELEMENTS] == NULL) + return -EINVAL; + + priv->root = RB_ROOT; + + if (tb[NFTA_SET_FLAGS] != NULL) { + priv->flags = ntohl(nla_get_be32(tb[NFTA_SET_FLAGS])); + if (priv->flags & ~(NFT_SET_INTERVAL | NFT_SET_MAP)) + return -EINVAL; + } + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_SET_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + if (tb[NFTA_SET_DREG] != NULL) { + if (!(priv->flags & NFT_SET_MAP)) + return -EINVAL; + if (tb[NFTA_SET_DLEN] == NULL) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_SET_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + if (priv->dreg == NFT_REG_VERDICT) + priv->dlen = FIELD_SIZEOF(struct nft_data, data); + else { + priv->dlen = ntohl(nla_get_be32(tb[NFTA_SET_DLEN])); + if (priv->dlen == 0 || + priv->dlen > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + } + } else { + if (priv->flags & NFT_SET_MAP) + return -EINVAL; + if (tb[NFTA_SET_DLEN] != NULL) + return -EINVAL; + } + + priv->klen = ntohl(nla_get_be32(tb[NFTA_SET_KLEN])); + if (priv->klen == 0 || + priv->klen > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + nla_for_each_nested(nla, tb[NFTA_SET_ELEMENTS], rem) { + err = -EINVAL; + if (nla_type(nla) != NFTA_LIST_ELEM) + goto err1; + + err = nft_set_elem_init(ctx, expr, nla, &new); + if (err < 0) + goto err1; + + parent = NULL; + p = &priv->root.rb_node; + while (*p != NULL) { + parent = *p; + elem = rb_entry(parent, struct nft_set_elem, node); + d = nft_data_cmp(&elem->key, &new->key, priv->klen); + if (d < 0) + p = &parent->rb_left; + else if (d > 0) + p = &parent->rb_right; + else { + err = -EEXIST; + goto err2; + } + } + rb_link_node(&new->node, parent, p); + rb_insert_color(&new->node, &priv->root); + } + + return 0; + +err2: + nft_set_elem_destroy(expr, new); +err1: + nft_set_destroy(expr); + return err; +} + +static int nft_set_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_set *priv = nft_expr_priv(expr); + const struct nft_set_elem *elem; + struct rb_node *node; + struct nlattr *list; + + if (priv->flags) { + if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(priv->flags))) + goto nla_put_failure; + } + + if (nla_put_be32(skb, NFTA_SET_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_SET_KLEN, htonl(priv->klen))) + goto nla_put_failure; + + if (priv->flags & NFT_SET_MAP) { + if (nla_put_be32(skb, NFTA_SET_DREG, htonl(priv->dreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_SET_DLEN, htonl(priv->dlen))) + goto nla_put_failure; + } + + list = nla_nest_start(skb, NFTA_SET_ELEMENTS); + if (list == NULL) + goto nla_put_failure; + + for (node = rb_first(&priv->root); node; node = rb_next(node)) { + elem = rb_entry(node, struct nft_set_elem, node); + if (nft_set_elem_dump(skb, expr, elem) < 0) + goto nla_put_failure; + } + + nla_nest_end(skb, list); + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_set_ops __read_mostly = { + .name = "set", + .size = NFT_EXPR_SIZE(sizeof(struct nft_set)), + .owner = THIS_MODULE, + .eval = nft_set_eval, + .init = nft_set_init, + .destroy = nft_set_destroy, + .dump = nft_set_dump, + .policy = nft_set_policy, + .maxattr = NFTA_SET_MAX, +}; + +static int __init nft_set_module_init(void) +{ + return nft_register_expr(&nft_set_ops); +} + +static void __exit nft_set_module_exit(void) +{ + nft_unregister_expr(&nft_set_ops); +} + +module_init(nft_set_module_init); +module_exit(nft_set_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_EXPR("set"); -- cgit v1.2.3 From 20a69341f2d00cd042e81c82289fba8a13c05a25 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 11 Oct 2013 12:06:22 +0200 Subject: netfilter: nf_tables: add netlink set API This patch adds the new netlink API for maintaining nf_tables sets independently of the ruleset. The API supports the following operations: - creation of sets - deletion of sets - querying of specific sets - dumping of all sets - addition of set elements - removal of set elements - dumping of all set elements Sets are identified by name, each table defines an individual namespace. The name of a set may be allocated automatically, this is mostly useful in combination with the NFT_SET_ANONYMOUS flag, which destroys a set automatically once the last reference has been released. Sets can be marked constant, meaning they're not allowed to change while linked to a rule. This allows to perform lockless operation for set types that would otherwise require locking. Additionally, if the implementation supports it, sets can (as before) be used as maps, associating a data value with each key (or range), by specifying the NFT_SET_MAP flag and can be used for interval queries by specifying the NFT_SET_INTERVAL flag. Set elements are added and removed incrementally. All element operations support batching, reducing netlink message and set lookup overhead. The old "set" and "hash" expressions are replaced by a generic "lookup" expression, which binds to the specified set. Userspace is not aware of the actual set implementation used by the kernel anymore, all configuration options are generic. Currently the implementation selection logic is largely missing and the kernel will simply use the first registered implementation supporting the requested operation. Eventually, the plan is to have userspace supply a description of the data characteristics and select the implementation based on expected performance and memory use. This patch includes the new 'lookup' expression to look up for element matching in the set. This patch includes kernel-doc descriptions for this set API and it also includes the following fixes. From Patrick McHardy: * netfilter: nf_tables: fix set element data type in dumps * netfilter: nf_tables: fix indentation of struct nft_set_elem comments * netfilter: nf_tables: fix oops in nft_validate_data_load() * netfilter: nf_tables: fix oops while listing sets of built-in tables * netfilter: nf_tables: destroy anonymous sets immediately if binding fails * netfilter: nf_tables: propagate context to set iter callback * netfilter: nf_tables: add loop detection From Pablo Neira Ayuso: * netfilter: nf_tables: allow to dump all existing sets * netfilter: nf_tables: fix wrong type for flags variable in newelem Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 6 +- net/netfilter/Makefile | 2 +- net/netfilter/nf_tables_api.c | 1078 +++++++++++++++++++++++++++++++++++++++- net/netfilter/nf_tables_core.c | 2 - net/netfilter/nft_hash.c | 329 ++++-------- net/netfilter/nft_immediate.c | 11 + net/netfilter/nft_lookup.c | 135 +++++ net/netfilter/nft_rbtree.c | 247 +++++++++ net/netfilter/nft_set.c | 381 -------------- 9 files changed, 1573 insertions(+), 618 deletions(-) create mode 100644 net/netfilter/nft_lookup.c create mode 100644 net/netfilter/nft_rbtree.c delete mode 100644 net/netfilter/nft_set.c (limited to 'net/netfilter') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c271e1af93b5..aa184a46bbf3 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -430,13 +430,13 @@ config NFT_CT depends on NF_CONNTRACK tristate "Netfilter nf_tables conntrack module" -config NFT_SET +config NFT_RBTREE depends on NF_TABLES - tristate "Netfilter nf_tables set module" + tristate "Netfilter nf_tables rbtree set module" config NFT_HASH depends on NF_TABLES - tristate "Netfilter nf_tables hash module" + tristate "Netfilter nf_tables hash set module" config NFT_COUNTER depends on NF_TABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1ca3f3932826..b6b78754e4cc 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -75,7 +75,7 @@ obj-$(CONFIG_NFT_META) += nft_meta.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o #nf_tables-objs += nft_meta_target.o -obj-$(CONFIG_NFT_SET) += nft_set.o +obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o obj-$(CONFIG_NFT_HASH) += nft_hash.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7d59c89c6c75..5092c817c222 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, 2008 Patrick McHardy + * Copyright (c) 2007-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -315,6 +315,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, nla_strlcpy(table->name, name, nla_len(name)); INIT_LIST_HEAD(&table->chains); + INIT_LIST_HEAD(&table->sets); list_add_tail(&table->list, &afi->tables); nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); @@ -409,6 +410,7 @@ again: } table->flags |= NFT_TABLE_BUILTIN; + INIT_LIST_HEAD(&table->sets); list_add_tail(&table->list, &afi->tables); nf_tables_table_notify(NULL, NULL, table, NFT_MSG_NEWTABLE, family); list_for_each_entry(chain, &table->chains, list) @@ -820,10 +822,14 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, } static void nft_ctx_init(struct nft_ctx *ctx, + const struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nft_af_info *afi, const struct nft_table *table, const struct nft_chain *chain) { + ctx->skb = skb; + ctx->nlh = nlh; ctx->afi = afi; ctx->table = table; ctx->chain = chain; @@ -1301,7 +1307,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, rule->handle = handle; rule->dlen = size; - nft_ctx_init(&ctx, afi, table, chain); + nft_ctx_init(&ctx, skb, nlh, afi, table, chain); expr = nft_expr_first(rule); for (i = 0; i < n; i++) { err = nf_tables_newexpr(&ctx, &info[i], expr); @@ -1392,6 +1398,939 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, return 0; } +/* + * Sets + */ + +static LIST_HEAD(nf_tables_set_ops); + +int nft_register_set(struct nft_set_ops *ops) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_tail(&ops->list, &nf_tables_set_ops); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return 0; +} +EXPORT_SYMBOL_GPL(nft_register_set); + +void nft_unregister_set(struct nft_set_ops *ops) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del(&ops->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_set); + +static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[]) +{ + const struct nft_set_ops *ops; + u32 features; + +#ifdef CONFIG_MODULES + if (list_empty(&nf_tables_set_ops)) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-set"); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (!list_empty(&nf_tables_set_ops)) + return ERR_PTR(-EAGAIN); + } +#endif + features = 0; + if (nla[NFTA_SET_FLAGS] != NULL) { + features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); + features &= NFT_SET_INTERVAL | NFT_SET_MAP; + } + + // FIXME: implement selection properly + list_for_each_entry(ops, &nf_tables_set_ops, list) { + if ((ops->features & features) != features) + continue; + if (!try_module_get(ops->owner)) + continue; + return ops; + } + + return ERR_PTR(-EOPNOTSUPP); +} + +static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { + [NFTA_SET_TABLE] = { .type = NLA_STRING }, + [NFTA_SET_NAME] = { .type = NLA_STRING }, + [NFTA_SET_FLAGS] = { .type = NLA_U32 }, + [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 }, + [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, + [NFTA_SET_DATA_TYPE] = { .type = NLA_U32 }, + [NFTA_SET_DATA_LEN] = { .type = NLA_U32 }, +}; + +static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, + const struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table = NULL; + + afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + if (nla[NFTA_SET_TABLE] != NULL) { + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + } + + nft_ctx_init(ctx, skb, nlh, afi, table, NULL); + return 0; +} + +struct nft_set *nf_tables_set_lookup(const struct nft_table *table, + const struct nlattr *nla) +{ + struct nft_set *set; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + + list_for_each_entry(set, &table->sets, list) { + if (!nla_strcmp(nla, set->name)) + return set; + } + return ERR_PTR(-ENOENT); +} + +static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, + const char *name) +{ + const struct nft_set *i; + const char *p; + unsigned long *inuse; + unsigned int n = 0; + + p = strnchr(name, IFNAMSIZ, '%'); + if (p != NULL) { + if (p[1] != 'd' || strchr(p + 2, '%')) + return -EINVAL; + + inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (inuse == NULL) + return -ENOMEM; + + list_for_each_entry(i, &ctx->table->sets, list) { + if (!sscanf(i->name, name, &n)) + continue; + if (n < 0 || n > BITS_PER_LONG * PAGE_SIZE) + continue; + set_bit(n, inuse); + } + + n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE); + free_page((unsigned long)inuse); + } + + snprintf(set->name, sizeof(set->name), name, n); + list_for_each_entry(i, &ctx->table->sets, list) { + if (!strcmp(set->name, i->name)) + return -ENFILE; + } + return 0; +} + +static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, + const struct nft_set *set, u16 event, u16 flags) +{ + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + u32 portid = NETLINK_CB(ctx->skb).portid; + u32 seq = ctx->nlh->nlmsg_seq; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = ctx->afi->family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_SET_NAME, set->name)) + goto nla_put_failure; + if (set->flags != 0) + if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags))) + goto nla_put_failure; + + if (nla_put_be32(skb, NFTA_SET_KEY_TYPE, htonl(set->ktype))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_SET_KEY_LEN, htonl(set->klen))) + goto nla_put_failure; + if (set->flags & NFT_SET_MAP) { + if (nla_put_be32(skb, NFTA_SET_DATA_TYPE, htonl(set->dtype))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen))) + goto nla_put_failure; + } + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_set_notify(const struct nft_ctx *ctx, + const struct nft_set *set, + int event) +{ + struct sk_buff *skb; + u32 portid = NETLINK_CB(ctx->skb).portid; + struct net *net = sock_net(ctx->skb->sk); + bool report; + int err; + + report = nlmsg_report(ctx->nlh); + if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_set(skb, ctx, set, event, 0); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + GFP_KERNEL); +err: + if (err < 0) + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + return err; +} + +static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nft_set *set; + unsigned int idx = 0, s_idx = cb->args[0]; + + if (cb->args[1]) + return skb->len; + + list_for_each_entry(set, &ctx->table->sets, list) { + if (idx < s_idx) + goto cont; + if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET, + NLM_F_MULTI) < 0) { + cb->args[0] = idx; + goto done; + } +cont: + idx++; + } + cb->args[1] = 1; +done: + return skb->len; +} + +static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nft_set *set; + unsigned int idx = 0, s_idx = cb->args[0]; + struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; + + if (cb->args[1]) + return skb->len; + + list_for_each_entry(table, &ctx->afi->tables, list) { + if (cur_table && cur_table != table) + continue; + + ctx->table = table; + list_for_each_entry(set, &ctx->table->sets, list) { + if (idx < s_idx) + goto cont; + if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET, + NLM_F_MULTI) < 0) { + cb->args[0] = idx; + cb->args[2] = (unsigned long) table; + goto done; + } +cont: + idx++; + } + } + cb->args[1] = 1; +done: + return skb->len; +} + +static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + struct nlattr *nla[NFTA_SET_MAX + 1]; + struct nft_ctx ctx; + int err, ret; + + err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_MAX, + nft_set_policy); + if (err < 0) + return err; + + err = nft_ctx_init_from_setattr(&ctx, cb->skb, cb->nlh, (void *)nla); + if (err < 0) + return err; + + if (ctx.table == NULL) + ret = nf_tables_dump_sets_all(&ctx, skb, cb); + else + ret = nf_tables_dump_sets_table(&ctx, skb, cb); + + return ret; +} + +static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nft_set *set; + struct nft_ctx ctx; + struct sk_buff *skb2; + int err; + + /* Verify existance before starting dump */ + err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_sets, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); + if (IS_ERR(set)) + return PTR_ERR(set); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + err = nf_tables_fill_set(skb2, &ctx, set, NFT_MSG_NEWSET, 0); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); + +err: + kfree_skb(skb2); + return err; +} + +static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_set_ops *ops; + const struct nft_af_info *afi; + struct nft_table *table; + struct nft_set *set; + struct nft_ctx ctx; + char name[IFNAMSIZ]; + unsigned int size; + bool create; + u32 ktype, klen, dlen, dtype, flags; + int err; + + if (nla[NFTA_SET_TABLE] == NULL || + nla[NFTA_SET_NAME] == NULL || + nla[NFTA_SET_KEY_LEN] == NULL) + return -EINVAL; + + ktype = NFT_DATA_VALUE; + if (nla[NFTA_SET_KEY_TYPE] != NULL) { + ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); + if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK) + return -EINVAL; + } + + klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); + if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + + flags = 0; + if (nla[NFTA_SET_FLAGS] != NULL) { + flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); + if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | + NFT_SET_INTERVAL | NFT_SET_MAP)) + return -EINVAL; + } + + dtype = 0; + dlen = 0; + if (nla[NFTA_SET_DATA_TYPE] != NULL) { + if (!(flags & NFT_SET_MAP)) + return -EINVAL; + + dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE])); + if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK && + dtype != NFT_DATA_VERDICT) + return -EINVAL; + + if (dtype != NFT_DATA_VERDICT) { + if (nla[NFTA_SET_DATA_LEN] == NULL) + return -EINVAL; + dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); + if (dlen == 0 || + dlen > FIELD_SIZEOF(struct nft_data, data)) + return -EINVAL; + } else + dlen = sizeof(struct nft_data); + } else if (flags & NFT_SET_MAP) + return -EINVAL; + + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; + + afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, create); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], create); + if (IS_ERR(table)) + return PTR_ERR(table); + + nft_ctx_init(&ctx, skb, nlh, afi, table, NULL); + + set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]); + if (IS_ERR(set)) { + if (PTR_ERR(set) != -ENOENT) + return PTR_ERR(set); + set = NULL; + } + + if (set != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + return 0; + } + + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -ENOENT; + + ops = nft_select_set_ops(nla); + if (IS_ERR(ops)) + return PTR_ERR(ops); + + size = 0; + if (ops->privsize != NULL) + size = ops->privsize(nla); + + err = -ENOMEM; + set = kzalloc(sizeof(*set) + size, GFP_KERNEL); + if (set == NULL) + goto err1; + + nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name)); + err = nf_tables_set_alloc_name(&ctx, set, name); + if (err < 0) + goto err2; + + INIT_LIST_HEAD(&set->bindings); + set->ops = ops; + set->ktype = ktype; + set->klen = klen; + set->dtype = dtype; + set->dlen = dlen; + set->flags = flags; + + err = ops->init(set, nla); + if (err < 0) + goto err2; + + list_add_tail(&set->list, &table->sets); + nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET); + return 0; + +err2: + kfree(set); +err1: + module_put(ops->owner); + return err; +} + +static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) +{ + list_del(&set->list); + if (!(set->flags & NFT_SET_ANONYMOUS)) + nf_tables_set_notify(ctx, set, NFT_MSG_DELSET); + + set->ops->destroy(set); + module_put(set->ops->owner); + kfree(set); +} + +static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + struct nft_set *set; + struct nft_ctx ctx; + int err; + + if (nla[NFTA_SET_TABLE] == NULL) + return -EINVAL; + + err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); + if (IS_ERR(set)) + return PTR_ERR(set); + if (!list_empty(&set->bindings)) + return -EBUSY; + + nf_tables_set_destroy(&ctx, set); + return 0; +} + +static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem) +{ + enum nft_registers dreg; + + dreg = nft_type_to_reg(set->dtype); + return nft_validate_data_load(ctx, dreg, &elem->data, set->dtype); +} + +int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding) +{ + struct nft_set_binding *i; + struct nft_set_iter iter; + + if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) + return -EBUSY; + + if (set->flags & NFT_SET_MAP) { + /* If the set is already bound to the same chain all + * jumps are already validated for that chain. + */ + list_for_each_entry(i, &set->bindings, list) { + if (i->chain == binding->chain) + goto bind; + } + + iter.skip = 0; + iter.count = 0; + iter.err = 0; + iter.fn = nf_tables_bind_check_setelem; + + set->ops->walk(ctx, set, &iter); + if (iter.err < 0) { + /* Destroy anonymous sets if binding fails */ + if (set->flags & NFT_SET_ANONYMOUS) + nf_tables_set_destroy(ctx, set); + + return iter.err; + } + } +bind: + binding->chain = ctx->chain; + list_add_tail(&binding->list, &set->bindings); + return 0; +} + +void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding) +{ + list_del(&binding->list); + + if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) + nf_tables_set_destroy(ctx, set); +} + +/* + * Set elements + */ + +static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { + [NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 }, +}; + +static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { + [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING }, + [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING }, + [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED }, +}; + +static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, + const struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + + afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE], false); + if (IS_ERR(table)) + return PTR_ERR(table); + + nft_ctx_init(ctx, skb, nlh, afi, table, NULL); + return 0; +} + +static int nf_tables_fill_setelem(struct sk_buff *skb, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + unsigned char *b = skb_tail_pointer(skb); + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_LIST_ELEM); + if (nest == NULL) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE, + set->klen) < 0) + goto nla_put_failure; + + if (set->flags & NFT_SET_MAP && + !(elem->flags & NFT_SET_ELEM_INTERVAL_END) && + nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data, + set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE, + set->dlen) < 0) + goto nla_put_failure; + + if (elem->flags != 0) + if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags))) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nlmsg_trim(skb, b); + return -EMSGSIZE; +} + +struct nft_set_dump_args { + const struct netlink_callback *cb; + struct nft_set_iter iter; + struct sk_buff *skb; +}; + +static int nf_tables_dump_setelem(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem) +{ + struct nft_set_dump_args *args; + + args = container_of(iter, struct nft_set_dump_args, iter); + return nf_tables_fill_setelem(args->skb, set, elem); +} + +static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct nft_set *set; + struct nft_set_dump_args args; + struct nft_ctx ctx; + struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1]; + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + struct nlattr *nest; + u32 portid, seq; + int event, err; + + nfmsg = nlmsg_data(cb->nlh); + err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_ELEM_LIST_MAX, + nft_set_elem_list_policy); + if (err < 0) + return err; + + err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + + event = NFT_MSG_NEWSETELEM; + event |= NFNL_SUBSYS_NFTABLES << 8; + portid = NETLINK_CB(cb->skb).portid; + seq = cb->nlh->nlmsg_seq; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + NLM_F_MULTI); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = NFPROTO_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name)) + goto nla_put_failure; + + nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS); + if (nest == NULL) + goto nla_put_failure; + + args.cb = cb; + args.skb = skb; + args.iter.skip = cb->args[0]; + args.iter.count = 0; + args.iter.err = 0; + args.iter.fn = nf_tables_dump_setelem; + set->ops->walk(&ctx, set, &args.iter); + + nla_nest_end(skb, nest); + nlmsg_end(skb, nlh); + + if (args.iter.err && args.iter.err != -EMSGSIZE) + return args.iter.err; + if (args.iter.count == cb->args[0]) + return 0; + + cb->args[0] = args.iter.count; + return skb->len; + +nla_put_failure: + return -ENOSPC; +} + +static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nft_set *set; + struct nft_ctx ctx; + int err; + + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_set, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + return -EOPNOTSUPP; +} + +static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, + const struct nlattr *attr) +{ + struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; + struct nft_data_desc d1, d2; + struct nft_set_elem elem; + struct nft_set_binding *binding; + enum nft_registers dreg; + int err; + + err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy); + if (err < 0) + return err; + + if (nla[NFTA_SET_ELEM_KEY] == NULL) + return -EINVAL; + + elem.flags = 0; + if (nla[NFTA_SET_ELEM_FLAGS] != NULL) { + elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); + if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; + } + + if (set->flags & NFT_SET_MAP) { + if (nla[NFTA_SET_ELEM_DATA] == NULL && + !(elem.flags & NFT_SET_ELEM_INTERVAL_END)) + return -EINVAL; + } else { + if (nla[NFTA_SET_ELEM_DATA] != NULL) + return -EINVAL; + } + + err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]); + if (err < 0) + goto err1; + err = -EINVAL; + if (d1.type != NFT_DATA_VALUE || d1.len != set->klen) + goto err2; + + err = -EEXIST; + if (set->ops->get(set, &elem) == 0) + goto err2; + + if (nla[NFTA_SET_ELEM_DATA] != NULL) { + err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]); + if (err < 0) + goto err2; + + err = -EINVAL; + if (set->dtype != NFT_DATA_VERDICT && d2.len != set->dlen) + goto err3; + + dreg = nft_type_to_reg(set->dtype); + list_for_each_entry(binding, &set->bindings, list) { + struct nft_ctx bind_ctx = { + .afi = ctx->afi, + .table = ctx->table, + .chain = binding->chain, + }; + + err = nft_validate_data_load(&bind_ctx, dreg, + &elem.data, d2.type); + if (err < 0) + goto err3; + } + } + + err = set->ops->insert(set, &elem); + if (err < 0) + goto err3; + + return 0; + +err3: + if (nla[NFTA_SET_ELEM_DATA] != NULL) + nft_data_uninit(&elem.data, d2.type); +err2: + nft_data_uninit(&elem.key, d1.type); +err1: + return err; +} + +static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nlattr *attr; + struct nft_set *set; + struct nft_ctx ctx; + int rem, err; + + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + return -EBUSY; + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_add_set_elem(&ctx, set, attr); + if (err < 0) + return err; + } + return 0; +} + +static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set, + const struct nlattr *attr) +{ + struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; + struct nft_data_desc desc; + struct nft_set_elem elem; + int err; + + err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy); + if (err < 0) + goto err1; + + err = -EINVAL; + if (nla[NFTA_SET_ELEM_KEY] == NULL) + goto err1; + + err = nft_data_init(ctx, &elem.key, &desc, nla[NFTA_SET_ELEM_KEY]); + if (err < 0) + goto err1; + + err = -EINVAL; + if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) + goto err2; + + err = set->ops->get(set, &elem); + if (err < 0) + goto err2; + + set->ops->remove(set, &elem); + + nft_data_uninit(&elem.key, NFT_DATA_VALUE); + if (set->flags & NFT_SET_MAP) + nft_data_uninit(&elem.data, set->dtype); + +err2: + nft_data_uninit(&elem.key, desc.type); +err1: + return err; +} + +static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nlattr *attr; + struct nft_set *set; + struct nft_ctx ctx; + int rem, err; + + err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + return -EBUSY; + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_del_setelem(&ctx, set, attr); + if (err < 0) + return err; + } + return 0; +} + static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { [NFT_MSG_NEWTABLE] = { .call = nf_tables_newtable, @@ -1438,6 +2377,36 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, + [NFT_MSG_NEWSET] = { + .call = nf_tables_newset, + .attr_count = NFTA_SET_MAX, + .policy = nft_set_policy, + }, + [NFT_MSG_GETSET] = { + .call = nf_tables_getset, + .attr_count = NFTA_SET_MAX, + .policy = nft_set_policy, + }, + [NFT_MSG_DELSET] = { + .call = nf_tables_delset, + .attr_count = NFTA_SET_MAX, + .policy = nft_set_policy, + }, + [NFT_MSG_NEWSETELEM] = { + .call = nf_tables_newsetelem, + .attr_count = NFTA_SET_ELEM_LIST_MAX, + .policy = nft_set_elem_list_policy, + }, + [NFT_MSG_GETSETELEM] = { + .call = nf_tables_getsetelem, + .attr_count = NFTA_SET_ELEM_LIST_MAX, + .policy = nft_set_elem_list_policy, + }, + [NFT_MSG_DELSETELEM] = { + .call = nf_tables_delsetelem, + .attr_count = NFTA_SET_ELEM_LIST_MAX, + .policy = nft_set_elem_list_policy, + }, }; static const struct nfnetlink_subsystem nf_tables_subsys = { @@ -1447,6 +2416,90 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .cb = nf_tables_cb, }; +/* + * Loop detection - walk through the ruleset beginning at the destination chain + * of a new jump until either the source chain is reached (loop) or all + * reachable chains have been traversed. + * + * The loop check is performed whenever a new jump verdict is added to an + * expression or verdict map or a verdict map is bound to a new chain. + */ + +static int nf_tables_check_loops(const struct nft_ctx *ctx, + const struct nft_chain *chain); + +static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem) +{ + switch (elem->data.verdict) { + case NFT_JUMP: + case NFT_GOTO: + return nf_tables_check_loops(ctx, elem->data.chain); + default: + return 0; + } +} + +static int nf_tables_check_loops(const struct nft_ctx *ctx, + const struct nft_chain *chain) +{ + const struct nft_rule *rule; + const struct nft_expr *expr, *last; + const struct nft_data *data; + const struct nft_set *set; + struct nft_set_binding *binding; + struct nft_set_iter iter; + int err; + + if (ctx->chain == chain) + return -ELOOP; + + list_for_each_entry(rule, &chain->rules, list) { + nft_rule_for_each_expr(expr, last, rule) { + if (!expr->ops->get_verdict) + continue; + + data = expr->ops->get_verdict(expr); + if (data == NULL) + break; + + switch (data->verdict) { + case NFT_JUMP: + case NFT_GOTO: + err = nf_tables_check_loops(ctx, data->chain); + if (err < 0) + return err; + default: + break; + } + } + } + + list_for_each_entry(set, &ctx->table->sets, list) { + if (!(set->flags & NFT_SET_MAP) || + set->dtype != NFT_DATA_VERDICT) + continue; + + list_for_each_entry(binding, &set->bindings, list) { + if (binding->chain != chain) + continue; + + iter.skip = 0; + iter.count = 0; + iter.err = 0; + iter.fn = nf_tables_loop_check_setelem; + + set->ops->walk(ctx, set, &iter); + if (iter.err < 0) + return iter.err; + } + } + + return 0; +} + /** * nft_validate_input_register - validate an expressions' input register * @@ -1500,11 +2553,25 @@ int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg, const struct nft_data *data, enum nft_data_types type) { + int err; + switch (reg) { case NFT_REG_VERDICT: if (data == NULL || type != NFT_DATA_VERDICT) return -EINVAL; - // FIXME: do loop detection + + if (data->verdict == NFT_GOTO || data->verdict == NFT_JUMP) { + err = nf_tables_check_loops(ctx, data->chain); + if (err < 0) + return err; + + if (ctx->chain->level + 1 > data->chain->level) { + if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE) + return -EMLINK; + data->chain->level = ctx->chain->level + 1; + } + } + return 0; default: if (data != NULL && type != NFT_DATA_VALUE) @@ -1555,11 +2622,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, if (chain->flags & NFT_BASE_CHAIN) return -EOPNOTSUPP; - if (ctx->chain->level + 1 > chain->level) { - if (ctx->chain->level + 1 == 16) - return -EMLINK; - chain->level = ctx->chain->level + 1; - } chain->use++; data->chain = chain; desc->len = sizeof(data); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index bc7fb85d4002..fd0ecd3255c1 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -20,8 +20,6 @@ #include #include -#define NFT_JUMP_STACK_SIZE 16 - unsigned int nft_do_chain(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 67cc502881f1..3d3f8fce10a5 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,11 +21,6 @@ struct nft_hash { struct hlist_head *hash; unsigned int hsize; - enum nft_registers sreg:8; - enum nft_registers dreg:8; - u8 klen; - u8 dlen; - u16 flags; }; struct nft_hash_elem { @@ -42,213 +37,140 @@ static unsigned int nft_hash_data(const struct nft_data *data, { unsigned int h; - // FIXME: can we reasonably guarantee the upper bits are fixed? - h = jhash2(data->data, len >> 2, nft_hash_rnd); + h = jhash(data->data, len, nft_hash_rnd); return ((u64)h * hsize) >> 32; } -static void nft_hash_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static bool nft_hash_lookup(const struct nft_set *set, + const struct nft_data *key, + struct nft_data *data) { - const struct nft_hash *priv = nft_expr_priv(expr); - const struct nft_hash_elem *elem; - const struct nft_data *key = &data[priv->sreg]; + const struct nft_hash *priv = nft_set_priv(set); + const struct nft_hash_elem *he; unsigned int h; - h = nft_hash_data(key, priv->hsize, priv->klen); - hlist_for_each_entry(elem, &priv->hash[h], hnode) { - if (nft_data_cmp(&elem->key, key, priv->klen)) + h = nft_hash_data(key, priv->hsize, set->klen); + hlist_for_each_entry(he, &priv->hash[h], hnode) { + if (nft_data_cmp(&he->key, key, set->klen)) continue; - if (priv->flags & NFT_HASH_MAP) - nft_data_copy(&data[priv->dreg], elem->data); - return; + if (set->flags & NFT_SET_MAP) + nft_data_copy(data, he->data); + return true; } - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + return false; } -static void nft_hash_elem_destroy(const struct nft_expr *expr, - struct nft_hash_elem *elem) +static void nft_hash_elem_destroy(const struct nft_set *set, + struct nft_hash_elem *he) { - const struct nft_hash *priv = nft_expr_priv(expr); - - nft_data_uninit(&elem->key, NFT_DATA_VALUE); - if (priv->flags & NFT_HASH_MAP) - nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg)); - kfree(elem); + nft_data_uninit(&he->key, NFT_DATA_VALUE); + if (set->flags & NFT_SET_MAP) + nft_data_uninit(he->data, set->dtype); + kfree(he); } -static const struct nla_policy nft_he_policy[NFTA_HE_MAX + 1] = { - [NFTA_HE_KEY] = { .type = NLA_NESTED }, - [NFTA_HE_DATA] = { .type = NLA_NESTED }, -}; - -static int nft_hash_elem_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr *nla, - struct nft_hash_elem **new) +static int nft_hash_insert(const struct nft_set *set, + const struct nft_set_elem *elem) { - struct nft_hash *priv = nft_expr_priv(expr); - struct nlattr *tb[NFTA_HE_MAX + 1]; - struct nft_hash_elem *elem; - struct nft_data_desc d1, d2; - unsigned int size; - int err; + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + unsigned int size, h; - err = nla_parse_nested(tb, NFTA_HE_MAX, nla, nft_he_policy); - if (err < 0) - return err; - - if (tb[NFTA_HE_KEY] == NULL) + if (elem->flags != 0) return -EINVAL; - size = sizeof(*elem); - - if (priv->flags & NFT_HASH_MAP) { - if (tb[NFTA_HE_DATA] == NULL) - return -EINVAL; - size += sizeof(elem->data[0]); - } else { - if (tb[NFTA_HE_DATA] != NULL) - return -EINVAL; - } - elem = kzalloc(size, GFP_KERNEL); - if (elem == NULL) + size = sizeof(*he); + if (set->flags & NFT_SET_MAP) + size += sizeof(he->data[0]); + + he = kzalloc(size, GFP_KERNEL); + if (he == NULL) return -ENOMEM; - err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_HE_KEY]); - if (err < 0) - goto err1; - err = -EINVAL; - if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen) - goto err2; - - if (tb[NFTA_HE_DATA] != NULL) { - err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_HE_DATA]); - if (err < 0) - goto err2; - err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type); - if (err < 0) - goto err3; - } + nft_data_copy(&he->key, &elem->key); + if (set->flags & NFT_SET_MAP) + nft_data_copy(he->data, &elem->data); - *new = elem; + h = nft_hash_data(&he->key, priv->hsize, set->klen); + hlist_add_head_rcu(&he->hnode, &priv->hash[h]); return 0; - -err3: - nft_data_uninit(elem->data, d2.type); -err2: - nft_data_uninit(&elem->key, d1.type); -err1: - kfree(elem); - return err; } -static int nft_hash_elem_dump(struct sk_buff *skb, const struct nft_expr *expr, - const struct nft_hash_elem *elem) - +static void nft_hash_remove(const struct nft_set *set, + const struct nft_set_elem *elem) { - const struct nft_hash *priv = nft_expr_priv(expr); - struct nlattr *nest; + struct nft_hash_elem *he = elem->cookie; - nest = nla_nest_start(skb, NFTA_LIST_ELEM); - if (nest == NULL) - goto nla_put_failure; - - if (nft_data_dump(skb, NFTA_HE_KEY, &elem->key, - NFT_DATA_VALUE, priv->klen) < 0) - goto nla_put_failure; + hlist_del_rcu(&he->hnode); + kfree(he); +} - if (priv->flags & NFT_HASH_MAP) { - if (nft_data_dump(skb, NFTA_HE_DATA, elem->data, - NFT_DATA_VALUE, priv->dlen) < 0) - goto nla_put_failure; - } +static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) +{ + const struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + unsigned int h; - nla_nest_end(skb, nest); - return 0; + h = nft_hash_data(&elem->key, priv->hsize, set->klen); + hlist_for_each_entry(he, &priv->hash[h], hnode) { + if (nft_data_cmp(&he->key, &elem->key, set->klen)) + continue; -nla_put_failure: - return -1; + elem->cookie = he; + elem->flags = 0; + if (set->flags & NFT_SET_MAP) + nft_data_copy(&elem->data, he->data); + return 0; + } + return -ENOENT; } -static void nft_hash_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, + struct nft_set_iter *iter) { - const struct nft_hash *priv = nft_expr_priv(expr); - const struct hlist_node *next; - struct nft_hash_elem *elem; + const struct nft_hash *priv = nft_set_priv(set); + const struct nft_hash_elem *he; + struct nft_set_elem elem; unsigned int i; for (i = 0; i < priv->hsize; i++) { - hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) { - hlist_del(&elem->hnode); - nft_hash_elem_destroy(expr, elem); + hlist_for_each_entry(he, &priv->hash[i], hnode) { + if (iter->count < iter->skip) + goto cont; + + memcpy(&elem.key, &he->key, sizeof(elem.key)); + if (set->flags & NFT_SET_MAP) + memcpy(&elem.data, he->data, sizeof(elem.data)); + elem.flags = 0; + + iter->err = iter->fn(ctx, set, iter, &elem); + if (iter->err < 0) + return; +cont: + iter->count++; } } - kfree(priv->hash); } -static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { - [NFTA_HASH_FLAGS] = { .type = NLA_U32 }, - [NFTA_HASH_SREG] = { .type = NLA_U32 }, - [NFTA_HASH_DREG] = { .type = NLA_U32 }, - [NFTA_HASH_KLEN] = { .type = NLA_U32 }, - [NFTA_HASH_ELEMENTS] = { .type = NLA_NESTED }, -}; +static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) +{ + return sizeof(struct nft_hash); +} -static int nft_hash_init(const struct nft_ctx *ctx, const struct nft_expr *expr, +static int nft_hash_init(const struct nft_set *set, const struct nlattr * const tb[]) { - struct nft_hash *priv = nft_expr_priv(expr); - struct nft_hash_elem *elem, *uninitialized_var(new); - const struct nlattr *nla; + struct nft_hash *priv = nft_set_priv(set); unsigned int cnt, i; - unsigned int h; - int err, rem; if (unlikely(!nft_hash_rnd_initted)) { get_random_bytes(&nft_hash_rnd, 4); nft_hash_rnd_initted = true; } - if (tb[NFTA_HASH_SREG] == NULL || - tb[NFTA_HASH_KLEN] == NULL || - tb[NFTA_HASH_ELEMENTS] == NULL) - return -EINVAL; - - if (tb[NFTA_HASH_FLAGS] != NULL) { - priv->flags = ntohl(nla_get_be32(tb[NFTA_HASH_FLAGS])); - if (priv->flags & ~NFT_HASH_MAP) - return -EINVAL; - } - - priv->sreg = ntohl(nla_get_be32(tb[NFTA_HASH_SREG])); - err = nft_validate_input_register(priv->sreg); - if (err < 0) - return err; - - if (tb[NFTA_HASH_DREG] != NULL) { - if (!(priv->flags & NFT_HASH_MAP)) - return -EINVAL; - priv->dreg = ntohl(nla_get_be32(tb[NFTA_HASH_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - } - - priv->klen = ntohl(nla_get_be32(tb[NFTA_HASH_KLEN])); - if (priv->klen == 0) - return -EINVAL; - - cnt = 0; - nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) { - if (nla_type(nla) != NFTA_LIST_ELEM) - return -EINVAL; - cnt++; - } - /* Aim for a load factor of 0.75 */ + // FIXME: temporarily broken until we have set descriptions + cnt = 100; cnt = cnt * 4 / 3; priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL); @@ -259,85 +181,46 @@ static int nft_hash_init(const struct nft_ctx *ctx, const struct nft_expr *expr, for (i = 0; i < cnt; i++) INIT_HLIST_HEAD(&priv->hash[i]); - err = -ENOMEM; - nla_for_each_nested(nla, tb[NFTA_HASH_ELEMENTS], rem) { - err = nft_hash_elem_init(ctx, expr, nla, &new); - if (err < 0) - goto err1; - - h = nft_hash_data(&new->key, priv->hsize, priv->klen); - hlist_for_each_entry(elem, &priv->hash[h], hnode) { - if (nft_data_cmp(&elem->key, &new->key, priv->klen)) - continue; - nft_hash_elem_destroy(expr, new); - err = -EEXIST; - goto err1; - } - hlist_add_head(&new->hnode, &priv->hash[h]); - } return 0; - -err1: - nft_hash_destroy(ctx, expr); - return err; } -static int nft_hash_dump(struct sk_buff *skb, const struct nft_expr *expr) +static void nft_hash_destroy(const struct nft_set *set) { - const struct nft_hash *priv = nft_expr_priv(expr); - const struct nft_hash_elem *elem; - struct nlattr *list; + const struct nft_hash *priv = nft_set_priv(set); + const struct hlist_node *next; + struct nft_hash_elem *elem; unsigned int i; - if (priv->flags) - if (nla_put_be32(skb, NFTA_HASH_FLAGS, htonl(priv->flags))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_HASH_SREG, htonl(priv->sreg))) - goto nla_put_failure; - if (priv->flags & NFT_HASH_MAP) - if (nla_put_be32(skb, NFTA_HASH_DREG, htonl(priv->dreg))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_HASH_KLEN, htonl(priv->klen))) - goto nla_put_failure; - - list = nla_nest_start(skb, NFTA_HASH_ELEMENTS); - if (list == NULL) - goto nla_put_failure; - for (i = 0; i < priv->hsize; i++) { - hlist_for_each_entry(elem, &priv->hash[i], hnode) { - if (nft_hash_elem_dump(skb, expr, elem) < 0) - goto nla_put_failure; + hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) { + hlist_del(&elem->hnode); + nft_hash_elem_destroy(set, elem); } } - - nla_nest_end(skb, list); - return 0; - -nla_put_failure: - return -1; + kfree(priv->hash); } -static struct nft_expr_ops nft_hash_ops __read_mostly = { - .name = "hash", - .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)), - .owner = THIS_MODULE, - .eval = nft_hash_eval, +static struct nft_set_ops nft_hash_ops __read_mostly = { + .privsize = nft_hash_privsize, .init = nft_hash_init, .destroy = nft_hash_destroy, - .dump = nft_hash_dump, - .policy = nft_hash_policy, - .maxattr = NFTA_HASH_MAX, + .get = nft_hash_get, + .insert = nft_hash_insert, + .remove = nft_hash_remove, + .lookup = nft_hash_lookup, + .walk = nft_hash_walk, + .features = NFT_SET_MAP, + .owner = THIS_MODULE, }; static int __init nft_hash_module_init(void) { - return nft_register_expr(&nft_hash_ops); + return nft_register_set(&nft_hash_ops); } static void __exit nft_hash_module_exit(void) { - nft_unregister_expr(&nft_hash_ops); + nft_unregister_set(&nft_hash_ops); } module_init(nft_hash_module_init); @@ -345,4 +228,4 @@ module_exit(nft_hash_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_EXPR("hash"); +MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 3bf42c3cc49a..78334bf37007 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -90,6 +90,16 @@ nla_put_failure: return -1; } +static const struct nft_data *nft_immediate_get_verdict(const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (priv->dreg == NFT_REG_VERDICT) + return &priv->data; + else + return NULL; +} + static struct nft_expr_ops nft_imm_ops __read_mostly = { .name = "immediate", .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), @@ -98,6 +108,7 @@ static struct nft_expr_ops nft_imm_ops __read_mostly = { .init = nft_immediate_init, .destroy = nft_immediate_destroy, .dump = nft_immediate_dump, + .get_verdict = nft_immediate_get_verdict, .policy = nft_immediate_policy, .maxattr = NFTA_IMMEDIATE_MAX, }; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c new file mode 100644 index 000000000000..4962d2173678 --- /dev/null +++ b/net/netfilter/nft_lookup.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2009 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_lookup { + struct nft_set *set; + enum nft_registers sreg:8; + enum nft_registers dreg:8; + struct nft_set_binding binding; +}; + +static void nft_lookup_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_lookup *priv = nft_expr_priv(expr); + const struct nft_set *set = priv->set; + + if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg])) + return; + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { + [NFTA_LOOKUP_SET] = { .type = NLA_STRING }, + [NFTA_LOOKUP_SREG] = { .type = NLA_U32 }, + [NFTA_LOOKUP_DREG] = { .type = NLA_U32 }, +}; + +static int nft_lookup_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_lookup *priv = nft_expr_priv(expr); + struct nft_set *set; + int err; + + if (tb[NFTA_LOOKUP_SET] == NULL || + tb[NFTA_LOOKUP_SREG] == NULL) + return -EINVAL; + + set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]); + if (IS_ERR(set)) + return PTR_ERR(set); + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG])); + err = nft_validate_input_register(priv->sreg); + if (err < 0) + return err; + + if (tb[NFTA_LOOKUP_DREG] != NULL) { + if (!(set->flags & NFT_SET_MAP)) + return -EINVAL; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + if (priv->dreg == NFT_REG_VERDICT) { + if (set->dtype != NFT_DATA_VERDICT) + return -EINVAL; + } else if (set->dtype == NFT_DATA_VERDICT) + return -EINVAL; + } else if (set->flags & NFT_SET_MAP) + return -EINVAL; + + err = nf_tables_bind_set(ctx, set, &priv->binding); + if (err < 0) + return err; + + priv->set = set; + return 0; +} + +static void nft_lookup_destroy(const struct nft_expr *expr) +{ + struct nft_lookup *priv = nft_expr_priv(expr); + + nf_tables_unbind_set(NULL, priv->set, &priv->binding); +} + +static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_lookup *priv = nft_expr_priv(expr); + + if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_LOOKUP_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (priv->set->flags & NFT_SET_MAP) + if (nla_put_be32(skb, NFTA_LOOKUP_DREG, htonl(priv->dreg))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_ops nft_lookup_ops __read_mostly = { + .name = "lookup", + .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), + .owner = THIS_MODULE, + .eval = nft_lookup_eval, + .init = nft_lookup_init, + .destroy = nft_lookup_destroy, + .dump = nft_lookup_dump, + .policy = nft_lookup_policy, + .maxattr = NFTA_LOOKUP_MAX, +}; + +int __init nft_lookup_module_init(void) +{ + return nft_register_expr(&nft_lookup_ops); +} + +void nft_lookup_module_exit(void) +{ + nft_unregister_expr(&nft_lookup_ops); +} diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c new file mode 100644 index 000000000000..ca0c1b231bfe --- /dev/null +++ b/net/netfilter/nft_rbtree.c @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_rbtree { + struct rb_root root; +}; + +struct nft_rbtree_elem { + struct rb_node node; + u16 flags; + struct nft_data key; + struct nft_data data[]; +}; + +static bool nft_rbtree_lookup(const struct nft_set *set, + const struct nft_data *key, + struct nft_data *data) +{ + const struct nft_rbtree *priv = nft_set_priv(set); + const struct nft_rbtree_elem *rbe, *interval = NULL; + const struct rb_node *parent = priv->root.rb_node; + int d; + + while (parent != NULL) { + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + + d = nft_data_cmp(&rbe->key, key, set->klen); + if (d < 0) { + parent = parent->rb_left; + interval = rbe; + } else if (d > 0) + parent = parent->rb_right; + else { +found: + if (rbe->flags & NFT_SET_ELEM_INTERVAL_END) + goto out; + if (set->flags & NFT_SET_MAP) + nft_data_copy(data, rbe->data); + return true; + } + } + + if (set->flags & NFT_SET_INTERVAL && interval != NULL) { + rbe = interval; + goto found; + } +out: + return false; +} + +static void nft_rbtree_elem_destroy(const struct nft_set *set, + struct nft_rbtree_elem *rbe) +{ + nft_data_uninit(&rbe->key, NFT_DATA_VALUE); + if (set->flags & NFT_SET_MAP) + nft_data_uninit(rbe->data, set->dtype); + kfree(rbe); +} + +static int __nft_rbtree_insert(const struct nft_set *set, + struct nft_rbtree_elem *new) +{ + struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree_elem *rbe; + struct rb_node *parent, **p; + int d; + + parent = NULL; + p = &priv->root.rb_node; + while (*p != NULL) { + parent = *p; + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + d = nft_data_cmp(&rbe->key, &new->key, set->klen); + if (d < 0) + p = &parent->rb_left; + else if (d > 0) + p = &parent->rb_right; + else + return -EEXIST; + } + rb_link_node(&new->node, parent, p); + rb_insert_color(&new->node, &priv->root); + return 0; +} + +static int nft_rbtree_insert(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_rbtree_elem *rbe; + unsigned int size; + int err; + + size = sizeof(*rbe); + if (set->flags & NFT_SET_MAP) + size += sizeof(rbe->data[0]); + + rbe = kzalloc(size, GFP_KERNEL); + if (rbe == NULL) + return -ENOMEM; + + rbe->flags = elem->flags; + nft_data_copy(&rbe->key, &elem->key); + if (set->flags & NFT_SET_MAP) + nft_data_copy(rbe->data, &elem->data); + + err = __nft_rbtree_insert(set, rbe); + if (err < 0) + kfree(rbe); + return err; +} + +static void nft_rbtree_remove(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree_elem *rbe = elem->cookie; + + rb_erase(&rbe->node, &priv->root); + kfree(rbe); +} + +static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem) +{ + const struct nft_rbtree *priv = nft_set_priv(set); + const struct rb_node *parent = priv->root.rb_node; + struct nft_rbtree_elem *rbe; + int d; + + while (parent != NULL) { + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + + d = nft_data_cmp(&rbe->key, &elem->key, set->klen); + if (d < 0) + parent = parent->rb_left; + else if (d > 0) + parent = parent->rb_right; + else { + elem->cookie = rbe; + if (set->flags & NFT_SET_MAP) + nft_data_copy(&elem->data, rbe->data); + elem->flags = rbe->flags; + return 0; + } + } + return -ENOENT; +} + +static void nft_rbtree_walk(const struct nft_ctx *ctx, + const struct nft_set *set, + struct nft_set_iter *iter) +{ + const struct nft_rbtree *priv = nft_set_priv(set); + const struct nft_rbtree_elem *rbe; + struct nft_set_elem elem; + struct rb_node *node; + + for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + if (iter->count < iter->skip) + goto cont; + + rbe = rb_entry(node, struct nft_rbtree_elem, node); + nft_data_copy(&elem.key, &rbe->key); + if (set->flags & NFT_SET_MAP) + nft_data_copy(&elem.data, rbe->data); + elem.flags = rbe->flags; + + iter->err = iter->fn(ctx, set, iter, &elem); + if (iter->err < 0) + return; +cont: + iter->count++; + } +} + +static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[]) +{ + return sizeof(struct nft_rbtree); +} + +static int nft_rbtree_init(const struct nft_set *set, + const struct nlattr * const nla[]) +{ + struct nft_rbtree *priv = nft_set_priv(set); + + priv->root = RB_ROOT; + return 0; +} + +static void nft_rbtree_destroy(const struct nft_set *set) +{ + struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree_elem *rbe; + struct rb_node *node; + + while ((node = priv->root.rb_node) != NULL) { + rb_erase(node, &priv->root); + rbe = rb_entry(node, struct nft_rbtree_elem, node); + nft_rbtree_elem_destroy(set, rbe); + } +} + +static struct nft_set_ops nft_rbtree_ops __read_mostly = { + .privsize = nft_rbtree_privsize, + .init = nft_rbtree_init, + .destroy = nft_rbtree_destroy, + .insert = nft_rbtree_insert, + .remove = nft_rbtree_remove, + .get = nft_rbtree_get, + .lookup = nft_rbtree_lookup, + .walk = nft_rbtree_walk, + .features = NFT_SET_INTERVAL | NFT_SET_MAP, + .owner = THIS_MODULE, +}; + +static int __init nft_rbtree_module_init(void) +{ + return nft_register_set(&nft_rbtree_ops); +} + +static void __exit nft_rbtree_module_exit(void) +{ + nft_unregister_set(&nft_rbtree_ops); +} + +module_init(nft_rbtree_module_init); +module_exit(nft_rbtree_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/nft_set.c b/net/netfilter/nft_set.c deleted file mode 100644 index 7b7c8354c327..000000000000 --- a/net/netfilter/nft_set.c +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct nft_set { - struct rb_root root; - enum nft_registers sreg:8; - enum nft_registers dreg:8; - u8 klen; - u8 dlen; - u16 flags; -}; - -struct nft_set_elem { - struct rb_node node; - enum nft_set_elem_flags flags; - struct nft_data key; - struct nft_data data[]; -}; - -static void nft_set_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) -{ - const struct nft_set *priv = nft_expr_priv(expr); - const struct rb_node *parent = priv->root.rb_node; - const struct nft_set_elem *elem, *interval = NULL; - const struct nft_data *key = &data[priv->sreg]; - int d; - - while (parent != NULL) { - elem = rb_entry(parent, struct nft_set_elem, node); - - d = nft_data_cmp(&elem->key, key, priv->klen); - if (d < 0) { - parent = parent->rb_left; - interval = elem; - } else if (d > 0) - parent = parent->rb_right; - else { -found: - if (elem->flags & NFT_SE_INTERVAL_END) - goto out; - if (priv->flags & NFT_SET_MAP) - nft_data_copy(&data[priv->dreg], elem->data); - return; - } - } - - if (priv->flags & NFT_SET_INTERVAL && interval != NULL) { - elem = interval; - goto found; - } -out: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; -} - -static void nft_set_elem_destroy(const struct nft_expr *expr, - struct nft_set_elem *elem) -{ - const struct nft_set *priv = nft_expr_priv(expr); - - nft_data_uninit(&elem->key, NFT_DATA_VALUE); - if (priv->flags & NFT_SET_MAP) - nft_data_uninit(elem->data, nft_dreg_to_type(priv->dreg)); - kfree(elem); -} - -static const struct nla_policy nft_se_policy[NFTA_SE_MAX + 1] = { - [NFTA_SE_KEY] = { .type = NLA_NESTED }, - [NFTA_SE_DATA] = { .type = NLA_NESTED }, - [NFTA_SE_FLAGS] = { .type = NLA_U32 }, -}; - -static int nft_set_elem_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr *nla, - struct nft_set_elem **new) -{ - struct nft_set *priv = nft_expr_priv(expr); - struct nlattr *tb[NFTA_SE_MAX + 1]; - struct nft_set_elem *elem; - struct nft_data_desc d1, d2; - enum nft_set_elem_flags flags = 0; - unsigned int size; - int err; - - err = nla_parse_nested(tb, NFTA_SE_MAX, nla, nft_se_policy); - if (err < 0) - return err; - - if (tb[NFTA_SE_KEY] == NULL) - return -EINVAL; - - if (tb[NFTA_SE_FLAGS] != NULL) { - flags = ntohl(nla_get_be32(tb[NFTA_SE_FLAGS])); - if (flags & ~NFT_SE_INTERVAL_END) - return -EINVAL; - } - - size = sizeof(*elem); - if (priv->flags & NFT_SET_MAP) { - if (tb[NFTA_SE_DATA] == NULL && !(flags & NFT_SE_INTERVAL_END)) - return -EINVAL; - size += sizeof(elem->data[0]); - } else { - if (tb[NFTA_SE_DATA] != NULL) - return -EINVAL; - } - - elem = kzalloc(size, GFP_KERNEL); - if (elem == NULL) - return -ENOMEM; - elem->flags = flags; - - err = nft_data_init(ctx, &elem->key, &d1, tb[NFTA_SE_KEY]); - if (err < 0) - goto err1; - err = -EINVAL; - if (d1.type != NFT_DATA_VALUE || d1.len != priv->klen) - goto err2; - - if (tb[NFTA_SE_DATA] != NULL) { - err = nft_data_init(ctx, elem->data, &d2, tb[NFTA_SE_DATA]); - if (err < 0) - goto err2; - err = -EINVAL; - if (priv->dreg != NFT_REG_VERDICT && d2.len != priv->dlen) - goto err2; - err = nft_validate_data_load(ctx, priv->dreg, elem->data, d2.type); - if (err < 0) - goto err3; - } - - *new = elem; - return 0; - -err3: - nft_data_uninit(elem->data, d2.type); -err2: - nft_data_uninit(&elem->key, d1.type); -err1: - kfree(elem); - return err; -} - -static int nft_set_elem_dump(struct sk_buff *skb, const struct nft_expr *expr, - const struct nft_set_elem *elem) - -{ - const struct nft_set *priv = nft_expr_priv(expr); - struct nlattr *nest; - - nest = nla_nest_start(skb, NFTA_LIST_ELEM); - if (nest == NULL) - goto nla_put_failure; - - if (nft_data_dump(skb, NFTA_SE_KEY, &elem->key, - NFT_DATA_VALUE, priv->klen) < 0) - goto nla_put_failure; - - if (priv->flags & NFT_SET_MAP && !(elem->flags & NFT_SE_INTERVAL_END)) { - if (nft_data_dump(skb, NFTA_SE_DATA, elem->data, - nft_dreg_to_type(priv->dreg), priv->dlen) < 0) - goto nla_put_failure; - } - - if (elem->flags){ - if (nla_put_be32(skb, NFTA_SE_FLAGS, htonl(elem->flags))) - goto nla_put_failure; - } - - nla_nest_end(skb, nest); - return 0; - -nla_put_failure: - return -1; -} - -static void nft_set_destroy(const struct nft_expr *expr) -{ - struct nft_set *priv = nft_expr_priv(expr); - struct nft_set_elem *elem; - struct rb_node *node; - - while ((node = priv->root.rb_node) != NULL) { - rb_erase(node, &priv->root); - elem = rb_entry(node, struct nft_set_elem, node); - nft_set_elem_destroy(expr, elem); - } -} - -static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { - [NFTA_SET_FLAGS] = { .type = NLA_U32 }, - [NFTA_SET_SREG] = { .type = NLA_U32 }, - [NFTA_SET_DREG] = { .type = NLA_U32 }, - [NFTA_SET_KLEN] = { .type = NLA_U32 }, - [NFTA_SET_DLEN] = { .type = NLA_U32 }, - [NFTA_SET_ELEMENTS] = { .type = NLA_NESTED }, -}; - -static int nft_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_set *priv = nft_expr_priv(expr); - struct nft_set_elem *elem, *uninitialized_var(new); - struct rb_node *parent, **p; - const struct nlattr *nla; - int err, rem, d; - - if (tb[NFTA_SET_SREG] == NULL || - tb[NFTA_SET_KLEN] == NULL || - tb[NFTA_SET_ELEMENTS] == NULL) - return -EINVAL; - - priv->root = RB_ROOT; - - if (tb[NFTA_SET_FLAGS] != NULL) { - priv->flags = ntohl(nla_get_be32(tb[NFTA_SET_FLAGS])); - if (priv->flags & ~(NFT_SET_INTERVAL | NFT_SET_MAP)) - return -EINVAL; - } - - priv->sreg = ntohl(nla_get_be32(tb[NFTA_SET_SREG])); - err = nft_validate_input_register(priv->sreg); - if (err < 0) - return err; - - if (tb[NFTA_SET_DREG] != NULL) { - if (!(priv->flags & NFT_SET_MAP)) - return -EINVAL; - if (tb[NFTA_SET_DLEN] == NULL) - return -EINVAL; - - priv->dreg = ntohl(nla_get_be32(tb[NFTA_SET_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - - if (priv->dreg == NFT_REG_VERDICT) - priv->dlen = FIELD_SIZEOF(struct nft_data, data); - else { - priv->dlen = ntohl(nla_get_be32(tb[NFTA_SET_DLEN])); - if (priv->dlen == 0 || - priv->dlen > FIELD_SIZEOF(struct nft_data, data)) - return -EINVAL; - } - } else { - if (priv->flags & NFT_SET_MAP) - return -EINVAL; - if (tb[NFTA_SET_DLEN] != NULL) - return -EINVAL; - } - - priv->klen = ntohl(nla_get_be32(tb[NFTA_SET_KLEN])); - if (priv->klen == 0 || - priv->klen > FIELD_SIZEOF(struct nft_data, data)) - return -EINVAL; - - nla_for_each_nested(nla, tb[NFTA_SET_ELEMENTS], rem) { - err = -EINVAL; - if (nla_type(nla) != NFTA_LIST_ELEM) - goto err1; - - err = nft_set_elem_init(ctx, expr, nla, &new); - if (err < 0) - goto err1; - - parent = NULL; - p = &priv->root.rb_node; - while (*p != NULL) { - parent = *p; - elem = rb_entry(parent, struct nft_set_elem, node); - d = nft_data_cmp(&elem->key, &new->key, priv->klen); - if (d < 0) - p = &parent->rb_left; - else if (d > 0) - p = &parent->rb_right; - else { - err = -EEXIST; - goto err2; - } - } - rb_link_node(&new->node, parent, p); - rb_insert_color(&new->node, &priv->root); - } - - return 0; - -err2: - nft_set_elem_destroy(expr, new); -err1: - nft_set_destroy(expr); - return err; -} - -static int nft_set_dump(struct sk_buff *skb, const struct nft_expr *expr) -{ - struct nft_set *priv = nft_expr_priv(expr); - const struct nft_set_elem *elem; - struct rb_node *node; - struct nlattr *list; - - if (priv->flags) { - if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(priv->flags))) - goto nla_put_failure; - } - - if (nla_put_be32(skb, NFTA_SET_SREG, htonl(priv->sreg))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_SET_KLEN, htonl(priv->klen))) - goto nla_put_failure; - - if (priv->flags & NFT_SET_MAP) { - if (nla_put_be32(skb, NFTA_SET_DREG, htonl(priv->dreg))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_SET_DLEN, htonl(priv->dlen))) - goto nla_put_failure; - } - - list = nla_nest_start(skb, NFTA_SET_ELEMENTS); - if (list == NULL) - goto nla_put_failure; - - for (node = rb_first(&priv->root); node; node = rb_next(node)) { - elem = rb_entry(node, struct nft_set_elem, node); - if (nft_set_elem_dump(skb, expr, elem) < 0) - goto nla_put_failure; - } - - nla_nest_end(skb, list); - return 0; - -nla_put_failure: - return -1; -} - -static struct nft_expr_ops nft_set_ops __read_mostly = { - .name = "set", - .size = NFT_EXPR_SIZE(sizeof(struct nft_set)), - .owner = THIS_MODULE, - .eval = nft_set_eval, - .init = nft_set_init, - .destroy = nft_set_destroy, - .dump = nft_set_dump, - .policy = nft_set_policy, - .maxattr = NFTA_SET_MAX, -}; - -static int __init nft_set_module_init(void) -{ - return nft_register_expr(&nft_set_ops); -} - -static void __exit nft_set_module_exit(void) -{ - nft_unregister_expr(&nft_set_ops); -} - -module_init(nft_set_module_init); -module_exit(nft_set_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_EXPR("set"); -- cgit v1.2.3 From ef1f7df9170dbd875ce198ba84e6ab80f6fc139e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Oct 2013 11:41:20 +0200 Subject: netfilter: nf_tables: expression ops overloading Split the expression ops into two parts and support overloading of the runtime expression ops based on the requested function through a ->select_ops() callback. This can be used to provide optimized implementations, for instance for loading small aligned amounts of data from the packet or inlining frequently used operations into the main evaluation loop. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 101 +++++++++++++++++++++----------------- net/netfilter/nft_bitwise.c | 18 ++++--- net/netfilter/nft_byteorder.c | 18 ++++--- net/netfilter/nft_cmp.c | 18 ++++--- net/netfilter/nft_counter.c | 22 ++++++--- net/netfilter/nft_ct.c | 18 ++++--- net/netfilter/nft_expr_template.c | 20 +++++--- net/netfilter/nft_exthdr.c | 16 ++++-- net/netfilter/nft_immediate.c | 18 ++++--- net/netfilter/nft_limit.c | 18 ++++--- net/netfilter/nft_log.c | 18 ++++--- net/netfilter/nft_lookup.c | 16 ++++-- net/netfilter/nft_meta.c | 18 ++++--- net/netfilter/nft_payload.c | 18 ++++--- 14 files changed, 214 insertions(+), 123 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5092c817c222..6dac9a3c9c40 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -840,64 +840,64 @@ static void nft_ctx_init(struct nft_ctx *ctx, */ /** - * nft_register_expr - register nf_tables expr operations - * @ops: expr operations + * nft_register_expr - register nf_tables expr type + * @ops: expr type * - * Registers the expr operations for use with nf_tables. Returns zero on + * Registers the expr type for use with nf_tables. Returns zero on * success or a negative errno code otherwise. */ -int nft_register_expr(struct nft_expr_ops *ops) +int nft_register_expr(struct nft_expr_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail(&ops->list, &nf_tables_expressions); + list_add_tail(&type->list, &nf_tables_expressions); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } EXPORT_SYMBOL_GPL(nft_register_expr); /** - * nft_unregister_expr - unregister nf_tables expr operations - * @ops: expr operations + * nft_unregister_expr - unregister nf_tables expr type + * @ops: expr type * - * Unregisters the expr operations for use with nf_tables. + * Unregisters the expr typefor use with nf_tables. */ -void nft_unregister_expr(struct nft_expr_ops *ops) +void nft_unregister_expr(struct nft_expr_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del(&ops->list); + list_del(&type->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_expr); -static const struct nft_expr_ops *__nft_expr_ops_get(struct nlattr *nla) +static const struct nft_expr_type *__nft_expr_type_get(struct nlattr *nla) { - const struct nft_expr_ops *ops; + const struct nft_expr_type *type; - list_for_each_entry(ops, &nf_tables_expressions, list) { - if (!nla_strcmp(nla, ops->name)) - return ops; + list_for_each_entry(type, &nf_tables_expressions, list) { + if (!nla_strcmp(nla, type->name)) + return type; } return NULL; } -static const struct nft_expr_ops *nft_expr_ops_get(struct nlattr *nla) +static const struct nft_expr_type *nft_expr_type_get(struct nlattr *nla) { - const struct nft_expr_ops *ops; + const struct nft_expr_type *type; if (nla == NULL) return ERR_PTR(-EINVAL); - ops = __nft_expr_ops_get(nla); - if (ops != NULL && try_module_get(ops->owner)) - return ops; + type = __nft_expr_type_get(nla); + if (type != NULL && try_module_get(type->owner)) + return type; #ifdef CONFIG_MODULES - if (ops == NULL) { + if (type == NULL) { nfnl_unlock(NFNL_SUBSYS_NFTABLES); request_module("nft-expr-%.*s", nla_len(nla), (char *)nla_data(nla)); nfnl_lock(NFNL_SUBSYS_NFTABLES); - if (__nft_expr_ops_get(nla)) + if (__nft_expr_type_get(nla)) return ERR_PTR(-EAGAIN); } #endif @@ -912,7 +912,7 @@ static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = { static int nf_tables_fill_expr_info(struct sk_buff *skb, const struct nft_expr *expr) { - if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->name)) + if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->type->name)) goto nla_put_failure; if (expr->ops->dump) { @@ -932,28 +932,52 @@ nla_put_failure: struct nft_expr_info { const struct nft_expr_ops *ops; - struct nlattr *tb[NFTA_EXPR_MAX + 1]; + struct nlattr *tb[NFT_EXPR_MAXATTR + 1]; }; static int nf_tables_expr_parse(const struct nlattr *nla, struct nft_expr_info *info) { + const struct nft_expr_type *type; const struct nft_expr_ops *ops; + struct nlattr *tb[NFTA_EXPR_MAX + 1]; int err; - err = nla_parse_nested(info->tb, NFTA_EXPR_MAX, nla, nft_expr_policy); + err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy); if (err < 0) return err; - ops = nft_expr_ops_get(info->tb[NFTA_EXPR_NAME]); - if (IS_ERR(ops)) - return PTR_ERR(ops); + type = nft_expr_type_get(tb[NFTA_EXPR_NAME]); + if (IS_ERR(type)) + return PTR_ERR(type); + + if (tb[NFTA_EXPR_DATA]) { + err = nla_parse_nested(info->tb, type->maxattr, + tb[NFTA_EXPR_DATA], type->policy); + if (err < 0) + goto err1; + } else + memset(info->tb, 0, sizeof(info->tb[0]) * (type->maxattr + 1)); + + if (type->select_ops != NULL) { + ops = type->select_ops((const struct nlattr * const *)info->tb); + if (IS_ERR(ops)) { + err = PTR_ERR(ops); + goto err1; + } + } else + ops = type->ops; + info->ops = ops; return 0; + +err1: + module_put(type->owner); + return err; } static int nf_tables_newexpr(const struct nft_ctx *ctx, - struct nft_expr_info *info, + const struct nft_expr_info *info, struct nft_expr *expr) { const struct nft_expr_ops *ops = info->ops; @@ -961,23 +985,11 @@ static int nf_tables_newexpr(const struct nft_ctx *ctx, expr->ops = ops; if (ops->init) { - struct nlattr *ma[ops->maxattr + 1]; - - if (info->tb[NFTA_EXPR_DATA]) { - err = nla_parse_nested(ma, ops->maxattr, - info->tb[NFTA_EXPR_DATA], - ops->policy); - if (err < 0) - goto err1; - } else - memset(ma, 0, sizeof(ma[0]) * (ops->maxattr + 1)); - - err = ops->init(ctx, expr, (const struct nlattr **)ma); + err = ops->init(ctx, expr, (const struct nlattr **)info->tb); if (err < 0) goto err1; } - info->ops = NULL; return 0; err1: @@ -989,7 +1001,7 @@ static void nf_tables_expr_destroy(struct nft_expr *expr) { if (expr->ops->destroy) expr->ops->destroy(expr); - module_put(expr->ops->owner); + module_put(expr->ops->type->owner); } /* @@ -1313,6 +1325,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, err = nf_tables_newexpr(&ctx, &info[i], expr); if (err < 0) goto err2; + info[i].ops = NULL; expr = nft_expr_next(expr); } @@ -1341,7 +1354,7 @@ err2: err1: for (i = 0; i < n; i++) { if (info[i].ops != NULL) - module_put(info[i].ops->owner); + module_put(info[i].ops->type->owner); } return err; } diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 0f7501506367..4fb6ee2c1106 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -118,23 +118,29 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_bitwise_ops __read_mostly = { - .name = "bitwise", +static struct nft_expr_type nft_bitwise_type; +static const struct nft_expr_ops nft_bitwise_ops = { + .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), - .owner = THIS_MODULE, .eval = nft_bitwise_eval, .init = nft_bitwise_init, .dump = nft_bitwise_dump, +}; + +static struct nft_expr_type nft_bitwise_type __read_mostly = { + .name = "bitwise", + .ops = &nft_bitwise_ops, .policy = nft_bitwise_policy, .maxattr = NFTA_BITWISE_MAX, + .owner = THIS_MODULE, }; int __init nft_bitwise_module_init(void) { - return nft_register_expr(&nft_bitwise_ops); + return nft_register_expr(&nft_bitwise_type); } void nft_bitwise_module_exit(void) { - nft_unregister_expr(&nft_bitwise_ops); + nft_unregister_expr(&nft_bitwise_type); } diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 8b0657a4d17b..c39ed8d29df1 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -145,23 +145,29 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_byteorder_ops __read_mostly = { - .name = "byteorder", +static struct nft_expr_type nft_byteorder_type; +static const struct nft_expr_ops nft_byteorder_ops = { + .type = &nft_byteorder_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), - .owner = THIS_MODULE, .eval = nft_byteorder_eval, .init = nft_byteorder_init, .dump = nft_byteorder_dump, +}; + +static struct nft_expr_type nft_byteorder_type __read_mostly = { + .name = "byteorder", + .ops = &nft_byteorder_ops, .policy = nft_byteorder_policy, .maxattr = NFTA_BYTEORDER_MAX, + .owner = THIS_MODULE, }; int __init nft_byteorder_module_init(void) { - return nft_register_expr(&nft_byteorder_ops); + return nft_register_expr(&nft_byteorder_type); } void nft_byteorder_module_exit(void) { - nft_unregister_expr(&nft_byteorder_ops); + nft_unregister_expr(&nft_byteorder_type); } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index e734d670120a..2c9d5fef2e63 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -124,23 +124,29 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_cmp_ops __read_mostly = { - .name = "cmp", +static struct nft_expr_type nft_cmp_type; +static const struct nft_expr_ops nft_cmp_ops = { + .type = &nft_cmp_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)), - .owner = THIS_MODULE, .eval = nft_cmp_eval, .init = nft_cmp_init, .dump = nft_cmp_dump, +}; + +static struct nft_expr_type nft_cmp_type __read_mostly = { + .name = "cmp", + .ops = &nft_cmp_ops, .policy = nft_cmp_policy, .maxattr = NFTA_CMP_MAX, + .owner = THIS_MODULE, }; int __init nft_cmp_module_init(void) { - return nft_register_expr(&nft_cmp_ops); + return nft_register_expr(&nft_cmp_type); } void nft_cmp_module_exit(void) { - nft_unregister_expr(&nft_cmp_ops); + nft_unregister_expr(&nft_cmp_type); } diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 33c5d36819bb..c89ee486ce54 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -78,25 +78,31 @@ static int nft_counter_init(const struct nft_ctx *ctx, return 0; } -static struct nft_expr_ops nft_counter_ops __read_mostly = { - .name = "counter", +static struct nft_expr_type nft_counter_type; +static const struct nft_expr_ops nft_counter_ops = { + .type = &nft_counter_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_counter)), - .policy = nft_counter_policy, - .maxattr = NFTA_COUNTER_MAX, - .owner = THIS_MODULE, .eval = nft_counter_eval, .init = nft_counter_init, .dump = nft_counter_dump, }; +static struct nft_expr_type nft_counter_type __read_mostly = { + .name = "counter", + .ops = &nft_counter_ops, + .policy = nft_counter_policy, + .maxattr = NFTA_COUNTER_MAX, + .owner = THIS_MODULE, +}; + static int __init nft_counter_module_init(void) { - return nft_register_expr(&nft_counter_ops); + return nft_register_expr(&nft_counter_type); } static void __exit nft_counter_module_exit(void) { - nft_unregister_expr(&nft_counter_ops); + nft_unregister_expr(&nft_counter_type); } module_init(nft_counter_module_init); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index a1756d678226..955f4e6e7089 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -222,26 +222,32 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_ct_ops __read_mostly = { - .name = "ct", +static struct nft_expr_type nft_ct_type; +static const struct nft_expr_ops nft_ct_ops = { + .type = &nft_ct_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), - .owner = THIS_MODULE, .eval = nft_ct_eval, .init = nft_ct_init, .destroy = nft_ct_destroy, .dump = nft_ct_dump, +}; + +static struct nft_expr_type nft_ct_type __read_mostly = { + .name = "ct", + .ops = &nft_ct_ops, .policy = nft_ct_policy, .maxattr = NFTA_CT_MAX, + .owner = THIS_MODULE, }; static int __init nft_ct_module_init(void) { - return nft_register_expr(&nft_ct_ops); + return nft_register_expr(&nft_ct_type); } static void __exit nft_ct_module_exit(void) { - nft_unregister_expr(&nft_ct_ops); + nft_unregister_expr(&nft_ct_type); } module_init(nft_ct_module_init); diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c index 9fc8eb308193..b6eed4d5a096 100644 --- a/net/netfilter/nft_expr_template.c +++ b/net/netfilter/nft_expr_template.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -33,7 +33,7 @@ static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = { static int nft_template_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr *tb[]) + const struct nlattr * const tb[]) { struct nft_template *priv = nft_expr_priv(expr); @@ -58,26 +58,32 @@ nla_put_failure: return -1; } -static struct nft_expr_ops template_ops __read_mostly = { - .name = "template", +static struct nft_expr_type nft_template_type; +static const struct nft_expr_ops nft_template_ops = { + .type = &nft_template_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_template)), - .owner = THIS_MODULE, .eval = nft_template_eval, .init = nft_template_init, .destroy = nft_template_destroy, .dump = nft_template_dump, +}; + +static struct nft_expr_type nft_template_type __read_mostly = { + .name = "template", + .ops = &nft_template_ops, .policy = nft_template_policy, .maxattr = NFTA_TEMPLATE_MAX, + .owner = THIS_MODULE, }; static int __init nft_template_module_init(void) { - return nft_register_expr(&template_ops); + return nft_register_expr(&nft_template_type); } static void __exit nft_template_module_exit(void) { - nft_unregister_expr(&template_ops); + nft_unregister_expr(&nft_template_type); } module_init(nft_template_module_init); diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 21c6a6b7b662..8e0bb75e7c51 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -98,25 +98,31 @@ nla_put_failure: return -1; } -static struct nft_expr_ops exthdr_ops __read_mostly = { - .name = "exthdr", +static struct nft_expr_type nft_exthdr_type; +static const struct nft_expr_ops nft_exthdr_ops = { + .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), - .owner = THIS_MODULE, .eval = nft_exthdr_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, +}; + +static struct nft_expr_type nft_exthdr_type __read_mostly = { + .name = "exthdr", + .ops = &nft_exthdr_ops, .policy = nft_exthdr_policy, .maxattr = NFTA_EXTHDR_MAX, + .owner = THIS_MODULE, }; static int __init nft_exthdr_module_init(void) { - return nft_register_expr(&exthdr_ops); + return nft_register_expr(&nft_exthdr_type); } static void __exit nft_exthdr_module_exit(void) { - nft_unregister_expr(&exthdr_ops); + nft_unregister_expr(&nft_exthdr_type); } module_init(nft_exthdr_module_init); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 78334bf37007..1bfeeaf865b6 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -100,25 +100,31 @@ static const struct nft_data *nft_immediate_get_verdict(const struct nft_expr *e return NULL; } -static struct nft_expr_ops nft_imm_ops __read_mostly = { - .name = "immediate", +static struct nft_expr_type nft_imm_type; +static const struct nft_expr_ops nft_imm_ops = { + .type = &nft_imm_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), - .owner = THIS_MODULE, .eval = nft_immediate_eval, .init = nft_immediate_init, .destroy = nft_immediate_destroy, .dump = nft_immediate_dump, .get_verdict = nft_immediate_get_verdict, +}; + +static struct nft_expr_type nft_imm_type __read_mostly = { + .name = "immediate", + .ops = &nft_imm_ops, .policy = nft_immediate_policy, .maxattr = NFTA_IMMEDIATE_MAX, + .owner = THIS_MODULE, }; int __init nft_immediate_module_init(void) { - return nft_register_expr(&nft_imm_ops); + return nft_register_expr(&nft_imm_type); } void nft_immediate_module_exit(void) { - nft_unregister_expr(&nft_imm_ops); + nft_unregister_expr(&nft_imm_type); } diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index e0e3fc8aebc3..85da5bd02f64 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -84,25 +84,31 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_limit_ops __read_mostly = { - .name = "limit", +static struct nft_expr_type nft_limit_type; +static const struct nft_expr_ops nft_limit_ops = { + .type = &nft_limit_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_limit)), - .owner = THIS_MODULE, .eval = nft_limit_eval, .init = nft_limit_init, .dump = nft_limit_dump, +}; + +static struct nft_expr_type nft_limit_type __read_mostly = { + .name = "limit", + .ops = &nft_limit_ops, .policy = nft_limit_policy, .maxattr = NFTA_LIMIT_MAX, + .owner = THIS_MODULE, }; static int __init nft_limit_module_init(void) { - return nft_register_expr(&nft_limit_ops); + return nft_register_expr(&nft_limit_type); } static void __exit nft_limit_module_exit(void) { - nft_unregister_expr(&nft_limit_ops); + nft_unregister_expr(&nft_limit_type); } module_init(nft_limit_module_init); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index da495c3b1e7e..57cad072a13e 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -110,26 +110,32 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_log_ops __read_mostly = { - .name = "log", +static struct nft_expr_type nft_log_type; +static const struct nft_expr_ops nft_log_ops = { + .type = &nft_log_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_log)), - .owner = THIS_MODULE, .eval = nft_log_eval, .init = nft_log_init, .destroy = nft_log_destroy, .dump = nft_log_dump, +}; + +static struct nft_expr_type nft_log_type __read_mostly = { + .name = "log", + .ops = &nft_log_ops, .policy = nft_log_policy, .maxattr = NFTA_LOG_MAX, + .owner = THIS_MODULE, }; static int __init nft_log_module_init(void) { - return nft_register_expr(&nft_log_ops); + return nft_register_expr(&nft_log_type); } static void __exit nft_log_module_exit(void) { - nft_unregister_expr(&nft_log_ops); + nft_unregister_expr(&nft_log_type); } module_init(nft_log_module_init); diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 4962d2173678..8a6116b75b5a 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -112,24 +112,30 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_lookup_ops __read_mostly = { - .name = "lookup", +static struct nft_expr_type nft_lookup_type; +static const struct nft_expr_ops nft_lookup_ops = { + .type = &nft_lookup_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), - .owner = THIS_MODULE, .eval = nft_lookup_eval, .init = nft_lookup_init, .destroy = nft_lookup_destroy, .dump = nft_lookup_dump, +}; + +static struct nft_expr_type nft_lookup_type __read_mostly = { + .name = "lookup", + .ops = &nft_lookup_ops, .policy = nft_lookup_policy, .maxattr = NFTA_LOOKUP_MAX, + .owner = THIS_MODULE, }; int __init nft_lookup_module_init(void) { - return nft_register_expr(&nft_lookup_ops); + return nft_register_expr(&nft_lookup_type); } void nft_lookup_module_exit(void) { - nft_unregister_expr(&nft_lookup_ops); + nft_unregister_expr(&nft_lookup_type); } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 96735aa2f039..8c28220a90b3 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -193,25 +193,31 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_meta_ops __read_mostly = { - .name = "meta", +static struct nft_expr_type nft_meta_type; +static const struct nft_expr_ops nft_meta_ops = { + .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), - .owner = THIS_MODULE, .eval = nft_meta_eval, .init = nft_meta_init, .dump = nft_meta_dump, +}; + +static struct nft_expr_type nft_meta_type __read_mostly = { + .name = "meta", + .ops = &nft_meta_ops, .policy = nft_meta_policy, .maxattr = NFTA_META_MAX, + .owner = THIS_MODULE, }; static int __init nft_meta_module_init(void) { - return nft_register_expr(&nft_meta_ops); + return nft_register_expr(&nft_meta_type); } static void __exit nft_meta_module_exit(void) { - nft_unregister_expr(&nft_meta_ops); + nft_unregister_expr(&nft_meta_type); } module_init(nft_meta_module_init); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 329f134b3f89..d99db6e37fb1 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Patrick McHardy + * Copyright (c) 2008-2009 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -115,23 +115,29 @@ nla_put_failure: return -1; } -static struct nft_expr_ops nft_payload_ops __read_mostly = { - .name = "payload", +static struct nft_expr_type nft_payload_type; +static const struct nft_expr_ops nft_payload_ops = { + .type = &nft_payload_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), - .owner = THIS_MODULE, .eval = nft_payload_eval, .init = nft_payload_init, .dump = nft_payload_dump, +}; + +static struct nft_expr_type nft_payload_type __read_mostly = { + .name = "payload", + .ops = &nft_payload_ops, .policy = nft_payload_policy, .maxattr = NFTA_PAYLOAD_MAX, + .owner = THIS_MODULE, }; int __init nft_payload_module_init(void) { - return nft_register_expr(&nft_payload_ops); + return nft_register_expr(&nft_payload_type); } void nft_payload_module_exit(void) { - nft_unregister_expr(&nft_payload_ops); + nft_unregister_expr(&nft_payload_type); } -- cgit v1.2.3 From cb7dbfd0390c9e244339f3270fe8649568241812 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Oct 2013 23:35:40 +0200 Subject: netfilter: nf_tables: add optimized data comparison for small values Add an optimized version of nft_data_cmp() that only handles values of to 4 bytes length. This patch includes original Patrick McHardy's patch entitled (nf_tables: inline nft_cmp_fast_eval() into main evaluation loop). Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 18 ++++++- net/netfilter/nft_cmp.c | 116 +++++++++++++++++++++++++++++++++-------- 2 files changed, 110 insertions(+), 24 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index fd0ecd3255c1..24000182c8e7 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -20,6 +20,18 @@ #include #include +static void nft_cmp_fast_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1]) +{ + const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); + u32 mask; + + mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len); + if ((data[priv->sreg].data[0] & mask) == priv->data) + return; + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + unsigned int nft_do_chain(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, @@ -48,7 +60,11 @@ next_rule: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; list_for_each_entry_continue_rcu(rule, &chain->rules, list) { nft_rule_for_each_expr(expr, last, rule) { - expr->ops->eval(expr, data, &pkt); + if (expr->ops == &nft_cmp_fast_ops) + nft_cmp_fast_eval(expr, data); + else + expr->ops->eval(expr, data, &pkt); + if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE) break; } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 2c9d5fef2e63..37134f3e84fb 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -75,32 +75,11 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct nft_data_desc desc; int err; - if (tb[NFTA_CMP_SREG] == NULL || - tb[NFTA_CMP_OP] == NULL || - tb[NFTA_CMP_DATA] == NULL) - return -EINVAL; - priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); - err = nft_validate_input_register(priv->sreg); - if (err < 0) - return err; - priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); - switch (priv->op) { - case NFT_CMP_EQ: - case NFT_CMP_NEQ: - case NFT_CMP_LT: - case NFT_CMP_LTE: - case NFT_CMP_GT: - case NFT_CMP_GTE: - break; - default: - return -EINVAL; - } err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); - if (err < 0) - return err; + BUG_ON(err < 0); priv->len = desc.len; return 0; @@ -133,9 +112,100 @@ static const struct nft_expr_ops nft_cmp_ops = { .dump = nft_cmp_dump, }; +static int nft_cmp_fast_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc; + struct nft_data data; + u32 mask; + int err; + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); + + err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); + BUG_ON(err < 0); + desc.len *= BITS_PER_BYTE; + + mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - desc.len); + priv->data = data.data[0] & mask; + priv->len = desc.len; + return 0; +} + +static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); + struct nft_data data; + + if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CMP_OP, htonl(NFT_CMP_EQ))) + goto nla_put_failure; + + data.data[0] = priv->data; + if (nft_data_dump(skb, NFTA_CMP_DATA, &data, + NFT_DATA_VALUE, priv->len / BITS_PER_BYTE) < 0) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +const struct nft_expr_ops nft_cmp_fast_ops = { + .type = &nft_cmp_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_fast_expr)), + .eval = NULL, /* inlined */ + .init = nft_cmp_fast_init, + .dump = nft_cmp_fast_dump, +}; + +static const struct nft_expr_ops *nft_cmp_select_ops(const struct nlattr * const tb[]) +{ + struct nft_data_desc desc; + struct nft_data data; + enum nft_registers sreg; + enum nft_cmp_ops op; + int err; + + if (tb[NFTA_CMP_SREG] == NULL || + tb[NFTA_CMP_OP] == NULL || + tb[NFTA_CMP_DATA] == NULL) + return ERR_PTR(-EINVAL); + + sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); + err = nft_validate_input_register(sreg); + if (err < 0) + return ERR_PTR(err); + + op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); + switch (op) { + case NFT_CMP_EQ: + case NFT_CMP_NEQ: + case NFT_CMP_LT: + case NFT_CMP_LTE: + case NFT_CMP_GT: + case NFT_CMP_GTE: + break; + default: + return ERR_PTR(-EINVAL); + } + + err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); + if (err < 0) + return ERR_PTR(err); + + if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ) + return &nft_cmp_fast_ops; + else + return &nft_cmp_ops; +} + static struct nft_expr_type nft_cmp_type __read_mostly = { .name = "cmp", - .ops = &nft_cmp_ops, + .select_ops = nft_cmp_select_ops, .policy = nft_cmp_policy, .maxattr = NFTA_CMP_MAX, .owner = THIS_MODULE, -- cgit v1.2.3 From c29b72e02573b8fe5e6cae5d192a6a4772e7bbd6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Oct 2013 11:06:41 +0200 Subject: netfilter: nft_payload: add optimized payload implementation for small loads Add an optimized payload expression implementation for small (up to 4 bytes) aligned data loads from the linear packet area. This patch also includes original Patrick McHardy's entitled (nf_tables: inline nft_payload_fast_eval() into main evaluation loop). Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 31 ++++++++++++++++++- net/netfilter/nft_payload.c | 69 +++++++++++++++++++++++++----------------- 2 files changed, 72 insertions(+), 28 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 24000182c8e7..9aede59ed2d7 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -32,6 +32,34 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr, data[NFT_REG_VERDICT].verdict = NFT_BREAK; } +static bool nft_payload_fast_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_payload *priv = nft_expr_priv(expr); + const struct sk_buff *skb = pkt->skb; + struct nft_data *dest = &data[priv->dreg]; + unsigned char *ptr; + + if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) + ptr = skb_network_header(skb); + else + ptr = skb_transport_header(skb); + + ptr += priv->offset; + + if (unlikely(ptr + priv->len >= skb_tail_pointer(skb))) + return false; + + if (priv->len == 2) + *(u16 *)dest->data = *(u16 *)ptr; + else if (priv->len == 4) + *(u32 *)dest->data = *(u32 *)ptr; + else + *(u8 *)dest->data = *(u8 *)ptr; + return true; +} + unsigned int nft_do_chain(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, @@ -62,7 +90,8 @@ next_rule: nft_rule_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, data); - else + else if (expr->ops != &nft_payload_fast_ops || + !nft_payload_fast_eval(expr, data, &pkt)) expr->ops->eval(expr, data, &pkt); if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index d99db6e37fb1..7cf13f7e1e94 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -17,13 +17,6 @@ #include #include -struct nft_payload { - enum nft_payload_bases base:8; - u8 offset; - u8 len; - enum nft_registers dreg:8; -}; - static void nft_payload_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1], const struct nft_pktinfo *pkt) @@ -71,27 +64,9 @@ static int nft_payload_init(const struct nft_ctx *ctx, struct nft_payload *priv = nft_expr_priv(expr); int err; - if (tb[NFTA_PAYLOAD_DREG] == NULL || - tb[NFTA_PAYLOAD_BASE] == NULL || - tb[NFTA_PAYLOAD_OFFSET] == NULL || - tb[NFTA_PAYLOAD_LEN] == NULL) - return -EINVAL; - - priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); - switch (priv->base) { - case NFT_PAYLOAD_LL_HEADER: - case NFT_PAYLOAD_NETWORK_HEADER: - case NFT_PAYLOAD_TRANSPORT_HEADER: - break; - default: - return -EOPNOTSUPP; - } - + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); - if (priv->len == 0 || - priv->len > FIELD_SIZEOF(struct nft_data, data)) - return -EINVAL; priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG])); err = nft_validate_output_register(priv->dreg); @@ -124,9 +99,49 @@ static const struct nft_expr_ops nft_payload_ops = { .dump = nft_payload_dump, }; +const struct nft_expr_ops nft_payload_fast_ops = { + .type = &nft_payload_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), + .eval = nft_payload_eval, + .init = nft_payload_init, + .dump = nft_payload_dump, +}; + +static const struct nft_expr_ops *nft_payload_select_ops(const struct nlattr * const tb[]) +{ + enum nft_payload_bases base; + unsigned int offset, len; + + if (tb[NFTA_PAYLOAD_DREG] == NULL || + tb[NFTA_PAYLOAD_BASE] == NULL || + tb[NFTA_PAYLOAD_OFFSET] == NULL || + tb[NFTA_PAYLOAD_LEN] == NULL) + return ERR_PTR(-EINVAL); + + base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + switch (base) { + case NFT_PAYLOAD_LL_HEADER: + case NFT_PAYLOAD_NETWORK_HEADER: + case NFT_PAYLOAD_TRANSPORT_HEADER: + break; + default: + return ERR_PTR(-EOPNOTSUPP); + } + + offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data)) + return ERR_PTR(-EINVAL); + + if (len <= 4 && IS_ALIGNED(offset, len) && base != NFT_PAYLOAD_LL_HEADER) + return &nft_payload_fast_ops; + else + return &nft_payload_ops; +} + static struct nft_expr_type nft_payload_type __read_mostly = { .name = "payload", - .ops = &nft_payload_ops, + .select_ops = nft_payload_select_ops, .policy = nft_payload_policy, .maxattr = NFTA_PAYLOAD_MAX, .owner = THIS_MODULE, -- cgit v1.2.3 From 9370761c56b66aa5c65e069a7b010111a025018d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 10 Oct 2013 23:21:26 +0200 Subject: netfilter: nf_tables: convert built-in tables/chains to chain types This patch converts built-in tables/chains to chain types that allows you to deploy customized table and chain configurations from userspace. After this patch, you have to specify the chain type when creating a new chain: add chain ip filter output { type filter hook input priority 0; } ^^^^ ------ The existing chain types after this patch are: filter, route and nat. Note that tables are just containers of chains with no specific semantics, which is a significant change with regards to iptables. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 197 ++++++++++++++++++------------------------ 1 file changed, 86 insertions(+), 111 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6dac9a3c9c40..9c2d8d5af843 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -104,8 +104,7 @@ static struct nft_table *nft_table_lookup(const struct nft_af_info *afi, } static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi, - const struct nlattr *nla, - bool autoload) + const struct nlattr *nla) { struct nft_table *table; @@ -116,16 +115,6 @@ static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi, if (table != NULL) return table; -#ifdef CONFIG_MODULES - if (autoload) { - nfnl_unlock(NFNL_SUBSYS_NFTABLES); - request_module("nft-table-%u-%*.s", afi->family, - nla_len(nla)-1, (const char *)nla_data(nla)); - nfnl_lock(NFNL_SUBSYS_NFTABLES); - if (nft_table_lookup(afi, nla)) - return ERR_PTR(-EAGAIN); - } -#endif return ERR_PTR(-ENOENT); } @@ -134,6 +123,39 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table) return ++table->hgenerator; } +static struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX]; + +static int __nf_tables_chain_type_lookup(int family, const struct nlattr *nla) +{ + int i; + + for (i=0; iname)) + return i; + } + return -1; +} + +static int nf_tables_chain_type_lookup(const struct nft_af_info *afi, + const struct nlattr *nla, + bool autoload) +{ + int type; + + type = __nf_tables_chain_type_lookup(afi->family, nla); +#ifdef CONFIG_MODULES + if (type < 0 && autoload) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-chain-%u-%*.s", afi->family, + nla_len(nla)-1, (const char *)nla_data(nla)); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + type = __nf_tables_chain_type_lookup(afi->family, nla); + } +#endif + return type; +} + static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING }, }; @@ -258,7 +280,7 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false); + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); if (IS_ERR(table)) return PTR_ERR(table); @@ -294,7 +316,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, return PTR_ERR(afi); name = nla[NFTA_TABLE_NAME]; - table = nf_tables_table_lookup(afi, name, false); + table = nf_tables_table_lookup(afi, name); if (IS_ERR(table)) { if (PTR_ERR(table) != -ENOENT) return PTR_ERR(table); @@ -335,13 +357,10 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], false); + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_BUILTIN) - return -EOPNOTSUPP; - if (table->use) return -EBUSY; @@ -351,99 +370,34 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, return 0; } -static struct nft_table *__nf_tables_table_lookup(const struct nft_af_info *afi, - const char *name) +int nft_register_chain_type(struct nf_chain_type *ctype) { - struct nft_table *table; - - list_for_each_entry(table, &afi->tables, list) { - if (!strcmp(name, table->name)) - return table; - } - - return ERR_PTR(-ENOENT); -} - -static int nf_tables_chain_notify(const struct sk_buff *oskb, - const struct nlmsghdr *nlh, - const struct nft_table *table, - const struct nft_chain *chain, - int event, int family); - -/** - * nft_register_table - register a built-in table - * - * @table: the table to register - * @family: protocol family to register table with - * - * Register a built-in table for use with nf_tables. Returns zero on - * success or a negative errno code otherwise. - */ -int nft_register_table(struct nft_table *table, int family) -{ - struct nft_af_info *afi; - struct nft_table *t; - struct nft_chain *chain; - int err; + int err = 0; nfnl_lock(NFNL_SUBSYS_NFTABLES); -again: - afi = nf_tables_afinfo_lookup(family, true); - if (IS_ERR(afi)) { - err = PTR_ERR(afi); - if (err == -EAGAIN) - goto again; - goto err; - } - - t = __nf_tables_table_lookup(afi, table->name); - if (IS_ERR(t)) { - err = PTR_ERR(t); - if (err != -ENOENT) - goto err; - t = NULL; + if (chain_type[ctype->family][ctype->type] != NULL) { + err = -EBUSY; + goto out; } - if (t != NULL) { - err = -EEXIST; - goto err; - } + if (!try_module_get(ctype->me)) + goto out; - table->flags |= NFT_TABLE_BUILTIN; - INIT_LIST_HEAD(&table->sets); - list_add_tail(&table->list, &afi->tables); - nf_tables_table_notify(NULL, NULL, table, NFT_MSG_NEWTABLE, family); - list_for_each_entry(chain, &table->chains, list) - nf_tables_chain_notify(NULL, NULL, table, chain, - NFT_MSG_NEWCHAIN, family); - err = 0; -err: + chain_type[ctype->family][ctype->type] = ctype; +out: nfnl_unlock(NFNL_SUBSYS_NFTABLES); return err; } -EXPORT_SYMBOL_GPL(nft_register_table); +EXPORT_SYMBOL_GPL(nft_register_chain_type); -/** - * nft_unregister_table - unregister a built-in table - * - * @table: the table to unregister - * @family: protocol family to unregister table with - * - * Unregister a built-in table for use with nf_tables. - */ -void nft_unregister_table(struct nft_table *table, int family) +void nft_unregister_chain_type(struct nf_chain_type *ctype) { - struct nft_chain *chain; - nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del(&table->list); - list_for_each_entry(chain, &table->chains, list) - nf_tables_chain_notify(NULL, NULL, table, chain, - NFT_MSG_DELCHAIN, family); - nf_tables_table_notify(NULL, NULL, table, NFT_MSG_DELTABLE, family); + chain_type[ctype->family][ctype->type] = NULL; + module_put(ctype->me); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } -EXPORT_SYMBOL_GPL(nft_unregister_table); +EXPORT_SYMBOL_GPL(nft_unregister_chain_type); /* * Chains @@ -484,6 +438,7 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { [NFTA_CHAIN_NAME] = { .type = NLA_STRING, .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED }, + [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING }, }; static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { @@ -526,6 +481,10 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority))) goto nla_put_failure; nla_nest_end(skb, nest); + + if (nla_put_string(skb, NFTA_CHAIN_TYPE, + chain_type[ops->pf][nft_base_chain(chain)->type]->name)) + goto nla_put_failure; } return nlmsg_end(skb, nlh); @@ -633,7 +592,7 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -680,7 +639,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], create); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -722,6 +681,17 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (nla[NFTA_CHAIN_HOOK]) { struct nf_hook_ops *ops; + nf_hookfn *hookfn; + u32 hooknum; + int type = NFT_CHAIN_T_DEFAULT; + + if (nla[NFTA_CHAIN_TYPE]) { + type = nf_tables_chain_type_lookup(afi, + nla[NFTA_CHAIN_TYPE], + create); + if (type < 0) + return -ENOENT; + } err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], nft_hook_policy); @@ -730,12 +700,20 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (ha[NFTA_HOOK_HOOKNUM] == NULL || ha[NFTA_HOOK_PRIORITY] == NULL) return -EINVAL; - if (ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])) >= afi->nhooks) + + hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); + if (hooknum >= afi->nhooks) return -EINVAL; + hookfn = chain_type[family][type]->fn[hooknum]; + if (hookfn == NULL) + return -EOPNOTSUPP; + basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); if (basechain == NULL) return -ENOMEM; + + basechain->type = type; chain = &basechain->chain; ops = &basechain->ops; @@ -744,7 +722,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, ops->hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); ops->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); ops->priv = chain; - ops->hook = nft_do_chain; + ops->hook = hookfn; if (afi->hooks[ops->hooknum]) ops->hook = afi->hooks[ops->hooknum]; @@ -793,7 +771,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -801,9 +779,6 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(chain)) return PTR_ERR(chain); - if (chain->flags & NFT_CHAIN_BUILTIN) - return -EOPNOTSUPP; - if (!list_empty(&chain->rules)) return -EBUSY; @@ -1190,7 +1165,7 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -1268,7 +1243,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], create); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -1374,7 +1349,7 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -1490,7 +1465,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, return PTR_ERR(afi); if (nla[NFTA_SET_TABLE] != NULL) { - table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); } @@ -1820,7 +1795,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], create); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); @@ -2008,7 +1983,7 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE], false); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); -- cgit v1.2.3 From 0ca743a5599199152a31a7146b83213c786c2eb2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Oct 2013 00:06:06 +0200 Subject: netfilter: nf_tables: add compatibility layer for x_tables This patch adds the x_tables compatibility layer. This allows you to use existing x_tables matches and targets from nf_tables. This compatibility later allows us to use existing matches/targets for features that are still missing in nf_tables. We can progressively replace them with native nf_tables extensions. It also provides the userspace compatibility software that allows you to express the rule-set using the iptables syntax but using the nf_tables kernel components. In order to get this compatibility layer working, I've done the following things: * add NFNL_SUBSYS_NFT_COMPAT: this new nfnetlink subsystem is used to query the x_tables match/target revision, so we don't need to use the native x_table getsockopt interface. * emulate xt structures: this required extending the struct nft_pktinfo to include the fragment offset, which is already obtained from ip[6]_tables and that is used by some matches/targets. * add support for default policy to base chains, required to emulate x_tables. * add NFTA_CHAIN_USE attribute to obtain the number of references to chains, required by x_tables emulation. * add chain packet/byte counters using per-cpu. * support 32-64 bits compat. For historical reasons, this patch includes the following patches that were posted in the netfilter-devel mailing list. From Pablo Neira Ayuso: * nf_tables: add default policy to base chains * netfilter: nf_tables: add NFTA_CHAIN_USE attribute * nf_tables: nft_compat: private data of target and matches in contiguous area * nf_tables: validate hooks for compat match/target * nf_tables: nft_compat: release cached matches/targets * nf_tables: x_tables support as a compile time option * nf_tables: fix alias for xtables over nftables module * nf_tables: add packet and byte counters per chain * nf_tables: fix per-chain counter stats if no counters are passed * nf_tables: don't bump chain stats * nf_tables: add protocol and flags for xtables over nf_tables * nf_tables: add ip[6]t_entry emulation * nf_tables: move specific layer 3 compat code to nf_tables_ipv[4|6] * nf_tables: support 32bits-64bits x_tables compat * nf_tables: fix compilation if CONFIG_COMPAT is disabled From Patrick McHardy: * nf_tables: move policy to struct nft_base_chain * nf_tables: send notifications for base chain policy changes From Alexander Primak: * nf_tables: remove the duplicate NF_INET_LOCAL_OUT From Nicolas Dichtel: * nf_tables: fix compilation when nf-netlink is a module Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 9 + net/netfilter/Makefile | 1 + net/netfilter/nf_tables_api.c | 220 ++++++++++-- net/netfilter/nf_tables_core.c | 46 ++- net/netfilter/nft_cmp.c | 3 +- net/netfilter/nft_compat.c | 768 +++++++++++++++++++++++++++++++++++++++++ net/netfilter/nft_immediate.c | 12 +- net/netfilter/nft_payload.c | 4 +- 8 files changed, 1008 insertions(+), 55 deletions(-) create mode 100644 net/netfilter/nft_compat.c (limited to 'net/netfilter') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index aa184a46bbf3..49e362707379 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -450,6 +450,15 @@ config NFT_LIMIT depends on NF_TABLES tristate "Netfilter nf_tables limit module" +config NFT_COMPAT + depends on NF_TABLES + depends on NETFILTER_XTABLES + tristate "Netfilter x_tables over nf_tables module" + help + This is required if you intend to use any of existing + x_tables match/target extensions over the nf_tables + framework. + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index b6b78754e4cc..a6781450b6fb 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -70,6 +70,7 @@ nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o obj-$(CONFIG_NF_TABLES) += nf_tables.o +obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o obj-$(CONFIG_NFT_CT) += nft_ct.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9c2d8d5af843..61e017b349cb 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -438,7 +438,9 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { [NFTA_CHAIN_NAME] = { .type = NLA_STRING, .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED }, + [NFTA_CHAIN_POLICY] = { .type = NLA_U32 }, [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING }, + [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED }, }; static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { @@ -446,6 +448,33 @@ static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 }, }; +static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) +{ + struct nft_stats *cpu_stats, total; + struct nlattr *nest; + int cpu; + + memset(&total, 0, sizeof(total)); + for_each_possible_cpu(cpu) { + cpu_stats = per_cpu_ptr(stats, cpu); + total.pkts += cpu_stats->pkts; + total.bytes += cpu_stats->bytes; + } + nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS); + if (nest == NULL) + goto nla_put_failure; + + if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.pkts)) || + nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes))) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -ENOSPC; +} + static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 flags, int family, const struct nft_table *table, @@ -472,8 +501,11 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, goto nla_put_failure; if (chain->flags & NFT_BASE_CHAIN) { - const struct nf_hook_ops *ops = &nft_base_chain(chain)->ops; - struct nlattr *nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); + const struct nft_base_chain *basechain = nft_base_chain(chain); + const struct nf_hook_ops *ops = &basechain->ops; + struct nlattr *nest; + + nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); if (nest == NULL) goto nla_put_failure; if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum))) @@ -482,11 +514,21 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq, goto nla_put_failure; nla_nest_end(skb, nest); + if (nla_put_be32(skb, NFTA_CHAIN_POLICY, + htonl(basechain->policy))) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_CHAIN_TYPE, chain_type[ops->pf][nft_base_chain(chain)->type]->name)) goto nla_put_failure; + + if (nft_dump_stats(skb, nft_base_chain(chain)->stats)) + goto nla_put_failure; } + if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use))) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -617,6 +659,67 @@ err: return err; } +static int +nf_tables_chain_policy(struct nft_base_chain *chain, const struct nlattr *attr) +{ + switch (ntohl(nla_get_be32(attr))) { + case NF_DROP: + chain->policy = NF_DROP; + break; + case NF_ACCEPT: + chain->policy = NF_ACCEPT; + break; + default: + return -EINVAL; + } + return 0; +} + +static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { + [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 }, + [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, +}; + +static int +nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr) +{ + struct nlattr *tb[NFTA_COUNTER_MAX+1]; + struct nft_stats __percpu *newstats; + struct nft_stats *stats; + int err; + + err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy); + if (err < 0) + return err; + + if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS]) + return -EINVAL; + + newstats = alloc_percpu(struct nft_stats); + if (newstats == NULL) + return -ENOMEM; + + /* Restore old counters on this cpu, no problem. Per-cpu statistics + * are not exposed to userspace. + */ + stats = this_cpu_ptr(newstats); + stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); + stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + + if (chain->stats) { + /* nfnl_lock is held, add some nfnl function for this, later */ + struct nft_stats __percpu *oldstats = + rcu_dereference_protected(chain->stats, 1); + + rcu_assign_pointer(chain->stats, newstats); + synchronize_rcu(); + free_percpu(oldstats); + } else + rcu_assign_pointer(chain->stats, newstats); + + return 0; +} + static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -626,7 +729,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, const struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; - struct nft_base_chain *basechain; + struct nft_base_chain *basechain = NULL; struct nlattr *ha[NFTA_HOOK_MAX + 1]; int family = nfmsg->nfgen_family; u64 handle = 0; @@ -673,6 +776,26 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]))) return -EEXIST; + if (nla[NFTA_CHAIN_POLICY]) { + if (!(chain->flags & NFT_BASE_CHAIN)) + return -EOPNOTSUPP; + + err = nf_tables_chain_policy(nft_base_chain(chain), + nla[NFTA_CHAIN_POLICY]); + if (err < 0) + return err; + } + + if (nla[NFTA_CHAIN_COUNTERS]) { + if (!(chain->flags & NFT_BASE_CHAIN)) + return -EOPNOTSUPP; + + err = nf_tables_counters(nft_base_chain(chain), + nla[NFTA_CHAIN_COUNTERS]); + if (err < 0) + return err; + } + if (nla[NFTA_CHAIN_HANDLE] && name) nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); @@ -727,6 +850,36 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, ops->hook = afi->hooks[ops->hooknum]; chain->flags |= NFT_BASE_CHAIN; + + if (nla[NFTA_CHAIN_POLICY]) { + err = nf_tables_chain_policy(basechain, + nla[NFTA_CHAIN_POLICY]); + if (err < 0) { + free_percpu(basechain->stats); + kfree(basechain); + return err; + } + } else + basechain->policy = NF_ACCEPT; + + if (nla[NFTA_CHAIN_COUNTERS]) { + err = nf_tables_counters(basechain, + nla[NFTA_CHAIN_COUNTERS]); + if (err < 0) { + free_percpu(basechain->stats); + kfree(basechain); + return err; + } + } else { + struct nft_stats __percpu *newstats; + + newstats = alloc_percpu(struct nft_stats); + if (newstats == NULL) + return -ENOMEM; + + rcu_assign_pointer(nft_base_chain(chain)->stats, + newstats); + } } else { chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (chain == NULL) @@ -739,6 +892,15 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, list_add_tail(&chain->list, &table->chains); table->use++; + + if (chain->flags & NFT_BASE_CHAIN) { + err = nf_register_hook(&nft_base_chain(chain)->ops); + if (err < 0) { + free_percpu(basechain->stats); + kfree(basechain); + return err; + } + } notify: nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN, family); @@ -751,9 +913,10 @@ static void nf_tables_rcu_chain_destroy(struct rcu_head *head) BUG_ON(chain->use > 0); - if (chain->flags & NFT_BASE_CHAIN) + if (chain->flags & NFT_BASE_CHAIN) { + free_percpu(nft_base_chain(chain)->stats); kfree(nft_base_chain(chain)); - else + } else kfree(chain); } @@ -801,13 +964,15 @@ static void nft_ctx_init(struct nft_ctx *ctx, const struct nlmsghdr *nlh, const struct nft_af_info *afi, const struct nft_table *table, - const struct nft_chain *chain) + const struct nft_chain *chain, + const struct nlattr * const *nla) { ctx->skb = skb; ctx->nlh = nlh; ctx->afi = afi; ctx->table = table; ctx->chain = chain; + ctx->nla = nla; } /* @@ -910,7 +1075,8 @@ struct nft_expr_info { struct nlattr *tb[NFT_EXPR_MAXATTR + 1]; }; -static int nf_tables_expr_parse(const struct nlattr *nla, +static int nf_tables_expr_parse(const struct nft_ctx *ctx, + const struct nlattr *nla, struct nft_expr_info *info) { const struct nft_expr_type *type; @@ -935,7 +1101,8 @@ static int nf_tables_expr_parse(const struct nlattr *nla, memset(info->tb, 0, sizeof(info->tb[0]) * (type->maxattr + 1)); if (type->select_ops != NULL) { - ops = type->select_ops((const struct nlattr * const *)info->tb); + ops = type->select_ops(ctx, + (const struct nlattr * const *)info->tb); if (IS_ERR(ops)) { err = PTR_ERR(ops); goto err1; @@ -1012,6 +1179,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, + [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, }; static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, @@ -1269,6 +1437,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, handle = nf_tables_alloc_handle(table); } + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + n = 0; size = 0; if (nla[NFTA_RULE_EXPRESSIONS]) { @@ -1278,7 +1448,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, goto err1; if (n == NFT_RULE_MAXEXPRS) goto err1; - err = nf_tables_expr_parse(tmp, &info[n]); + err = nf_tables_expr_parse(&ctx, tmp, &info[n]); if (err < 0) goto err1; size += info[n].ops->size; @@ -1294,7 +1464,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, rule->handle = handle; rule->dlen = size; - nft_ctx_init(&ctx, skb, nlh, afi, table, chain); expr = nft_expr_first(rule); for (i = 0; i < n; i++) { err = nf_tables_newexpr(&ctx, &info[i], expr); @@ -1304,13 +1473,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, expr = nft_expr_next(expr); } - /* Register hook when first rule is inserted into a base chain */ - if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) { - err = nf_register_hook(&nft_base_chain(chain)->ops); - if (err < 0) - goto err2; - } - if (nlh->nlmsg_flags & NLM_F_REPLACE) { list_replace_rcu(&old_rule->list, &rule->list); nf_tables_rule_destroy(old_rule); @@ -1379,10 +1541,6 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, } } - /* Unregister hook when last rule from base chain is deleted */ - if (list_empty(&chain->rules) && chain->flags & NFT_BASE_CHAIN) - nf_unregister_hook(&nft_base_chain(chain)->ops); - return 0; } @@ -1470,7 +1628,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, return PTR_ERR(table); } - nft_ctx_init(ctx, skb, nlh, afi, table, NULL); + nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); return 0; } @@ -1799,7 +1957,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(table)) return PTR_ERR(table); - nft_ctx_init(&ctx, skb, nlh, afi, table, NULL); + nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) { @@ -1987,7 +2145,7 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, if (IS_ERR(table)) return PTR_ERR(table); - nft_ctx_init(ctx, skb, nlh, afi, table, NULL); + nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); return 0; } @@ -2435,23 +2593,27 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, { const struct nft_rule *rule; const struct nft_expr *expr, *last; - const struct nft_data *data; const struct nft_set *set; struct nft_set_binding *binding; struct nft_set_iter iter; - int err; if (ctx->chain == chain) return -ELOOP; list_for_each_entry(rule, &chain->rules, list) { nft_rule_for_each_expr(expr, last, rule) { - if (!expr->ops->get_verdict) + const struct nft_data *data = NULL; + int err; + + if (!expr->ops->validate) continue; - data = expr->ops->get_verdict(expr); + err = expr->ops->validate(ctx, expr, &data); + if (err < 0) + return err; + if (data == NULL) - break; + continue; switch (data->verdict) { case NFT_JUMP: diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 9aede59ed2d7..e51a45c12128 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -60,27 +60,34 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, return true; } -unsigned int nft_do_chain(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +struct nft_jumpstack { + const struct nft_chain *chain; + const struct nft_rule *rule; +}; + +static inline void +nft_chain_stats(const struct nft_chain *this, const struct nft_pktinfo *pkt, + struct nft_jumpstack *jumpstack, unsigned int stackptr) +{ + struct nft_stats __percpu *stats; + const struct nft_chain *chain = stackptr ? jumpstack[0].chain : this; + + rcu_read_lock_bh(); + stats = rcu_dereference(nft_base_chain(chain)->stats); + __this_cpu_inc(stats->pkts); + __this_cpu_add(stats->bytes, pkt->skb->len); + rcu_read_unlock_bh(); +} + +unsigned int +nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { const struct nft_chain *chain = ops->priv; const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_data data[NFT_REG_MAX + 1]; - const struct nft_pktinfo pkt = { - .skb = skb, - .in = in, - .out = out, - .hooknum = ops->hooknum, - }; unsigned int stackptr = 0; - struct { - const struct nft_chain *chain; - const struct nft_rule *rule; - } jumpstack[NFT_JUMP_STACK_SIZE]; + struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; do_chain: rule = list_entry(&chain->rules, struct nft_rule, list); @@ -91,8 +98,8 @@ next_rule: if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, data); else if (expr->ops != &nft_payload_fast_ops || - !nft_payload_fast_eval(expr, data, &pkt)) - expr->ops->eval(expr, data, &pkt); + !nft_payload_fast_eval(expr, data, pkt)) + expr->ops->eval(expr, data, pkt); if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE) break; @@ -135,10 +142,11 @@ next_rule: rule = jumpstack[stackptr].rule; goto next_rule; } + nft_chain_stats(chain, pkt, jumpstack, stackptr); - return NF_ACCEPT; + return nft_base_chain(chain)->policy; } -EXPORT_SYMBOL_GPL(nft_do_chain); +EXPORT_SYMBOL_GPL(nft_do_chain_pktinfo); int __init nf_tables_core_module_init(void) { diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 37134f3e84fb..954925db414d 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -162,7 +162,8 @@ const struct nft_expr_ops nft_cmp_fast_ops = { .dump = nft_cmp_fast_dump, }; -static const struct nft_expr_ops *nft_cmp_select_ops(const struct nlattr * const tb[]) +static const struct nft_expr_ops * +nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_data_desc desc; struct nft_data data; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c new file mode 100644 index 000000000000..4811f762e060 --- /dev/null +++ b/net/netfilter/nft_compat.c @@ -0,0 +1,768 @@ +/* + * (C) 2012-2013 by Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This software has been sponsored by Sophos Astaro + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for set_fs */ +#include + +union nft_entry { + struct ipt_entry e4; + struct ip6t_entry e6; +}; + +static inline void +nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info) +{ + par->target = xt; + par->targinfo = xt_info; + par->hotdrop = false; +} + +static void nft_target_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + void *info = nft_expr_priv(expr); + struct xt_target *target = expr->ops->data; + struct sk_buff *skb = pkt->skb; + int ret; + + nft_compat_set_par((struct xt_action_param *)&pkt->xt, target, info); + + ret = target->target(skb, &pkt->xt); + + if (pkt->xt.hotdrop) + ret = NF_DROP; + + switch(ret) { + case XT_CONTINUE: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + break; + default: + data[NFT_REG_VERDICT].verdict = ret; + break; + } + return; +} + +static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = { + [NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING }, + [NFTA_TARGET_REV] = { .type = NLA_U32 }, + [NFTA_TARGET_INFO] = { .type = NLA_BINARY }, +}; + +static void +nft_target_set_tgchk_param(struct xt_tgchk_param *par, + const struct nft_ctx *ctx, + struct xt_target *target, void *info, + union nft_entry *entry, u8 proto, bool inv) +{ + par->net = &init_net; + par->table = ctx->table->name; + switch (ctx->afi->family) { + case AF_INET: + entry->e4.ip.proto = proto; + entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; + break; + case AF_INET6: + entry->e6.ipv6.proto = proto; + entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; + break; + } + par->entryinfo = entry; + par->target = target; + par->targinfo = info; + if (ctx->chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); + const struct nf_hook_ops *ops = &basechain->ops; + + par->hook_mask = 1 << ops->hooknum; + } + par->family = ctx->afi->family; +} + +static void target_compat_from_user(struct xt_target *t, void *in, void *out) +{ +#ifdef CONFIG_COMPAT + if (t->compat_from_user) { + int pad; + + t->compat_from_user(out, in); + pad = XT_ALIGN(t->targetsize) - t->targetsize; + if (pad > 0) + memset(out + t->targetsize, 0, pad); + } else +#endif + memcpy(out, in, XT_ALIGN(t->targetsize)); +} + +static inline int nft_compat_target_offset(struct xt_target *target) +{ +#ifdef CONFIG_COMPAT + return xt_compat_target_offset(target); +#else + return 0; +#endif +} + +static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] = { + [NFTA_RULE_COMPAT_PROTO] = { .type = NLA_U32 }, + [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 }, +}; + +static u8 nft_parse_compat(const struct nlattr *attr, bool *inv) +{ + struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; + u32 flags; + int err; + + err = nla_parse_nested(tb, NFTA_RULE_COMPAT_MAX, attr, + nft_rule_compat_policy); + if (err < 0) + return err; + + if (!tb[NFTA_RULE_COMPAT_PROTO] || !tb[NFTA_RULE_COMPAT_FLAGS]) + return -EINVAL; + + flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS])); + if (flags & ~NFT_RULE_COMPAT_F_MASK) + return -EINVAL; + if (flags & NFT_RULE_COMPAT_F_INV) + *inv = true; + + return ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); +} + +static int +nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + void *info = nft_expr_priv(expr); + struct xt_target *target = expr->ops->data; + struct xt_tgchk_param par; + size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO])); + u8 proto = 0; + bool inv = false; + union nft_entry e = {}; + int ret; + + target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info); + + if (ctx->nla[NFTA_RULE_COMPAT]) + proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv); + + nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); + + ret = xt_check_target(&par, size, proto, inv); + if (ret < 0) + goto err; + + /* The standard target cannot be used */ + if (target->target == NULL) { + ret = -EINVAL; + goto err; + } + + return 0; +err: + module_put(target->me); + return ret; +} + +static void +nft_target_destroy(const struct nft_expr *expr) +{ + struct xt_target *target = expr->ops->data; + + module_put(target->me); +} + +static int +target_dump_info(struct sk_buff *skb, const struct xt_target *t, const void *in) +{ + int ret; + +#ifdef CONFIG_COMPAT + if (t->compat_to_user) { + mm_segment_t old_fs; + void *out; + + out = kmalloc(XT_ALIGN(t->targetsize), GFP_ATOMIC); + if (out == NULL) + return -ENOMEM; + + /* We want to reuse existing compat_to_user */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + t->compat_to_user(out, in); + set_fs(old_fs); + ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), out); + kfree(out); + } else +#endif + ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), in); + + return ret; +} + +static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct xt_target *target = expr->ops->data; + void *info = nft_expr_priv(expr); + + if (nla_put_string(skb, NFTA_TARGET_NAME, target->name) || + nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)) || + target_dump_info(skb, target, info)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static int nft_target_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + struct xt_target *target = expr->ops->data; + unsigned int hook_mask = 0; + + if (ctx->chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); + const struct nf_hook_ops *ops = &basechain->ops; + + hook_mask = 1 << ops->hooknum; + if (hook_mask & target->hooks) + return 0; + + /* This target is being called from an invalid chain */ + return -EINVAL; + } + return 0; +} + +static void nft_match_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + void *info = nft_expr_priv(expr); + struct xt_match *match = expr->ops->data; + struct sk_buff *skb = pkt->skb; + bool ret; + + nft_compat_set_par((struct xt_action_param *)&pkt->xt, match, info); + + ret = match->match(skb, (struct xt_action_param *)&pkt->xt); + + if (pkt->xt.hotdrop) { + data[NFT_REG_VERDICT].verdict = NF_DROP; + return; + } + + switch(ret) { + case true: + data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + break; + case false: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; + break; + } +} + +static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = { + [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING }, + [NFTA_MATCH_REV] = { .type = NLA_U32 }, + [NFTA_MATCH_INFO] = { .type = NLA_BINARY }, +}; + +/* struct xt_mtchk_param and xt_tgchk_param look very similar */ +static void +nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, + struct xt_match *match, void *info, + union nft_entry *entry, u8 proto, bool inv) +{ + par->net = &init_net; + par->table = ctx->table->name; + switch (ctx->afi->family) { + case AF_INET: + entry->e4.ip.proto = proto; + entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; + break; + case AF_INET6: + entry->e6.ipv6.proto = proto; + entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; + break; + } + par->entryinfo = entry; + par->match = match; + par->matchinfo = info; + if (ctx->chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); + const struct nf_hook_ops *ops = &basechain->ops; + + par->hook_mask = 1 << ops->hooknum; + } + par->family = ctx->afi->family; +} + +static void match_compat_from_user(struct xt_match *m, void *in, void *out) +{ +#ifdef CONFIG_COMPAT + if (m->compat_from_user) { + int pad; + + m->compat_from_user(out, in); + pad = XT_ALIGN(m->matchsize) - m->matchsize; + if (pad > 0) + memset(out + m->matchsize, 0, pad); + } else +#endif + memcpy(out, in, XT_ALIGN(m->matchsize)); +} + +static int +nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + void *info = nft_expr_priv(expr); + struct xt_match *match = expr->ops->data; + struct xt_mtchk_param par; + size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); + u8 proto = 0; + bool inv = false; + union nft_entry e = {}; + int ret; + + match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info); + + if (ctx->nla[NFTA_RULE_COMPAT]) + proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv); + + nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); + + ret = xt_check_match(&par, size, proto, inv); + if (ret < 0) + goto err; + + return 0; +err: + module_put(match->me); + return ret; +} + +static void +nft_match_destroy(const struct nft_expr *expr) +{ + struct xt_match *match = expr->ops->data; + + module_put(match->me); +} + +static int +match_dump_info(struct sk_buff *skb, const struct xt_match *m, const void *in) +{ + int ret; + +#ifdef CONFIG_COMPAT + if (m->compat_to_user) { + mm_segment_t old_fs; + void *out; + + out = kmalloc(XT_ALIGN(m->matchsize), GFP_ATOMIC); + if (out == NULL) + return -ENOMEM; + + /* We want to reuse existing compat_to_user */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + m->compat_to_user(out, in); + set_fs(old_fs); + ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), out); + kfree(out); + } else +#endif + ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), in); + + return ret; +} + +static inline int nft_compat_match_offset(struct xt_match *match) +{ +#ifdef CONFIG_COMPAT + return xt_compat_match_offset(match); +#else + return 0; +#endif +} + +static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + void *info = nft_expr_priv(expr); + struct xt_match *match = expr->ops->data; + + if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) || + nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)) || + match_dump_info(skb, match, info)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static int nft_match_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + struct xt_match *match = expr->ops->data; + unsigned int hook_mask = 0; + + if (ctx->chain->flags & NFT_BASE_CHAIN) { + const struct nft_base_chain *basechain = + nft_base_chain(ctx->chain); + const struct nf_hook_ops *ops = &basechain->ops; + + hook_mask = 1 << ops->hooknum; + if (hook_mask & match->hooks) + return 0; + + /* This match is being called from an invalid chain */ + return -EINVAL; + } + return 0; +} + +static int +nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, + int event, u16 family, const char *name, + int rev, int target) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = portid ? NLM_F_MULTI : 0; + + event |= NFNL_SUBSYS_NFT_COMPAT << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFTA_COMPAT_NAME, name) || + nla_put_be32(skb, NFTA_COMPAT_REV, htonl(rev)) || + nla_put_be32(skb, NFTA_COMPAT_TYPE, htonl(target))) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return skb->len; + +nlmsg_failure: +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int +nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + int ret = 0, target; + struct nfgenmsg *nfmsg; + const char *fmt; + const char *name; + u32 rev; + struct sk_buff *skb2; + + if (tb[NFTA_COMPAT_NAME] == NULL || + tb[NFTA_COMPAT_REV] == NULL || + tb[NFTA_COMPAT_TYPE] == NULL) + return -EINVAL; + + name = nla_data(tb[NFTA_COMPAT_NAME]); + rev = ntohl(nla_get_be32(tb[NFTA_COMPAT_REV])); + target = ntohl(nla_get_be32(tb[NFTA_COMPAT_TYPE])); + + nfmsg = nlmsg_data(nlh); + + switch(nfmsg->nfgen_family) { + case AF_INET: + fmt = "ipt_%s"; + break; + case AF_INET6: + fmt = "ip6t_%s"; + break; + default: + pr_err("nft_compat: unsupported protocol %d\n", + nfmsg->nfgen_family); + return -EINVAL; + } + + try_then_request_module(xt_find_revision(nfmsg->nfgen_family, name, + rev, target, &ret), + fmt, name); + + if (ret < 0) + return ret; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + /* include the best revision for this extension in the message */ + if (nfnl_compat_fill_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_COMPAT_GET, + nfmsg->nfgen_family, + name, ret, target) <= 0) { + kfree_skb(skb2); + return -ENOSPC; + } + + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, + MSG_DONTWAIT); + if (ret > 0) + ret = 0; + + return ret == -EAGAIN ? -ENOBUFS : ret; +} + +static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = { + [NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING, + .len = NFT_COMPAT_NAME_MAX-1 }, + [NFTA_COMPAT_REV] = { .type = NLA_U32 }, + [NFTA_COMPAT_TYPE] = { .type = NLA_U32 }, +}; + +static const struct nfnl_callback nfnl_nft_compat_cb[NFNL_MSG_COMPAT_MAX] = { + [NFNL_MSG_COMPAT_GET] = { .call = nfnl_compat_get, + .attr_count = NFTA_COMPAT_MAX, + .policy = nfnl_compat_policy_get }, +}; + +static const struct nfnetlink_subsystem nfnl_compat_subsys = { + .name = "nft-compat", + .subsys_id = NFNL_SUBSYS_NFT_COMPAT, + .cb_count = NFNL_MSG_COMPAT_MAX, + .cb = nfnl_nft_compat_cb, +}; + +static LIST_HEAD(nft_match_list); + +struct nft_xt { + struct list_head head; + struct nft_expr_ops ops; +}; + +static struct nft_expr_type nft_match_type; + +static const struct nft_expr_ops * +nft_match_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + struct nft_xt *nft_match; + struct xt_match *match; + char *mt_name; + __u32 rev, family; + + if (tb[NFTA_MATCH_NAME] == NULL || + tb[NFTA_MATCH_REV] == NULL || + tb[NFTA_MATCH_INFO] == NULL) + return ERR_PTR(-EINVAL); + + mt_name = nla_data(tb[NFTA_MATCH_NAME]); + rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV])); + family = ctx->afi->family; + + /* Re-use the existing match if it's already loaded. */ + list_for_each_entry(nft_match, &nft_match_list, head) { + struct xt_match *match = nft_match->ops.data; + + if (strcmp(match->name, mt_name) == 0 && + match->revision == rev && match->family == family) + return &nft_match->ops; + } + + match = xt_request_find_match(family, mt_name, rev); + if (IS_ERR(match)) + return ERR_PTR(-ENOENT); + + /* This is the first time we use this match, allocate operations */ + nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); + if (nft_match == NULL) + return ERR_PTR(-ENOMEM); + + nft_match->ops.type = &nft_match_type; + nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize) + + nft_compat_match_offset(match)); + nft_match->ops.eval = nft_match_eval; + nft_match->ops.init = nft_match_init; + nft_match->ops.destroy = nft_match_destroy; + nft_match->ops.dump = nft_match_dump; + nft_match->ops.validate = nft_match_validate; + nft_match->ops.data = match; + + list_add(&nft_match->head, &nft_match_list); + + return &nft_match->ops; +} + +static void nft_match_release(void) +{ + struct nft_xt *nft_match; + + list_for_each_entry(nft_match, &nft_match_list, head) + kfree(nft_match); +} + +static struct nft_expr_type nft_match_type __read_mostly = { + .name = "match", + .select_ops = nft_match_select_ops, + .policy = nft_match_policy, + .maxattr = NFTA_MATCH_MAX, + .owner = THIS_MODULE, +}; + +static LIST_HEAD(nft_target_list); + +static struct nft_expr_type nft_target_type; + +static const struct nft_expr_ops * +nft_target_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + struct nft_xt *nft_target; + struct xt_target *target; + char *tg_name; + __u32 rev, family; + + if (tb[NFTA_TARGET_NAME] == NULL || + tb[NFTA_TARGET_REV] == NULL || + tb[NFTA_TARGET_INFO] == NULL) + return ERR_PTR(-EINVAL); + + tg_name = nla_data(tb[NFTA_TARGET_NAME]); + rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV])); + family = ctx->afi->family; + + /* Re-use the existing target if it's already loaded. */ + list_for_each_entry(nft_target, &nft_match_list, head) { + struct xt_target *target = nft_target->ops.data; + + if (strcmp(target->name, tg_name) == 0 && + target->revision == rev && target->family == family) + return &nft_target->ops; + } + + target = xt_request_find_target(family, tg_name, rev); + if (IS_ERR(target)) + return ERR_PTR(-ENOENT); + + /* This is the first time we use this target, allocate operations */ + nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); + if (nft_target == NULL) + return ERR_PTR(-ENOMEM); + + nft_target->ops.type = &nft_target_type; + nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize) + + nft_compat_target_offset(target)); + nft_target->ops.eval = nft_target_eval; + nft_target->ops.init = nft_target_init; + nft_target->ops.destroy = nft_target_destroy; + nft_target->ops.dump = nft_target_dump; + nft_target->ops.validate = nft_target_validate; + nft_target->ops.data = target; + + list_add(&nft_target->head, &nft_target_list); + + return &nft_target->ops; +} + +static void nft_target_release(void) +{ + struct nft_xt *nft_target; + + list_for_each_entry(nft_target, &nft_target_list, head) + kfree(nft_target); +} + +static struct nft_expr_type nft_target_type __read_mostly = { + .name = "target", + .select_ops = nft_target_select_ops, + .policy = nft_target_policy, + .maxattr = NFTA_TARGET_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_compat_module_init(void) +{ + int ret; + + ret = nft_register_expr(&nft_match_type); + if (ret < 0) + return ret; + + ret = nft_register_expr(&nft_target_type); + if (ret < 0) + goto err_match; + + ret = nfnetlink_subsys_register(&nfnl_compat_subsys); + if (ret < 0) { + pr_err("nft_compat: cannot register with nfnetlink.\n"); + goto err_target; + } + + pr_info("nf_tables_compat: (c) 2012 Pablo Neira Ayuso \n"); + + return ret; + +err_target: + nft_unregister_expr(&nft_target_type); +err_match: + nft_unregister_expr(&nft_match_type); + return ret; +} + +static void __exit nft_compat_module_exit(void) +{ + nfnetlink_subsys_unregister(&nfnl_compat_subsys); + nft_unregister_expr(&nft_target_type); + nft_unregister_expr(&nft_match_type); + nft_match_release(); + nft_target_release(); +} + +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT); + +module_init(nft_compat_module_init); +module_exit(nft_compat_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NFT_EXPR("match"); +MODULE_ALIAS_NFT_EXPR("target"); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 1bfeeaf865b6..f169501f1ad4 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -90,14 +90,16 @@ nla_put_failure: return -1; } -static const struct nft_data *nft_immediate_get_verdict(const struct nft_expr *expr) +static int nft_immediate_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); if (priv->dreg == NFT_REG_VERDICT) - return &priv->data; - else - return NULL; + *data = &priv->data; + + return 0; } static struct nft_expr_type nft_imm_type; @@ -108,7 +110,7 @@ static const struct nft_expr_ops nft_imm_ops = { .init = nft_immediate_init, .destroy = nft_immediate_destroy, .dump = nft_immediate_dump, - .get_verdict = nft_immediate_get_verdict, + .validate = nft_immediate_validate, }; static struct nft_expr_type nft_imm_type __read_mostly = { diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 7cf13f7e1e94..bc8bdb2c1ba7 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -107,7 +107,9 @@ const struct nft_expr_ops nft_payload_fast_ops = { .dump = nft_payload_dump, }; -static const struct nft_expr_ops *nft_payload_select_ops(const struct nlattr * const tb[]) +static const struct nft_expr_ops * +nft_payload_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) { enum nft_payload_bases base; unsigned int offset, len; -- cgit v1.2.3 From c54032e05bfcbb261f47aaadf8476e864e8712f4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 11 Oct 2013 10:00:22 +0200 Subject: netfilter: nf_tables: nft_payload: fix transport header base We cannot use skb->transport_header since it's unset, use pkt->xt.thoff instead. Now possible using information made available through the x_tables compatibility layer. Reported-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 2 +- net/netfilter/nft_payload.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index e51a45c12128..3c13007d80df 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -44,7 +44,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) ptr = skb_network_header(skb); else - ptr = skb_transport_header(skb); + ptr = skb_network_header(skb) + pkt->xt.thoff; ptr += priv->offset; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index bc8bdb2c1ba7..a2aeb318678f 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -36,7 +36,7 @@ static void nft_payload_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: - offset = skb_transport_offset(skb); + offset = pkt->xt.thoff; break; default: BUG(); -- cgit v1.2.3 From 9ddf63235749a9efa1fad2eeb74be2ee9b580f8d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 10 Oct 2013 13:26:33 +0200 Subject: netfilter: nf_tables: add support for dormant tables This patch allows you to temporarily disable an entire table. You can change the state of a dormant table via NFT_MSG_NEWTABLE messages. Using this operation you can wake up a table, so their chains are registered. This provides atomicity at chain level. Thus, the rule-set of one chain is applied at once, avoiding any possible intermediate state in every chain. Still, the chains that belongs to a table are registered consecutively. This also allows you to have inactive tables in the kernel. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 97 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 90 insertions(+), 7 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 61e017b349cb..a4dd7ce5ec3e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -158,6 +158,7 @@ static int nf_tables_chain_type_lookup(const struct nft_af_info *afi, static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING }, + [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, }; static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, @@ -177,7 +178,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; - if (nla_put_string(skb, NFTA_TABLE_NAME, table->name)) + if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || + nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags))) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -301,6 +303,74 @@ err: return err; } +static int nf_tables_table_enable(struct nft_table *table) +{ + struct nft_chain *chain; + int err, i = 0; + + list_for_each_entry(chain, &table->chains, list) { + err = nf_register_hook(&nft_base_chain(chain)->ops); + if (err < 0) + goto err; + + i++; + } + return 0; +err: + list_for_each_entry(chain, &table->chains, list) { + if (i-- <= 0) + break; + + nf_unregister_hook(&nft_base_chain(chain)->ops); + } + return err; +} + +static int nf_tables_table_disable(struct nft_table *table) +{ + struct nft_chain *chain; + + list_for_each_entry(chain, &table->chains, list) + nf_unregister_hook(&nft_base_chain(chain)->ops); + + return 0; +} + +static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nla[], + struct nft_af_info *afi, struct nft_table *table) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + int family = nfmsg->nfgen_family, ret = 0; + + if (nla[NFTA_TABLE_FLAGS]) { + __be32 flags; + + flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); + if (flags & ~NFT_TABLE_F_DORMANT) + return -EINVAL; + + if ((flags & NFT_TABLE_F_DORMANT) && + !(table->flags & NFT_TABLE_F_DORMANT)) { + ret = nf_tables_table_disable(table); + if (ret >= 0) + table->flags |= NFT_TABLE_F_DORMANT; + } else if (!(flags & NFT_TABLE_F_DORMANT) && + table->flags & NFT_TABLE_F_DORMANT) { + ret = nf_tables_table_enable(table); + if (ret >= 0) + table->flags &= ~NFT_TABLE_F_DORMANT; + } + if (ret < 0) + goto err; + } + + nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); +err: + return ret; +} + static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -328,7 +398,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - return 0; + return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table); } table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL); @@ -339,6 +409,18 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); + if (nla[NFTA_TABLE_FLAGS]) { + __be32 flags; + + flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); + if (flags & ~NFT_TABLE_F_DORMANT) { + kfree(table); + return -EINVAL; + } + + table->flags |= flags; + } + list_add_tail(&table->list, &afi->tables); nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); return 0; @@ -890,10 +972,8 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, chain->handle = nf_tables_alloc_handle(table); nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); - list_add_tail(&chain->list, &table->chains); - table->use++; - - if (chain->flags & NFT_BASE_CHAIN) { + if (!(table->flags & NFT_TABLE_F_DORMANT) && + chain->flags & NFT_BASE_CHAIN) { err = nf_register_hook(&nft_base_chain(chain)->ops); if (err < 0) { free_percpu(basechain->stats); @@ -901,6 +981,8 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, return err; } } + list_add_tail(&chain->list, &table->chains); + table->use++; notify: nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN, family); @@ -948,7 +1030,8 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, list_del(&chain->list); table->use--; - if (chain->flags & NFT_BASE_CHAIN) + if (!(table->flags & NFT_TABLE_F_DORMANT) && + chain->flags & NFT_BASE_CHAIN) nf_unregister_hook(&nft_base_chain(chain)->ops); nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN, -- cgit v1.2.3 From eb31628e37a0a4e01fffd79dcc7f815d2357f53a Mon Sep 17 00:00:00 2001 From: Tomasz Bursztyka Date: Thu, 10 Oct 2013 13:39:19 +0200 Subject: netfilter: nf_tables: Add support for IPv6 NAT This patch generalizes the NAT expression to support both IPv4 and IPv6 using the existing IPv4/IPv6 NAT infrastructure. This also adds the NAT chain type for IPv6. This patch collapses the following patches that were posted to the netfilter-devel mailing list, from Tomasz: * nf_tables: Change NFTA_NAT_ attributes to better semantic significance * nf_tables: Split IPv4 NAT into NAT expression and IPv4 NAT chain * nf_tables: Add support for IPv6 NAT expression * nf_tables: Add support for IPv6 NAT chain * nf_tables: Fix up build issue on IPv6 NAT support And, from Pablo Neira Ayuso: * fix missing dependencies in nft_chain_nat Signed-off-by: Tomasz Bursztyka Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 6 ++ net/netfilter/Makefile | 1 + net/netfilter/nft_nat.c | 220 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 227 insertions(+) create mode 100644 net/netfilter/nft_nat.c (limited to 'net/netfilter') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 49e362707379..48acec17e27a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -450,6 +450,12 @@ config NFT_LIMIT depends on NF_TABLES tristate "Netfilter nf_tables limit module" +config NFT_NAT + depends on NF_TABLES + depends on NF_CONNTRACK + depends on NF_NAT + tristate "Netfilter nf_tables nat module" + config NFT_COMPAT depends on NF_TABLES depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index a6781450b6fb..394483b2c193 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -75,6 +75,7 @@ obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o +obj-$(CONFIG_NFT_NAT) += nft_nat.o #nf_tables-objs += nft_meta_target.o obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o obj-$(CONFIG_NFT_HASH) += nft_hash.o diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c new file mode 100644 index 000000000000..b0b87b2d2411 --- /dev/null +++ b/net/netfilter/nft_nat.c @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy + * Copyright (c) 2012 Pablo Neira Ayuso + * Copyright (c) 2012 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_nat { + enum nft_registers sreg_addr_min:8; + enum nft_registers sreg_addr_max:8; + enum nft_registers sreg_proto_min:8; + enum nft_registers sreg_proto_max:8; + int family; + enum nf_nat_manip_type type; +}; + +static void nft_nat_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo); + struct nf_nat_range range; + + memset(&range, 0, sizeof(range)); + if (priv->sreg_addr_min) { + if (priv->family == AF_INET) { + range.min_addr.ip = data[priv->sreg_addr_min].data[0]; + range.max_addr.ip = data[priv->sreg_addr_max].data[0]; + + } else { + memcpy(range.min_addr.ip6, + data[priv->sreg_addr_min].data, + sizeof(struct nft_data)); + memcpy(range.max_addr.ip6, + data[priv->sreg_addr_max].data, + sizeof(struct nft_data)); + } + range.flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (priv->sreg_proto_min) { + range.min_proto.all = data[priv->sreg_proto_min].data[0]; + range.max_proto.all = data[priv->sreg_proto_max].data[0]; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + data[NFT_REG_VERDICT].verdict = + nf_nat_setup_info(ct, &range, priv->type); +} + +static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { + [NFTA_NAT_TYPE] = { .type = NLA_U32 }, + [NFTA_NAT_FAMILY] = { .type = NLA_U32 }, + [NFTA_NAT_REG_ADDR_MIN] = { .type = NLA_U32 }, + [NFTA_NAT_REG_ADDR_MAX] = { .type = NLA_U32 }, + [NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 }, + [NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 }, +}; + +static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_nat *priv = nft_expr_priv(expr); + int err; + + if (tb[NFTA_NAT_TYPE] == NULL) + return -EINVAL; + + switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) { + case NFT_NAT_SNAT: + priv->type = NF_NAT_MANIP_SRC; + break; + case NFT_NAT_DNAT: + priv->type = NF_NAT_MANIP_DST; + break; + default: + return -EINVAL; + } + + if (tb[NFTA_NAT_FAMILY] == NULL) + return -EINVAL; + + priv->family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); + if (priv->family != AF_INET && priv->family != AF_INET6) + return -EINVAL; + + if (tb[NFTA_NAT_REG_ADDR_MIN]) { + priv->sreg_addr_min = ntohl(nla_get_be32( + tb[NFTA_NAT_REG_ADDR_MIN])); + err = nft_validate_input_register(priv->sreg_addr_min); + if (err < 0) + return err; + } + + if (tb[NFTA_NAT_REG_ADDR_MAX]) { + priv->sreg_addr_max = ntohl(nla_get_be32( + tb[NFTA_NAT_REG_ADDR_MAX])); + err = nft_validate_input_register(priv->sreg_addr_max); + if (err < 0) + return err; + } else + priv->sreg_addr_max = priv->sreg_addr_min; + + if (tb[NFTA_NAT_REG_PROTO_MIN]) { + priv->sreg_proto_min = ntohl(nla_get_be32( + tb[NFTA_NAT_REG_PROTO_MIN])); + err = nft_validate_input_register(priv->sreg_proto_min); + if (err < 0) + return err; + } + + if (tb[NFTA_NAT_REG_PROTO_MAX]) { + priv->sreg_proto_max = ntohl(nla_get_be32( + tb[NFTA_NAT_REG_PROTO_MAX])); + err = nft_validate_input_register(priv->sreg_proto_max); + if (err < 0) + return err; + } else + priv->sreg_proto_max = priv->sreg_proto_min; + + return 0; +} + +static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NF_NAT_MANIP_SRC: + if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT))) + goto nla_put_failure; + break; + case NF_NAT_MANIP_DST: + if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT))) + goto nla_put_failure; + break; + } + + if (nla_put_be32(skb, NFTA_NAT_FAMILY, htonl(priv->family))) + goto nla_put_failure; + if (nla_put_be32(skb, + NFTA_NAT_REG_ADDR_MIN, htonl(priv->sreg_addr_min))) + goto nla_put_failure; + if (nla_put_be32(skb, + NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max))) + goto nla_put_failure; + if (nla_put_be32(skb, + NFTA_NAT_REG_PROTO_MIN, htonl(priv->sreg_proto_min))) + goto nla_put_failure; + if (nla_put_be32(skb, + NFTA_NAT_REG_PROTO_MAX, htonl(priv->sreg_proto_max))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_nat_type; +static const struct nft_expr_ops nft_nat_ops = { + .type = &nft_nat_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), + .eval = nft_nat_eval, + .init = nft_nat_init, + .dump = nft_nat_dump, +}; + +static struct nft_expr_type nft_nat_type __read_mostly = { + .name = "nat", + .ops = &nft_nat_ops, + .policy = nft_nat_policy, + .maxattr = NFTA_NAT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_nat_module_init(void) +{ + int err; + + err = nft_register_expr(&nft_nat_type); + if (err < 0) + return err; + + return 0; +} + +static void __exit nft_nat_module_exit(void) +{ + nft_unregister_expr(&nft_nat_type); +} + +module_init(nft_nat_module_init); +module_exit(nft_nat_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tomasz Bursztyka "); +MODULE_ALIAS_NFT_EXPR("nat"); -- cgit v1.2.3 From 99633ab29b2131b68089a6c7f60458390860e044 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 10 Oct 2013 23:28:33 +0200 Subject: netfilter: nf_tables: complete net namespace support Register family per netnamespace to ensure that sets are only visible in its approapriate namespace. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 83 ++++++++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 28 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a4dd7ce5ec3e..e1ee85047ec1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -18,9 +18,9 @@ #include #include #include +#include #include -static LIST_HEAD(nf_tables_afinfo); static LIST_HEAD(nf_tables_expressions); /** @@ -31,11 +31,11 @@ static LIST_HEAD(nf_tables_expressions); * Register the address family for use with nf_tables. Returns zero on * success or a negative errno code otherwise. */ -int nft_register_afinfo(struct nft_af_info *afi) +int nft_register_afinfo(struct net *net, struct nft_af_info *afi) { INIT_LIST_HEAD(&afi->tables); nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail(&afi->list, &nf_tables_afinfo); + list_add_tail(&afi->list, &net->nft.af_info); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } @@ -56,22 +56,23 @@ void nft_unregister_afinfo(struct nft_af_info *afi) } EXPORT_SYMBOL_GPL(nft_unregister_afinfo); -static struct nft_af_info *nft_afinfo_lookup(int family) +static struct nft_af_info *nft_afinfo_lookup(struct net *net, int family) { struct nft_af_info *afi; - list_for_each_entry(afi, &nf_tables_afinfo, list) { + list_for_each_entry(afi, &net->nft.af_info, list) { if (afi->family == family) return afi; } return NULL; } -static struct nft_af_info *nf_tables_afinfo_lookup(int family, bool autoload) +static struct nft_af_info * +nf_tables_afinfo_lookup(struct net *net, int family, bool autoload) { struct nft_af_info *afi; - afi = nft_afinfo_lookup(family); + afi = nft_afinfo_lookup(net, family); if (afi != NULL) return afi; #ifdef CONFIG_MODULES @@ -79,7 +80,7 @@ static struct nft_af_info *nf_tables_afinfo_lookup(int family, bool autoload) nfnl_unlock(NFNL_SUBSYS_NFTABLES); request_module("nft-afinfo-%u", family); nfnl_lock(NFNL_SUBSYS_NFTABLES); - afi = nft_afinfo_lookup(family); + afi = nft_afinfo_lookup(net, family); if (afi != NULL) return ERR_PTR(-EAGAIN); } @@ -232,9 +233,10 @@ static int nf_tables_dump_tables(struct sk_buff *skb, const struct nft_af_info *afi; const struct nft_table *table; unsigned int idx = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - list_for_each_entry(afi, &nf_tables_afinfo, list) { + list_for_each_entry(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; @@ -268,6 +270,7 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, const struct nft_af_info *afi; const struct nft_table *table; struct sk_buff *skb2; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -278,7 +281,7 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, return netlink_dump_start(nlsk, skb, nlh, &c); } - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -379,9 +382,10 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, const struct nlattr *name; struct nft_af_info *afi; struct nft_table *table; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - afi = nf_tables_afinfo_lookup(family, true); + afi = nf_tables_afinfo_lookup(net, family, true); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -433,9 +437,10 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi; struct nft_table *table; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -663,9 +668,10 @@ static int nf_tables_dump_chains(struct sk_buff *skb, const struct nft_table *table; const struct nft_chain *chain; unsigned int idx = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - list_for_each_entry(afi, &nf_tables_afinfo, list) { + list_for_each_entry(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; @@ -702,6 +708,7 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, const struct nft_table *table; const struct nft_chain *chain; struct sk_buff *skb2; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -712,7 +719,7 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, return netlink_dump_start(nlsk, skb, nlh, &c); } - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -813,6 +820,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, struct nft_chain *chain; struct nft_base_chain *basechain = NULL; struct nlattr *ha[NFTA_HOOK_MAX + 1]; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; u64 handle = 0; int err; @@ -820,7 +828,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; - afi = nf_tables_afinfo_lookup(family, true); + afi = nf_tables_afinfo_lookup(net, family, true); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1010,9 +1018,10 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, const struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1050,6 +1059,7 @@ static void nft_ctx_init(struct nft_ctx *ctx, const struct nft_chain *chain, const struct nlattr * const *nla) { + ctx->net = sock_net(skb->sk); ctx->skb = skb; ctx->nlh = nlh; ctx->afi = afi; @@ -1361,9 +1371,10 @@ static int nf_tables_dump_rules(struct sk_buff *skb, const struct nft_chain *chain; const struct nft_rule *rule; unsigned int idx = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - list_for_each_entry(afi, &nf_tables_afinfo, list) { + list_for_each_entry(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; @@ -1402,6 +1413,7 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, const struct nft_chain *chain; const struct nft_rule *rule; struct sk_buff *skb2; + struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -1412,7 +1424,7 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, return netlink_dump_start(nlsk, skb, nlh, &c); } - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1477,6 +1489,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; + struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *old_rule = NULL; @@ -1490,7 +1503,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; - afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, create); + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1585,12 +1598,13 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; + struct net *net = sock_net(skb->sk); const struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *tmp; int family = nfmsg->nfgen_family; - afi = nf_tables_afinfo_lookup(family, false); + afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1697,11 +1711,12 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + struct net *net = sock_net(skb->sk); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; const struct nft_table *table = NULL; - afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, false); + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -1818,12 +1833,11 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, { struct sk_buff *skb; u32 portid = NETLINK_CB(ctx->skb).portid; - struct net *net = sock_net(ctx->skb->sk); bool report; int err; report = nlmsg_report(ctx->nlh); - if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; @@ -1837,11 +1851,11 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, + err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report, GFP_KERNEL); err: if (err < 0) - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err); return err; } @@ -1974,6 +1988,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_set_ops *ops; const struct nft_af_info *afi; + struct net *net = sock_net(skb->sk); struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; @@ -2032,7 +2047,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; - afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, create); + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -2219,8 +2234,9 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; const struct nft_table *table; + struct net *net = sock_net(skb->sk); - afi = nf_tables_afinfo_lookup(nfmsg->nfgen_family, false); + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); if (IS_ERR(afi)) return PTR_ERR(afi); @@ -3011,6 +3027,16 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, } EXPORT_SYMBOL_GPL(nft_data_dump); +static int nf_tables_init_net(struct net *net) +{ + INIT_LIST_HEAD(&net->nft.af_info); + return 0; +} + +static struct pernet_operations nf_tables_net_ops = { + .init = nf_tables_init_net, +}; + static int __init nf_tables_module_init(void) { int err; @@ -3031,7 +3057,7 @@ static int __init nf_tables_module_init(void) goto err3; pr_info("nf_tables: (c) 2007-2009 Patrick McHardy \n"); - return 0; + return register_pernet_subsys(&nf_tables_net_ops); err3: nf_tables_core_module_exit(); err2: @@ -3042,6 +3068,7 @@ err1: static void __exit nf_tables_module_exit(void) { + unregister_pernet_subsys(&nf_tables_net_ops); nfnetlink_subsys_unregister(&nf_tables_subsys); nf_tables_core_module_exit(); kfree(info); -- cgit v1.2.3 From 5e94846686d027a4c8ecc5d9d52b18036d3e8f7a Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 10 Oct 2013 13:41:44 +0200 Subject: netfilter: nf_tables: add insert operation This patch adds a new rule attribute NFTA_RULE_POSITION which is used to store the position of a rule relatively to the others. By providing the create command and specifying the position, the rule is inserted after the rule with the handle equal to the provided position. Regarding notification, the position attribute specifies the handle of the previous rule to make sure we don't point to any stale rule in notifications coming from the commit path. This patch includes the following fix from Pablo: * nf_tables: fix rule deletion event reporting Signed-off-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e1ee85047ec1..0f140663ec71 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1273,6 +1273,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, + [NFTA_RULE_POSITION] = { .type = NLA_U64 }, }; static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, @@ -1285,9 +1286,10 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, struct nfgenmsg *nfmsg; const struct nft_expr *expr, *next; struct nlattr *list; + const struct nft_rule *prule; + int type = event | NFNL_SUBSYS_NFTABLES << 8; - event |= NFNL_SUBSYS_NFTABLES << 8; - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), + nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags); if (nlh == NULL) goto nla_put_failure; @@ -1304,6 +1306,13 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq, if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle))) goto nla_put_failure; + if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) { + prule = list_entry(rule->list.prev, struct nft_rule, list); + if (nla_put_be64(skb, NFTA_RULE_POSITION, + cpu_to_be64(prule->handle))) + goto nla_put_failure; + } + list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS); if (list == NULL) goto nla_put_failure; @@ -1499,7 +1508,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, unsigned int size, i, n; int err, rem; bool create; - u64 handle; + u64 handle, pos_handle; create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; @@ -1533,6 +1542,16 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, handle = nf_tables_alloc_handle(table); } + if (nla[NFTA_RULE_POSITION]) { + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -EOPNOTSUPP; + + pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); + old_rule = __nf_tables_rule_lookup(chain, pos_handle); + if (IS_ERR(old_rule)) + return PTR_ERR(old_rule); + } + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); n = 0; @@ -1573,9 +1592,16 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, list_replace_rcu(&old_rule->list, &rule->list); nf_tables_rule_destroy(old_rule); } else if (nlh->nlmsg_flags & NLM_F_APPEND) - list_add_tail_rcu(&rule->list, &chain->rules); - else - list_add_rcu(&rule->list, &chain->rules); + if (old_rule) + list_add_rcu(&rule->list, &old_rule->list); + else + list_add_tail_rcu(&rule->list, &chain->rules); + else { + if (old_rule) + list_add_tail_rcu(&rule->list, &old_rule->list); + else + list_add_rcu(&rule->list, &chain->rules); + } nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE, nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), -- cgit v1.2.3 From 0628b123c96d126e617beb3b4fd63b874d0e4f17 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Oct 2013 11:05:33 +0200 Subject: netfilter: nfnetlink: add batch support and use it from nf_tables This patch adds a batch support to nfnetlink. Basically, it adds two new control messages: * NFNL_MSG_BATCH_BEGIN, that indicates the beginning of a batch, the nfgenmsg->res_id indicates the nfnetlink subsystem ID. * NFNL_MSG_BATCH_END, that results in the invocation of the ss->commit callback function. If not specified or an error ocurred in the batch, the ss->abort function is invoked instead. The end message represents the commit operation in nftables, the lack of end message results in an abort. This patch also adds the .call_batch function that is only called from the batch receival path. This patch adds atomic rule updates and dumps based on bitmask generations. This allows to atomically commit a set of rule-set updates incrementally without altering the internal state of existing nf_tables expressions/matches/targets. The idea consists of using a generation cursor of 1 bit and a bitmask of 2 bits per rule. Assuming the gencursor is 0, then the genmask (expressed as a bitmask) can be interpreted as: 00 active in the present, will be active in the next generation. 01 inactive in the present, will be active in the next generation. 10 active in the present, will be deleted in the next generation. ^ gencursor Once you invoke the transition to the next generation, the global gencursor is updated: 00 active in the present, will be active in the next generation. 01 active in the present, needs to zero its future, it becomes 00. 10 inactive in the present, delete now. ^ gencursor If a dump is in progress and nf_tables enters a new generation, the dump will stop and return -EBUSY to let userspace know that it has to retry again. In order to invalidate dumps, a global genctr counter is increased everytime nf_tables enters a new generation. This new operation can be used from the user-space utility that controls the firewall, eg. nft -f restore The rule updates contained in `file' will be applied atomically. cat file ----- add filter INPUT ip saddr 1.1.1.1 counter accept #1 del filter INPUT ip daddr 2.2.2.2 counter drop #2 -EOF- Note that the rule 1 will be inactive until the transition to the next generation, the rule 2 will be evicted in the next generation. There is a penalty during the rule update due to the branch misprediction in the packet matching framework. But that should be quickly resolved once the iteration over the commit list that contain rules that require updates is finished. Event notification happens once the rule-set update has been committed. So we skip notifications is case the rule-set update is aborted, which can happen in case that the rule-set is tested to apply correctly. This patch squashed the following patches from Pablo: * nf_tables: atomic rule updates and dumps * nf_tables: get rid of per rule list_head for commits * nf_tables: use per netns commit list * nfnetlink: add batch support and use it from nf_tables * nf_tables: all rule updates are transactional * nf_tables: attach replacement rule after stale one * nf_tables: do not allow deletion/replacement of stale rules * nf_tables: remove unused NFTA_RULE_FLAGS Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 202 +++++++++++++++++++++++++++++++++++++---- net/netfilter/nf_tables_core.c | 10 ++ net/netfilter/nfnetlink.c | 175 ++++++++++++++++++++++++++++++++++- 3 files changed, 365 insertions(+), 22 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0f140663ec71..79e1418a6043 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -978,6 +978,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&chain->rules); chain->handle = nf_tables_alloc_handle(table); + chain->net = net; nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); if (!(table->flags & NFT_TABLE_F_DORMANT) && @@ -1371,6 +1372,41 @@ err: return err; } +static inline bool +nft_rule_is_active(struct net *net, const struct nft_rule *rule) +{ + return (rule->genmask & (1 << net->nft.gencursor)) == 0; +} + +static inline int gencursor_next(struct net *net) +{ + return net->nft.gencursor+1 == 1 ? 1 : 0; +} + +static inline int +nft_rule_is_active_next(struct net *net, const struct nft_rule *rule) +{ + return (rule->genmask & (1 << gencursor_next(net))) == 0; +} + +static inline void +nft_rule_activate_next(struct net *net, struct nft_rule *rule) +{ + /* Now inactive, will be active in the future */ + rule->genmask = (1 << net->nft.gencursor); +} + +static inline void +nft_rule_disactivate_next(struct net *net, struct nft_rule *rule) +{ + rule->genmask = (1 << gencursor_next(net)); +} + +static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) +{ + rule->genmask = 0; +} + static int nf_tables_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { @@ -1382,6 +1418,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb, unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + u8 genctr = ACCESS_ONCE(net->nft.genctr); + u8 gencursor = ACCESS_ONCE(net->nft.gencursor); list_for_each_entry(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) @@ -1390,6 +1428,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb, list_for_each_entry(table, &afi->tables, list) { list_for_each_entry(chain, &table->chains, list) { list_for_each_entry(rule, &chain->rules, list) { + if (!nft_rule_is_active(net, rule)) + goto cont; if (idx < s_idx) goto cont; if (idx > s_idx) @@ -1408,6 +1448,10 @@ cont: } } done: + /* Invalidate this dump, a transition to the new generation happened */ + if (gencursor != net->nft.gencursor || genctr != net->nft.genctr) + return -EBUSY; + cb->args[0] = idx; return skb->len; } @@ -1492,6 +1536,25 @@ static void nf_tables_rule_destroy(struct nft_rule *rule) static struct nft_expr_info *info; +static struct nft_rule_trans * +nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx) +{ + struct nft_rule_trans *rupd; + + rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL); + if (rupd == NULL) + return NULL; + + rupd->chain = ctx->chain; + rupd->table = ctx->table; + rupd->rule = rule; + rupd->family = ctx->afi->family; + rupd->nlh = ctx->nlh; + list_add_tail(&rupd->list, &ctx->net->nft.commit_list); + + return rupd; +} + static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1502,6 +1565,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *old_rule = NULL; + struct nft_rule_trans *repl = NULL; struct nft_expr *expr; struct nft_ctx ctx; struct nlattr *tmp; @@ -1576,6 +1640,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (rule == NULL) goto err1; + nft_rule_activate_next(net, rule); + rule->handle = handle; rule->dlen = size; @@ -1589,8 +1655,18 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, } if (nlh->nlmsg_flags & NLM_F_REPLACE) { - list_replace_rcu(&old_rule->list, &rule->list); - nf_tables_rule_destroy(old_rule); + if (nft_rule_is_active_next(net, old_rule)) { + repl = nf_tables_trans_add(old_rule, &ctx); + if (repl == NULL) { + err = -ENOMEM; + goto err2; + } + nft_rule_disactivate_next(net, old_rule); + list_add_tail(&rule->list, &old_rule->list); + } else { + err = -ENOENT; + goto err2; + } } else if (nlh->nlmsg_flags & NLM_F_APPEND) if (old_rule) list_add_rcu(&rule->list, &old_rule->list); @@ -1603,11 +1679,20 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, list_add_rcu(&rule->list, &chain->rules); } - nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE, - nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), - nfmsg->nfgen_family); + if (nf_tables_trans_add(rule, &ctx) == NULL) { + err = -ENOMEM; + goto err3; + } return 0; +err3: + list_del_rcu(&rule->list); + if (repl) { + list_del_rcu(&repl->rule->list); + list_del(&repl->list); + nft_rule_clear(net, repl->rule); + kfree(repl); + } err2: nf_tables_rule_destroy(rule); err1: @@ -1618,6 +1703,19 @@ err1: return err; } +static int +nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) +{ + /* You cannot delete the same rule twice */ + if (nft_rule_is_active_next(ctx->net, rule)) { + if (nf_tables_trans_add(rule, ctx) == NULL) + return -ENOMEM; + nft_rule_disactivate_next(ctx->net, rule); + return 0; + } + return -ENOENT; +} + static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1628,7 +1726,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *tmp; - int family = nfmsg->nfgen_family; + int family = nfmsg->nfgen_family, err = 0; + struct nft_ctx ctx; afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) @@ -1642,31 +1741,95 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(chain)) return PTR_ERR(chain); + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); + if (nla[NFTA_RULE_HANDLE]) { rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) return PTR_ERR(rule); - /* List removal must be visible before destroying expressions */ - list_del_rcu(&rule->list); - - nf_tables_rule_notify(skb, nlh, table, chain, rule, - NFT_MSG_DELRULE, 0, family); - nf_tables_rule_destroy(rule); + err = nf_tables_delrule_one(&ctx, rule); } else { /* Remove all rules in this chain */ list_for_each_entry_safe(rule, tmp, &chain->rules, list) { - list_del_rcu(&rule->list); + err = nf_tables_delrule_one(&ctx, rule); + if (err < 0) + break; + } + } + + return err; +} + +static int nf_tables_commit(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct nft_rule_trans *rupd, *tmp; - nf_tables_rule_notify(skb, nlh, table, chain, rule, - NFT_MSG_DELRULE, 0, family); - nf_tables_rule_destroy(rule); + /* Bump generation counter, invalidate any dump in progress */ + net->nft.genctr++; + + /* A new generation has just started */ + net->nft.gencursor = gencursor_next(net); + + /* Make sure all packets have left the previous generation before + * purging old rules. + */ + synchronize_rcu(); + + list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { + /* Delete this rule from the dirty list */ + list_del(&rupd->list); + + /* This rule was inactive in the past and just became active. + * Clear the next bit of the genmask since its meaning has + * changed, now it is the future. + */ + if (nft_rule_is_active(net, rupd->rule)) { + nft_rule_clear(net, rupd->rule); + nf_tables_rule_notify(skb, rupd->nlh, rupd->table, + rupd->chain, rupd->rule, + NFT_MSG_NEWRULE, 0, + rupd->family); + kfree(rupd); + continue; } + + /* This rule is in the past, get rid of it */ + list_del_rcu(&rupd->rule->list); + nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain, + rupd->rule, NFT_MSG_DELRULE, 0, + rupd->family); + nf_tables_rule_destroy(rupd->rule); + kfree(rupd); } return 0; } +static int nf_tables_abort(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct nft_rule_trans *rupd, *tmp; + + list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { + /* Delete all rules from the dirty list */ + list_del(&rupd->list); + + if (!nft_rule_is_active_next(net, rupd->rule)) { + nft_rule_clear(net, rupd->rule); + kfree(rupd); + continue; + } + + /* This rule is inactive, get rid of it */ + list_del_rcu(&rupd->rule->list); + nf_tables_rule_destroy(rupd->rule); + kfree(rupd); + } + return 0; +} + /* * Sets */ @@ -2634,7 +2797,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_chain_policy, }, [NFT_MSG_NEWRULE] = { - .call = nf_tables_newrule, + .call_batch = nf_tables_newrule, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, @@ -2644,7 +2807,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_rule_policy, }, [NFT_MSG_DELRULE] = { - .call = nf_tables_delrule, + .call_batch = nf_tables_delrule, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, }, @@ -2685,6 +2848,8 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .subsys_id = NFNL_SUBSYS_NFTABLES, .cb_count = NFT_MSG_MAX, .cb = nf_tables_cb, + .commit = nf_tables_commit, + .abort = nf_tables_abort, }; /* @@ -3056,6 +3221,7 @@ EXPORT_SYMBOL_GPL(nft_data_dump); static int nf_tables_init_net(struct net *net) { INIT_LIST_HEAD(&net->nft.af_info); + INIT_LIST_HEAD(&net->nft.commit_list); return 0; } diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 3c13007d80df..d581ef660248 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -88,12 +88,22 @@ nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) struct nft_data data[NFT_REG_MAX + 1]; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; + /* + * Cache cursor to avoid problems in case that the cursor is updated + * while traversing the ruleset. + */ + unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor); do_chain: rule = list_entry(&chain->rules, struct nft_rule, list); next_rule: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; list_for_each_entry_continue_rcu(rule, &chain->rules, list) { + + /* This rule is not active, skip. */ + if (unlikely(rule->genmask & (1 << gencursor))) + continue; + nft_rule_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, data); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 572d87dc116f..027f16af51a0 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -147,9 +147,6 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) const struct nfnetlink_subsystem *ss; int type, err; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - /* All the messages must at least contain nfgenmsg */ if (nlmsg_len(nlh) < sizeof(struct nfgenmsg)) return 0; @@ -217,9 +214,179 @@ replay: } } +static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, + u_int16_t subsys_id) +{ + struct sk_buff *nskb, *oskb = skb; + struct net *net = sock_net(skb->sk); + const struct nfnetlink_subsystem *ss; + const struct nfnl_callback *nc; + bool success = true, done = false; + int err; + + if (subsys_id >= NFNL_SUBSYS_COUNT) + return netlink_ack(skb, nlh, -EINVAL); +replay: + nskb = netlink_skb_clone(oskb, GFP_KERNEL); + if (!nskb) + return netlink_ack(oskb, nlh, -ENOMEM); + + nskb->sk = oskb->sk; + skb = nskb; + + nfnl_lock(subsys_id); + ss = rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(&table[subsys_id].mutex)); + if (!ss) { +#ifdef CONFIG_MODULES + nfnl_unlock(subsys_id); + request_module("nfnetlink-subsys-%d", subsys_id); + nfnl_lock(subsys_id); + ss = rcu_dereference_protected(table[subsys_id].subsys, + lockdep_is_held(&table[subsys_id].mutex)); + if (!ss) +#endif + { + nfnl_unlock(subsys_id); + kfree_skb(nskb); + return netlink_ack(skb, nlh, -EOPNOTSUPP); + } + } + + if (!ss->commit || !ss->abort) { + nfnl_unlock(subsys_id); + kfree_skb(nskb); + return netlink_ack(skb, nlh, -EOPNOTSUPP); + } + + while (skb->len >= nlmsg_total_size(0)) { + int msglen, type; + + nlh = nlmsg_hdr(skb); + err = 0; + + if (nlh->nlmsg_len < NLMSG_HDRLEN) { + err = -EINVAL; + goto ack; + } + + /* Only requests are handled by the kernel */ + if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) { + err = -EINVAL; + goto ack; + } + + type = nlh->nlmsg_type; + if (type == NFNL_MSG_BATCH_BEGIN) { + /* Malformed: Batch begin twice */ + success = false; + goto done; + } else if (type == NFNL_MSG_BATCH_END) { + done = true; + goto done; + } else if (type < NLMSG_MIN_TYPE) { + err = -EINVAL; + goto ack; + } + + /* We only accept a batch with messages for the same + * subsystem. + */ + if (NFNL_SUBSYS_ID(type) != subsys_id) { + err = -EINVAL; + goto ack; + } + + nc = nfnetlink_find_client(type, ss); + if (!nc) { + err = -EINVAL; + goto ack; + } + + { + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); + u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); + struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; + struct nlattr *attr = (void *)nlh + min_len; + int attrlen = nlh->nlmsg_len - min_len; + + err = nla_parse(cda, ss->cb[cb_id].attr_count, + attr, attrlen, ss->cb[cb_id].policy); + if (err < 0) + goto ack; + + if (nc->call_batch) { + err = nc->call_batch(net->nfnl, skb, nlh, + (const struct nlattr **)cda); + } + + /* The lock was released to autoload some module, we + * have to abort and start from scratch using the + * original skb. + */ + if (err == -EAGAIN) { + ss->abort(skb); + nfnl_unlock(subsys_id); + kfree_skb(nskb); + goto replay; + } + } +ack: + if (nlh->nlmsg_flags & NLM_F_ACK || err) { + /* We don't stop processing the batch on errors, thus, + * userspace gets all the errors that the batch + * triggers. + */ + netlink_ack(skb, nlh, err); + if (err) + success = false; + } + + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + skb_pull(skb, msglen); + } +done: + if (success && done) + ss->commit(skb); + else + ss->abort(skb); + + nfnl_unlock(subsys_id); + kfree_skb(nskb); +} + static void nfnetlink_rcv(struct sk_buff *skb) { - netlink_rcv_skb(skb, &nfnetlink_rcv_msg); + struct nlmsghdr *nlh = nlmsg_hdr(skb); + struct net *net = sock_net(skb->sk); + int msglen; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return netlink_ack(skb, nlh, -EPERM); + + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < nlh->nlmsg_len) + return; + + if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) { + struct nfgenmsg *nfgenmsg; + + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) + return; + + nfgenmsg = nlmsg_data(nlh); + skb_pull(skb, msglen); + nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id); + } else { + netlink_rcv_skb(skb, &nfnetlink_rcv_msg); + } } #ifdef CONFIG_MODULES -- cgit v1.2.3 From b5bc89bfa0b46de37754610f46c0ef4e2280edb4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 10 Oct 2013 16:49:19 +0200 Subject: netfilter: nf_tables: add trace support This patch adds support for tracing the packet travel through the ruleset, in a similar fashion to x_tables. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + net/netfilter/nf_tables_core.c | 57 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 79e1418a6043..dcddc49c0e08 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -979,6 +979,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&chain->rules); chain->handle = nf_tables_alloc_handle(table); chain->net = net; + chain->table = table; nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); if (!(table->flags & NFT_TABLE_F_DORMANT) && diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index d581ef660248..cb9e685caae1 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -19,6 +19,7 @@ #include #include #include +#include static void nft_cmp_fast_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1]) @@ -63,6 +64,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, struct nft_jumpstack { const struct nft_chain *chain; const struct nft_rule *rule; + int rulenum; }; static inline void @@ -79,6 +81,40 @@ nft_chain_stats(const struct nft_chain *this, const struct nft_pktinfo *pkt, rcu_read_unlock_bh(); } +enum nft_trace { + NFT_TRACE_RULE, + NFT_TRACE_RETURN, + NFT_TRACE_POLICY, +}; + +static const char *const comments[] = { + [NFT_TRACE_RULE] = "rule", + [NFT_TRACE_RETURN] = "return", + [NFT_TRACE_POLICY] = "policy", +}; + +static struct nf_loginfo trace_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 4, + .logflags = NF_LOG_MASK, + }, + }, +}; + +static inline void nft_trace_packet(const struct nft_pktinfo *pkt, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) +{ + struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); + + nf_log_packet(net, pkt->xt.family, pkt->hooknum, pkt->skb, pkt->in, + pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", + chain->table->name, chain->name, comments[type], + rulenum); +} + unsigned int nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { @@ -88,6 +124,7 @@ nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) struct nft_data data[NFT_REG_MAX + 1]; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; + int rulenum = 0; /* * Cache cursor to avoid problems in case that the cursor is updated * while traversing the ruleset. @@ -104,6 +141,8 @@ next_rule: if (unlikely(rule->genmask & (1 << gencursor))) continue; + rulenum++; + nft_rule_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, data); @@ -129,17 +168,28 @@ next_rule: case NF_ACCEPT: case NF_DROP: case NF_QUEUE: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + return data[NFT_REG_VERDICT].verdict; case NFT_JUMP: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); jumpstack[stackptr].chain = chain; jumpstack[stackptr].rule = rule; + jumpstack[stackptr].rulenum = rulenum; stackptr++; /* fall through */ case NFT_GOTO: chain = data[NFT_REG_VERDICT].chain; goto do_chain; case NFT_RETURN: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); + + /* fall through */ case NFT_CONTINUE: break; default: @@ -147,13 +197,20 @@ next_rule: } if (stackptr > 0) { + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN); + stackptr--; chain = jumpstack[stackptr].chain; rule = jumpstack[stackptr].rule; + rulenum = jumpstack[stackptr].rulenum; goto next_rule; } nft_chain_stats(chain, pkt, jumpstack, stackptr); + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_POLICY); + return nft_base_chain(chain)->policy; } EXPORT_SYMBOL_GPL(nft_do_chain_pktinfo); -- cgit v1.2.3 From 9e4e948a3edafd2b7f4dc14c395e146ffd0d9611 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 9 Oct 2013 09:24:27 +0300 Subject: ipvs: avoid rcu_barrier during netns cleanup commit 578bc3ef1e473a ("ipvs: reorganize dest trash") added rcu_barrier() on cleanup to wait dest users and schedulers like LBLC and LBLCR to put their last dest reference. Using rcu_barrier with many namespaces is problematic. Trying to fix it by freeing dest with kfree_rcu is not a solution, RCU callbacks can run in parallel and execution order is random. Fix it by creating new function ip_vs_dest_put_and_free() which is heavier than ip_vs_dest_put(). We will use it just for schedulers like LBLC, LBLCR that can delay their dest release. By default, dests reference is above 0 if they are present in service and it is 0 when deleted but still in trash list. Change the dest trash code to use ip_vs_dest_put_and_free(), so that refcnt -1 can be used for freeing. As result, such checks remain in slow path and the rcu_barrier() from netns cleanup can be removed. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 6 +----- net/netfilter/ipvs/ip_vs_lblc.c | 2 +- net/netfilter/ipvs/ip_vs_lblcr.c | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a3df9bddc4f7..62786a495cea 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -704,7 +704,7 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest) __ip_vs_dst_cache_reset(dest); __ip_vs_svc_put(svc, false); free_percpu(dest->stats.cpustats); - kfree(dest); + ip_vs_dest_put_and_free(dest); } /* @@ -3820,10 +3820,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); - /* Some dest can be in grace period even before cleanup, we have to - * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called. - */ - rcu_barrier(); ip_vs_trash_cleanup(net); ip_vs_stop_estimator(net, &ipvs->tot_stats); ip_vs_control_net_cleanup_sysctl(net); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index eff13c94498e..ca056a331e60 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -136,7 +136,7 @@ static void ip_vs_lblc_rcu_free(struct rcu_head *head) struct ip_vs_lblc_entry, rcu_head); - ip_vs_dest_put(en->dest); + ip_vs_dest_put_and_free(en->dest); kfree(en); } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 0b8550089a2e..3f21a2f47de1 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -130,7 +130,7 @@ static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head) struct ip_vs_dest_set_elem *e; e = container_of(head, struct ip_vs_dest_set_elem, rcu_head); - ip_vs_dest_put(e->dest); + ip_vs_dest_put_and_free(e->dest); kfree(e); } -- cgit v1.2.3 From 1255ce5f10dbb4646c8d43b8d59faab48ae4a6b2 Mon Sep 17 00:00:00 2001 From: Alexander Frolkin Date: Fri, 27 Sep 2013 11:06:23 +0100 Subject: ipvs: improved SH fallback strategy Improve the SH fallback realserver selection strategy. With sh and sh-fallback, if a realserver is down, this attempts to distribute the traffic that would have gone to that server evenly among the remaining servers. Signed-off-by: Alexander Frolkin Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_sh.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 3588faebe529..cc65b2f42cd4 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -115,27 +115,46 @@ ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s, } -/* As ip_vs_sh_get, but with fallback if selected server is unavailable */ +/* As ip_vs_sh_get, but with fallback if selected server is unavailable + * + * The fallback strategy loops around the table starting from a "random" + * point (in fact, it is chosen to be the original hash value to make the + * algorithm deterministic) to find a new server. + */ static inline struct ip_vs_dest * ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s, const union nf_inet_addr *addr, __be16 port) { - unsigned int offset; - unsigned int hash; + unsigned int offset, roffset; + unsigned int hash, ihash; struct ip_vs_dest *dest; + /* first try the dest it's supposed to go to */ + ihash = ip_vs_sh_hashkey(svc->af, addr, port, 0); + dest = rcu_dereference(s->buckets[ihash].dest); + if (!dest) + return NULL; + if (!is_unavailable(dest)) + return dest; + + IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting", + IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); + + /* if the original dest is unavailable, loop around the table + * starting from ihash to find a new dest + */ for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) { - hash = ip_vs_sh_hashkey(svc->af, addr, port, offset); + roffset = (offset + ihash) % IP_VS_SH_TAB_SIZE; + hash = ip_vs_sh_hashkey(svc->af, addr, port, roffset); dest = rcu_dereference(s->buckets[hash].dest); if (!dest) break; - if (is_unavailable(dest)) - IP_VS_DBG_BUF(6, "SH: selected unavailable server " - "%s:%d (offset %d)", - IP_VS_DBG_ADDR(svc->af, &dest->addr), - ntohs(dest->port), offset); - else + if (!is_unavailable(dest)) return dest; + IP_VS_DBG_BUF(6, "SH: selected unavailable " + "server %s:%d (offset %d), reselecting", + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port), roffset); } return NULL; -- cgit v1.2.3 From 1a8bf6eeef9fe417f90e5338a2fd7fba69c6d0e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Oct 2013 09:03:25 -0700 Subject: netfilter: xt_socket: use sock_gen_put() TCP listener refactoring, part 7 : Use sock_gen_put() instead of xt_socket_put_sk() for future SYN_RECV support. Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_socket.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 3dd0e374bc2b..1ba67931eb1b 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -35,15 +35,6 @@ #include #endif -static void -xt_socket_put_sk(struct sock *sk) -{ - if (sk->sk_state == TCP_TIME_WAIT) - inet_twsk_put(inet_twsk(sk)); - else - sock_put(sk); -} - static int extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, @@ -216,7 +207,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, inet_twsk(sk)->tw_transparent)); if (sk != skb->sk) - xt_socket_put_sk(sk); + sock_gen_put(sk); if (wildcard || !transparent) sk = NULL; @@ -381,7 +372,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) inet_twsk(sk)->tw_transparent)); if (sk != skb->sk) - xt_socket_put_sk(sk); + sock_gen_put(sk); if (wildcard || !transparent) sk = NULL; -- cgit v1.2.3 From c1b1203d65955c179fec617ff17a21273f33a414 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 18 Oct 2013 13:48:25 -0700 Subject: net: misc: Remove extern from function prototypes There are a mix of function prototypes with and without extern in the kernel sources. Standardize on not using extern for function prototypes. Function prototypes don't need to be written with extern. extern is assumed by the compiler. Its use is as unnecessary as using auto to declare automatic/local variables in a block. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/netfilter/nf_internals.h | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 3deec997be89..61a3c927e63c 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -13,26 +13,20 @@ /* core.c */ -extern unsigned int nf_iterate(struct list_head *head, - struct sk_buff *skb, - unsigned int hook, - const struct net_device *indev, - const struct net_device *outdev, - struct nf_hook_ops **elemp, - int (*okfn)(struct sk_buff *), - int hook_thresh); +unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, + unsigned int hook, const struct net_device *indev, + const struct net_device *outdev, + struct nf_hook_ops **elemp, + int (*okfn)(struct sk_buff *), int hook_thresh); /* nf_queue.c */ -extern int nf_queue(struct sk_buff *skb, - struct nf_hook_ops *elem, - u_int8_t pf, unsigned int hook, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - unsigned int queuenum); -extern int __init netfilter_queue_init(void); +int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, u_int8_t pf, + unsigned int hook, struct net_device *indev, + struct net_device *outdev, int (*okfn)(struct sk_buff *), + unsigned int queuenum); +int __init netfilter_queue_init(void); /* nf_log.c */ -extern int __init netfilter_log_init(void); +int __init netfilter_log_init(void); #endif -- cgit v1.2.3 From 93302880d8a3e5dc6b7da3f9825beb839152c940 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 18 Oct 2013 11:41:55 +0200 Subject: netfilter: ipset: Use netlink callback dump args only Instead of cb->data, use callback dump args only and introduce symbolic names instead of plain numbers at accessing the argument members. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_bitmap_gen.h | 11 +++--- net/netfilter/ipset/ip_set_core.c | 70 ++++++++++++++++----------------- net/netfilter/ipset/ip_set_hash_gen.h | 20 +++++----- net/netfilter/ipset/ip_set_list_set.c | 11 +++--- 4 files changed, 58 insertions(+), 54 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index a13e15be7911..f2c7d83dc23f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -198,13 +198,14 @@ mtype_list(const struct ip_set *set, struct mtype *map = set->data; struct nlattr *adt, *nested; void *x; - u32 id, first = cb->args[2]; + u32 id, first = cb->args[IPSET_CB_ARG0]; adt = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!adt) return -EMSGSIZE; - for (; cb->args[2] < map->elements; cb->args[2]++) { - id = cb->args[2]; + for (; cb->args[IPSET_CB_ARG0] < map->elements; + cb->args[IPSET_CB_ARG0]++) { + id = cb->args[IPSET_CB_ARG0]; x = get_ext(set, map, id); if (!test_bit(id, map->members) || (SET_WITH_TIMEOUT(set) && @@ -231,14 +232,14 @@ mtype_list(const struct ip_set *set, ipset_nest_end(skb, adt); /* Set listing finished */ - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return 0; nla_put_failure: nla_nest_cancel(skb, nested); if (unlikely(id == first)) { - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return -EMSGSIZE; } ipset_nest_end(skb, adt); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index dc9284bdd2dd..bac7e01df67f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1182,10 +1182,12 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, static int ip_set_dump_done(struct netlink_callback *cb) { - struct ip_set_net *inst = (struct ip_set_net *)cb->data; - if (cb->args[2]) { - pr_debug("release set %s\n", nfnl_set(inst, cb->args[1])->name); - __ip_set_put_byindex(inst, (ip_set_id_t) cb->args[1]); + struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET]; + if (cb->args[IPSET_CB_ARG0]) { + pr_debug("release set %s\n", + nfnl_set(inst, cb->args[IPSET_CB_INDEX])->name); + __ip_set_put_byindex(inst, + (ip_set_id_t) cb->args[IPSET_CB_INDEX]); } return 0; } @@ -1203,7 +1205,7 @@ dump_attrs(struct nlmsghdr *nlh) } static int -dump_init(struct netlink_callback *cb) +dump_init(struct netlink_callback *cb, struct ip_set_net *inst) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); @@ -1211,15 +1213,15 @@ dump_init(struct netlink_callback *cb) struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; ip_set_id_t index; - struct ip_set_net *inst = (struct ip_set_net *)cb->data; /* Second pass, so parser can't fail */ nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_setname_policy); - /* cb->args[0] : dump single set/all sets - * [1] : set index - * [..]: type specific + /* cb->args[IPSET_CB_NET]: net namespace + * [IPSET_CB_DUMP]: dump single set/all sets + * [IPSET_CB_INDEX]: set index + * [IPSET_CB_ARG0]: type specific */ if (cda[IPSET_ATTR_SETNAME]) { @@ -1231,7 +1233,7 @@ dump_init(struct netlink_callback *cb) return -ENOENT; dump_type = DUMP_ONE; - cb->args[1] = index; + cb->args[IPSET_CB_INDEX] = index; } else dump_type = DUMP_ALL; @@ -1239,7 +1241,8 @@ dump_init(struct netlink_callback *cb) u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]); dump_type |= (f << 16); } - cb->args[0] = dump_type; + cb->args[IPSET_CB_NET] = (unsigned long)inst; + cb->args[IPSET_CB_DUMP] = dump_type; return 0; } @@ -1251,12 +1254,12 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) struct ip_set *set = NULL; struct nlmsghdr *nlh = NULL; unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; + struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); u32 dump_type, dump_flags; int ret = 0; - struct ip_set_net *inst = (struct ip_set_net *)cb->data; - if (!cb->args[0]) { - ret = dump_init(cb); + if (!cb->args[IPSET_CB_DUMP]) { + ret = dump_init(cb, inst); if (ret < 0) { nlh = nlmsg_hdr(cb->skb); /* We have to create and send the error message @@ -1267,17 +1270,18 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) } } - if (cb->args[1] >= inst->ip_set_max) + if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max) goto out; - dump_type = DUMP_TYPE(cb->args[0]); - dump_flags = DUMP_FLAGS(cb->args[0]); - max = dump_type == DUMP_ONE ? cb->args[1] + 1 : inst->ip_set_max; + dump_type = DUMP_TYPE(cb->args[IPSET_CB_DUMP]); + dump_flags = DUMP_FLAGS(cb->args[IPSET_CB_DUMP]); + max = dump_type == DUMP_ONE ? cb->args[IPSET_CB_INDEX] + 1 + : inst->ip_set_max; dump_last: - pr_debug("args[0]: %u %u args[1]: %ld\n", - dump_type, dump_flags, cb->args[1]); - for (; cb->args[1] < max; cb->args[1]++) { - index = (ip_set_id_t) cb->args[1]; + pr_debug("dump type, flag: %u %u index: %ld\n", + dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); + for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { + index = (ip_set_id_t) cb->args[IPSET_CB_INDEX]; set = nfnl_set(inst, index); if (set == NULL) { if (dump_type == DUMP_ONE) { @@ -1294,7 +1298,7 @@ dump_last: !!(set->type->features & IPSET_DUMP_LAST))) continue; pr_debug("List set: %s\n", set->name); - if (!cb->args[2]) { + if (!cb->args[IPSET_CB_ARG0]) { /* Start listing: make sure set won't be destroyed */ pr_debug("reference set\n"); __ip_set_get(set); @@ -1311,7 +1315,7 @@ dump_last: goto nla_put_failure; if (dump_flags & IPSET_FLAG_LIST_SETNAME) goto next_set; - switch (cb->args[2]) { + switch (cb->args[IPSET_CB_ARG0]) { case 0: /* Core header data */ if (nla_put_string(skb, IPSET_ATTR_TYPENAME, @@ -1331,7 +1335,7 @@ dump_last: read_lock_bh(&set->lock); ret = set->variant->list(set, skb, cb); read_unlock_bh(&set->lock); - if (!cb->args[2]) + if (!cb->args[IPSET_CB_ARG0]) /* Set is done, proceed with next one */ goto next_set; goto release_refcount; @@ -1340,8 +1344,8 @@ dump_last: /* If we dump all sets, continue with dumping last ones */ if (dump_type == DUMP_ALL) { dump_type = DUMP_LAST; - cb->args[0] = dump_type | (dump_flags << 16); - cb->args[1] = 0; + cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16); + cb->args[IPSET_CB_INDEX] = 0; goto dump_last; } goto out; @@ -1350,15 +1354,15 @@ nla_put_failure: ret = -EFAULT; next_set: if (dump_type == DUMP_ONE) - cb->args[1] = IPSET_INVALID_ID; + cb->args[IPSET_CB_INDEX] = IPSET_INVALID_ID; else - cb->args[1]++; + cb->args[IPSET_CB_INDEX]++; release_refcount: /* If there was an error or set is done, release set */ - if (ret || !cb->args[2]) { + if (ret || !cb->args[IPSET_CB_ARG0]) { pr_debug("release set %s\n", nfnl_set(inst, index)->name); __ip_set_put_byindex(inst, index); - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; } out: if (nlh) { @@ -1375,8 +1379,6 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); - if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; @@ -1384,7 +1386,6 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, struct netlink_dump_control c = { .dump = ip_set_dump_start, .done = ip_set_dump_done, - .data = (void *)inst }; return netlink_dump_start(ctnl, skb, nlh, &c); } @@ -1961,7 +1962,6 @@ static int __net_init ip_set_net_init(struct net *net) { struct ip_set_net *inst = ip_set_pernet(net); - struct ip_set **list; inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 6a80dbd30df7..2f80c74c871f 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -931,7 +931,7 @@ mtype_list(const struct ip_set *set, struct nlattr *atd, *nested; const struct hbucket *n; const struct mtype_elem *e; - u32 first = cb->args[2]; + u32 first = cb->args[IPSET_CB_ARG0]; /* We assume that one hash bucket fills into one page */ void *incomplete; int i; @@ -940,20 +940,22 @@ mtype_list(const struct ip_set *set, if (!atd) return -EMSGSIZE; pr_debug("list hash set %s\n", set->name); - for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) { + for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits); + cb->args[IPSET_CB_ARG0]++) { incomplete = skb_tail_pointer(skb); - n = hbucket(t, cb->args[2]); - pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n); + n = hbucket(t, cb->args[IPSET_CB_ARG0]); + pr_debug("cb->arg bucket: %lu, t %p n %p\n", + cb->args[IPSET_CB_ARG0], t, n); for (i = 0; i < n->pos; i++) { e = ahash_data(n, i, set->dsize); if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; pr_debug("list hash %lu hbucket %p i %u, data %p\n", - cb->args[2], n, i, e); + cb->args[IPSET_CB_ARG0], n, i, e); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { - if (cb->args[2] == first) { + if (cb->args[IPSET_CB_ARG0] == first) { nla_nest_cancel(skb, atd); return -EMSGSIZE; } else @@ -968,16 +970,16 @@ mtype_list(const struct ip_set *set, } ipset_nest_end(skb, atd); /* Set listing finished */ - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return 0; nla_put_failure: nlmsg_trim(skb, incomplete); - if (unlikely(first == cb->args[2])) { + if (unlikely(first == cb->args[IPSET_CB_ARG0])) { pr_warning("Can't list set %s: one bucket does not fit into " "a message. Please report it!\n", set->name); - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return -EMSGSIZE; } ipset_nest_end(skb, atd); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index ec6f6d15dded..3e2317f3cf68 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -490,14 +490,15 @@ list_set_list(const struct ip_set *set, { const struct list_set *map = set->data; struct nlattr *atd, *nested; - u32 i, first = cb->args[2]; + u32 i, first = cb->args[IPSET_CB_ARG0]; const struct set_elem *e; atd = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!atd) return -EMSGSIZE; - for (; cb->args[2] < map->size; cb->args[2]++) { - i = cb->args[2]; + for (; cb->args[IPSET_CB_ARG0] < map->size; + cb->args[IPSET_CB_ARG0]++) { + i = cb->args[IPSET_CB_ARG0]; e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto finish; @@ -522,13 +523,13 @@ list_set_list(const struct ip_set *set, finish: ipset_nest_end(skb, atd); /* Set listing finished */ - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return 0; nla_put_failure: nla_nest_cancel(skb, nested); if (unlikely(i == first)) { - cb->args[2] = 0; + cb->args[IPSET_CB_ARG0] = 0; return -EMSGSIZE; } ipset_nest_end(skb, atd); -- cgit v1.2.3 From 1a869205c75cb222263fa04f200485ffbe9eaadf Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 18 Oct 2013 11:41:57 +0200 Subject: netfilter: ipset: The unnamed union initialization may lead to compilation error The unnamed union should be possible to be initialized directly, but unfortunately it's not so: /usr/src/ipset/kernel/net/netfilter/ipset/ip_set_hash_netnet.c: In function ?hash_netnet4_kadt?: /usr/src/ipset/kernel/net/netfilter/ipset/ip_set_hash_netnet.c:141: error: unknown field ?cidr? specified in initializer Reported-by: Husnu Demir Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_netnet.c | 22 ++++++++++------------ net/netfilter/ipset/ip_set_hash_netportnet.c | 22 ++++++++++------------ 2 files changed, 20 insertions(+), 24 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 426032706ca9..2bc2dec20b00 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -137,12 +137,11 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet4_elem e = { - .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK, - .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK, - }; + struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; @@ -160,14 +159,14 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet4_elem e = { .cidr[0] = HOST_MASK, - .cidr[1] = HOST_MASK }; + struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, last; u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2; u8 cidr, cidr2; int ret; + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || @@ -364,12 +363,11 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet6_elem e = { - .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK, - .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK - }; + struct hash_netnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK; @@ -386,11 +384,11 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netnet6_elem e = { .cidr[0] = HOST_MASK, - .cidr[1] = HOST_MASK }; + struct hash_netnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 363fab933d48..703d1192a6a2 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -147,12 +147,11 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet4_elem e = { - .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), - .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK), - }; + struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; @@ -174,8 +173,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet4_elem e = { .cidr[0] = HOST_MASK, - .cidr[1] = HOST_MASK }; + struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to; u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; @@ -183,6 +181,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], u8 cidr, cidr2; int ret; + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || @@ -419,12 +418,11 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet6_elem e = { - .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), - .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK), - }; + struct hash_netportnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); + e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK; @@ -446,13 +444,13 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_netportnet6_elem e = { .cidr[0] = HOST_MASK, - .cidr[1] = HOST_MASK }; + struct hash_netportnet6_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; + e.cidr[0] = e.cidr[1] = HOST_MASK; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || -- cgit v1.2.3 From dc476e7c8e7824fd326c79f50d7302faca7a4c2b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 18 Oct 2013 14:03:41 +0200 Subject: netfilter:ipset: Fix memory allocation for bitmap:port At the restructuring of the bitmap types creation in ipset, for the bitmap:port type wrong (too large) memory allocation was copied (netfilter bugzilla id #859). Reported-by: Quentin Armitage Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index e7603c5b53d7..cf99676e69f8 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -254,7 +254,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], return -ENOMEM; map->elements = last_port - first_port + 1; - map->memsize = map->elements * sizeof(unsigned long); + map->memsize = bitmap_bytes(0, map->elements); set->variant = &bitmap_port; set->dsize = ip_set_elem_len(set, tb, 0); if (!init_map_port(set, map, first_port, last_port)) { -- cgit v1.2.3 From 1e56555ee1f3d04d89bdaa3df57ad2da9fbc0999 Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Sun, 27 Oct 2013 19:06:45 +0100 Subject: netfilter: ipset: remove duplicate define This patch removes a duplicate define from net/netfilter/ipset/ip_set_hash_gen.h Signed-off-by: Michael Opdenacker Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_gen.h | 1 - 1 file changed, 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 2f80c74c871f..be6932ad3a86 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -234,7 +234,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) #define mtype MTYPE -#define mtype_elem IPSET_TOKEN(MTYPE, _elem) #define mtype_add IPSET_TOKEN(MTYPE, _add) #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) -- cgit v1.2.3 From 6e7cd27c0f77847f1b07a81ae2ed17b937a7531a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 25 Oct 2013 11:05:04 +0200 Subject: net: ipvs: sctp: add missing verdict assignments in sctp_conn_schedule If skb_header_pointer() fails, we need to assign a verdict, that is NF_DROP in this case, otherwise, we would leave the verdict from conn_schedule() uninitialized when returning. Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Acked-by: Neil Horman Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 23e596e438b3..9ca7aa033284 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -20,13 +20,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, sctp_sctphdr_t *sh, _sctph; sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); - if (sh == NULL) + if (sh == NULL) { + *verdict = NF_DROP; return 0; + } sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), sizeof(_schunkh), &_schunkh); - if (sch == NULL) + if (sch == NULL) { + *verdict = NF_DROP; return 0; + } + net = skb_net(skb); ipvs = net_ipvs(net); rcu_read_lock(); -- cgit v1.2.3 From 98c37b6b01812d331db4d49cacd603891d0d53ba Mon Sep 17 00:00:00 2001 From: Tomasz Bursztyka Date: Mon, 28 Oct 2013 12:19:45 +0200 Subject: netfilter: nft_nat: Fix endianness issue reported by sparse This patch fixes this: CHECK net/netfilter/nft_nat.c net/netfilter/nft_nat.c:50:43: warning: incorrect type in assignment (different base types) net/netfilter/nft_nat.c:50:43: expected restricted __be32 [addressable] [usertype] ip net/netfilter/nft_nat.c:50:43: got unsigned int [unsigned] [usertype] net/netfilter/nft_nat.c:51:43: warning: incorrect type in assignment (different base types) net/netfilter/nft_nat.c:51:43: expected restricted __be32 [addressable] [usertype] ip net/netfilter/nft_nat.c:51:43: got unsigned int [unsigned] [usertype] net/netfilter/nft_nat.c:65:37: warning: incorrect type in assignment (different base types) net/netfilter/nft_nat.c:65:37: expected restricted __be16 [addressable] [assigned] [usertype] all net/netfilter/nft_nat.c:65:37: got unsigned int [unsigned] net/netfilter/nft_nat.c:66:37: warning: incorrect type in assignment (different base types) net/netfilter/nft_nat.c:66:37: expected restricted __be16 [addressable] [assigned] [usertype] all net/netfilter/nft_nat.c:66:37: got unsigned int [unsigned] Signed-off-by: Tomasz Bursztyka Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_nat.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index b0b87b2d2411..d3b1ffe26181 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -47,8 +47,10 @@ static void nft_nat_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); if (priv->sreg_addr_min) { if (priv->family == AF_INET) { - range.min_addr.ip = data[priv->sreg_addr_min].data[0]; - range.max_addr.ip = data[priv->sreg_addr_max].data[0]; + range.min_addr.ip = (__force __be32) + data[priv->sreg_addr_min].data[0]; + range.max_addr.ip = (__force __be32) + data[priv->sreg_addr_max].data[0]; } else { memcpy(range.min_addr.ip6, @@ -62,8 +64,10 @@ static void nft_nat_eval(const struct nft_expr *expr, } if (priv->sreg_proto_min) { - range.min_proto.all = data[priv->sreg_proto_min].data[0]; - range.max_proto.all = data[priv->sreg_proto_max].data[0]; + range.min_proto.all = (__force __be16) + data[priv->sreg_proto_min].data[0]; + range.max_proto.all = (__force __be16) + data[priv->sreg_proto_max].data[0]; range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } -- cgit v1.2.3 From 97203abe6bc41ee020f37c902bd1a761157f22c1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Oct 2013 10:56:20 +0100 Subject: net: ipvs: sctp: do not recalc sctp csum when ports didn't change Unlike UDP or TCP, we do not take the pseudo-header into account in SCTP checksums. So in case port mapping is the very same, we do not need to recalculate the whole SCTP checksum in software, which is very expensive. Also, similarly as in TCP, take into account when a private helper mangled the packet. In that case, we also need to recalculate the checksum even if ports might be same. Thanks for feedback regarding skb->ip_summed checks from Julian Anastasov; here's a discussion on these checks for snat and dnat: * For snat_handler(), we can see CHECKSUM_PARTIAL from virtual devices, and from LOCAL_OUT, otherwise it should be CHECKSUM_UNNECESSARY. In general, in snat it is more complex. skb contains the original route and ip_vs_route_me_harder() can change the route after snat_handler. So, for locally generated replies from local server we can not preserve the CHECKSUM_PARTIAL mode. It is an chicken or egg dilemma: snat_handler needs the device after rerouting (to check for NETIF_F_SCTP_CSUM), while ip_route_me_harder() wants the snat_handler() to put the new saddr for proper rerouting. * For dnat_handler(), we should not see CHECKSUM_COMPLETE for SCTP, in fact the small set of drivers that support SCTP offloading return CHECKSUM_UNNECESSARY on correctly received SCTP csum. We can see CHECKSUM_PARTIAL from local stack or received from virtual drivers. The idea is that SCTP decides to avoid csum calculation if hardware supports offloading. IPVS can change the device after rerouting to real server but we can preserve the CHECKSUM_PARTIAL mode if the new device supports offloading too. This works because skb dst is changed before dnat_handler and we see the new device. So, checks in the 'if' part will decide whether it is ok to keep CHECKSUM_PARTIAL for the output. If the packet was with CHECKSUM_NONE, hence we deal with unknown checksum. As we recalculate the sum for IP header in all cases, it should be safe to use CHECKSUM_UNNECESSARY. We can forward wrong checksum in this case (without cp->app). In case of CHECKSUM_UNNECESSARY, the csum was valid on receive. Signed-off-by: Daniel Borkmann Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 39 +++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 9ca7aa033284..2f7ea7564044 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -81,6 +81,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { sctp_sctphdr_t *sctph; unsigned int sctphoff = iph->len; + bool payload_csum = false; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -92,19 +93,31 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, return 0; if (unlikely(cp->app != NULL)) { + int ret; + /* Some checks before mangling */ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, skb)) + ret = ip_vs_app_pkt_out(cp, skb); + if (ret == 0) return 0; + /* ret=2: csum update is needed after payload mangling */ + if (ret == 2) + payload_csum = true; } sctph = (void *) skb_network_header(skb) + sctphoff; - sctph->source = cp->vport; - sctp_nat_csum(skb, sctph, sctphoff); + /* Only update csum if we really have to */ + if (sctph->source != cp->vport || payload_csum || + skb->ip_summed == CHECKSUM_PARTIAL) { + sctph->source = cp->vport; + sctp_nat_csum(skb, sctph, sctphoff); + } else { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } return 1; } @@ -115,6 +128,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { sctp_sctphdr_t *sctph; unsigned int sctphoff = iph->len; + bool payload_csum = false; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -126,19 +140,32 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, return 0; if (unlikely(cp->app != NULL)) { + int ret; + /* Some checks before mangling */ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_in(cp, skb)) + ret = ip_vs_app_pkt_in(cp, skb); + if (ret == 0) return 0; + /* ret=2: csum update is needed after payload mangling */ + if (ret == 2) + payload_csum = true; } sctph = (void *) skb_network_header(skb) + sctphoff; - sctph->dest = cp->dport; - sctp_nat_csum(skb, sctph, sctphoff); + /* Only update csum if we really have to */ + if (sctph->dest != cp->dport || payload_csum || + (skb->ip_summed == CHECKSUM_PARTIAL && + !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) { + sctph->dest = cp->dport; + sctp_nat_csum(skb, sctph, sctphoff); + } else if (skb->ip_summed != CHECKSUM_PARTIAL) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } return 1; } -- cgit v1.2.3 From f7b13e4330ef3c20e62ac4908cc96c1c318056c2 Mon Sep 17 00:00:00 2001 From: Holger Eitzenberger Date: Thu, 26 Sep 2013 17:31:51 +0200 Subject: netfilter: introduce nf_conn_acct structure Encapsulate counters for both directions into nf_conn_acct. During that process also consistently name pointers to the extend 'acct', not 'counters'. This patch is a cleanup. Signed-off-by: Holger Eitzenberger Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_acct.c | 12 +++++++----- net/netfilter/nf_conntrack_core.c | 16 ++++++++++------ net/netfilter/nf_conntrack_netlink.c | 16 +++++++++------- net/netfilter/xt_connbytes.c | 6 ++++-- 4 files changed, 30 insertions(+), 20 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 2d3030ab5b61..a4b5e2a435ac 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -39,21 +39,23 @@ static struct ctl_table acct_sysctl_table[] = { unsigned int seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) { - struct nf_conn_counter *acct; + struct nf_conn_acct *acct; + struct nf_conn_counter *counter; acct = nf_conn_acct_find(ct); if (!acct) return 0; + counter = acct->counter; return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)atomic64_read(&acct[dir].packets), - (unsigned long long)atomic64_read(&acct[dir].bytes)); + (unsigned long long)atomic64_read(&counter[dir].packets), + (unsigned long long)atomic64_read(&counter[dir].bytes)); }; EXPORT_SYMBOL_GPL(seq_print_acct); static struct nf_ct_ext_type acct_extend __read_mostly = { - .len = sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]), - .align = __alignof__(struct nf_conn_counter[IP_CT_DIR_MAX]), + .len = sizeof(struct nf_conn_acct), + .align = __alignof__(struct nf_conn_acct), .id = NF_CT_EXT_ACCT, }; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5d892febd64c..e22d950c60b3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1109,12 +1109,14 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, acct: if (do_acct) { - struct nf_conn_counter *acct; + struct nf_conn_acct *acct; acct = nf_conn_acct_find(ct); if (acct) { - atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); - atomic64_add(skb->len, &acct[CTINFO2DIR(ctinfo)].bytes); + struct nf_conn_counter *counter = acct->counter; + + atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); + atomic64_add(skb->len, &counter[CTINFO2DIR(ctinfo)].bytes); } } } @@ -1126,13 +1128,15 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, int do_acct) { if (do_acct) { - struct nf_conn_counter *acct; + struct nf_conn_acct *acct; acct = nf_conn_acct_find(ct); if (acct) { - atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); + struct nf_conn_counter *counter = acct->counter; + + atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); atomic64_add(skb->len - skb_network_offset(skb), - &acct[CTINFO2DIR(ctinfo)].bytes); + &counter[CTINFO2DIR(ctinfo)].bytes); } } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index eea936b70d15..ddc3777d8340 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -237,19 +237,21 @@ static int ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, enum ip_conntrack_dir dir, int type) { - struct nf_conn_counter *acct; + struct nf_conn_acct *acct; + struct nf_conn_counter *counter; u64 pkts, bytes; acct = nf_conn_acct_find(ct); if (!acct) return 0; + counter = acct->counter; if (type == IPCTNL_MSG_CT_GET_CTRZERO) { - pkts = atomic64_xchg(&acct[dir].packets, 0); - bytes = atomic64_xchg(&acct[dir].bytes, 0); + pkts = atomic64_xchg(&counter[dir].packets, 0); + bytes = atomic64_xchg(&counter[dir].bytes, 0); } else { - pkts = atomic64_read(&acct[dir].packets); - bytes = atomic64_read(&acct[dir].bytes); + pkts = atomic64_read(&counter[dir].packets); + bytes = atomic64_read(&counter[dir].bytes); } return dump_counters(skb, pkts, bytes, dir); } @@ -530,7 +532,7 @@ ctnetlink_proto_size(const struct nf_conn *ct) } static inline size_t -ctnetlink_counters_size(const struct nf_conn *ct) +ctnetlink_acct_size(const struct nf_conn *ct) { if (!nf_ct_ext_exist(ct, NF_CT_EXT_ACCT)) return 0; @@ -579,7 +581,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */ + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ - + ctnetlink_counters_size(ct) + + ctnetlink_acct_size(ct) + ctnetlink_timestamp_size(ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ + nla_total_size(0) /* CTA_PROTOINFO */ diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index e595e07a759b..1e634615ab9d 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -26,16 +26,18 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par) u_int64_t what = 0; /* initialize to make gcc happy */ u_int64_t bytes = 0; u_int64_t pkts = 0; + const struct nf_conn_acct *acct; const struct nf_conn_counter *counters; ct = nf_ct_get(skb, &ctinfo); if (!ct) return false; - counters = nf_conn_acct_find(ct); - if (!counters) + acct = nf_conn_acct_find(ct); + if (!acct) return false; + counters = acct->counter; switch (sinfo->what) { case XT_CONNBYTES_PKTS: switch (sinfo->direction) { -- cgit v1.2.3 From 4542fa4727f5f83faf9e1f28f35be0b9a2317aec Mon Sep 17 00:00:00 2001 From: Holger Eitzenberger Date: Thu, 26 Sep 2013 17:31:52 +0200 Subject: netfilter: ctnetlink: account both directions in one step With the intent to dump other accounting data later. This patch is a cleanup. Signed-off-by: Holger Eitzenberger Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 49 ++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 25 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ddc3777d8340..08870b859046 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -211,13 +211,23 @@ nla_put_failure: } static int -dump_counters(struct sk_buff *skb, u64 pkts, u64 bytes, - enum ip_conntrack_dir dir) +dump_counters(struct sk_buff *skb, struct nf_conn_acct *acct, + enum ip_conntrack_dir dir, int type) { - enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; + enum ctattr_type attr = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; + struct nf_conn_counter *counter = acct->counter; struct nlattr *nest_count; + u64 pkts, bytes; - nest_count = nla_nest_start(skb, type | NLA_F_NESTED); + if (type == IPCTNL_MSG_CT_GET_CTRZERO) { + pkts = atomic64_xchg(&counter[dir].packets, 0); + bytes = atomic64_xchg(&counter[dir].bytes, 0); + } else { + pkts = atomic64_read(&counter[dir].packets); + bytes = atomic64_read(&counter[dir].bytes); + } + + nest_count = nla_nest_start(skb, attr | NLA_F_NESTED); if (!nest_count) goto nla_put_failure; @@ -234,26 +244,19 @@ nla_put_failure: } static int -ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, - enum ip_conntrack_dir dir, int type) +ctnetlink_dump_acct(struct sk_buff *skb, const struct nf_conn *ct, int type) { - struct nf_conn_acct *acct; - struct nf_conn_counter *counter; - u64 pkts, bytes; + struct nf_conn_acct *acct = nf_conn_acct_find(ct); - acct = nf_conn_acct_find(ct); if (!acct) return 0; - counter = acct->counter; - if (type == IPCTNL_MSG_CT_GET_CTRZERO) { - pkts = atomic64_xchg(&counter[dir].packets, 0); - bytes = atomic64_xchg(&counter[dir].bytes, 0); - } else { - pkts = atomic64_read(&counter[dir].packets); - bytes = atomic64_read(&counter[dir].bytes); - } - return dump_counters(skb, pkts, bytes, dir); + if (dump_counters(skb, acct, IP_CT_DIR_ORIGINAL, type) < 0) + return -1; + if (dump_counters(skb, acct, IP_CT_DIR_REPLY, type) < 0) + return -1; + + return 0; } static int @@ -490,8 +493,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, if (ctnetlink_dump_status(skb, ct) < 0 || ctnetlink_dump_timeout(skb, ct) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL, type) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY, type) < 0 || + ctnetlink_dump_acct(skb, ct, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0 || ctnetlink_dump_protoinfo(skb, ct) < 0 || ctnetlink_dump_helpinfo(skb, ct) < 0 || @@ -675,10 +677,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; if (events & (1 << IPCT_DESTROY)) { - if (ctnetlink_dump_counters(skb, ct, - IP_CT_DIR_ORIGINAL, type) < 0 || - ctnetlink_dump_counters(skb, ct, - IP_CT_DIR_REPLY, type) < 0 || + if (ctnetlink_dump_acct(skb, ct, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0) goto nla_put_failure; } else { -- cgit v1.2.3 From c359c4157cf0d852387aff2f2d83fef039aadc2c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 4 Nov 2013 15:58:56 +0300 Subject: netfilter: nft_compat: use _safe version of list_for_each We need to use the _safe version of list_for_each_entry() here otherwise we have a use after free bug. Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 4811f762e060..a82667c64729 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -634,9 +634,9 @@ nft_match_select_ops(const struct nft_ctx *ctx, static void nft_match_release(void) { - struct nft_xt *nft_match; + struct nft_xt *nft_match, *tmp; - list_for_each_entry(nft_match, &nft_match_list, head) + list_for_each_entry_safe(nft_match, tmp, &nft_match_list, head) kfree(nft_match); } @@ -705,9 +705,9 @@ nft_target_select_ops(const struct nft_ctx *ctx, static void nft_target_release(void) { - struct nft_xt *nft_target; + struct nft_xt *nft_target, *tmp; - list_for_each_entry(nft_target, &nft_target_list, head) + list_for_each_entry_safe(nft_target, tmp, &nft_target_list, head) kfree(nft_target); } -- cgit v1.2.3 From cdbe7c2d6d485459801838c244c409b938e32df5 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 7 Nov 2013 19:59:19 +0100 Subject: nfnetlink: do not ack malformed messages Commit 0628b123c96d ("netfilter: nfnetlink: add batch support and use it from nf_tables") introduced a bug leading to various crashes in netlink_ack when netlink message with invalid nlmsg_len was sent by an unprivileged user. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/netfilter/nfnetlink.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 027f16af51a0..046aa13b4fea 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -363,13 +363,15 @@ static void nfnetlink_rcv(struct sk_buff *skb) struct net *net = sock_net(skb->sk); int msglen; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return netlink_ack(skb, nlh, -EPERM); - if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) return; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + netlink_ack(skb, nlh, -EPERM); + return; + } + if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) { struct nfgenmsg *nfgenmsg; -- cgit v1.2.3 From 6aafeef03b9d9ecf255f3a80ed85ee070260e1ae Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 6 Nov 2013 17:52:20 +0100 Subject: netfilter: push reasm skb through instead of original frag skbs Pushing original fragments through causes several problems. For example for matching, frags may not be matched correctly. Take following example: On HOSTA do: ip6tables -I INPUT -p icmpv6 -j DROP ip6tables -I INPUT -p icmpv6 -m icmp6 --icmpv6-type 128 -j ACCEPT and on HOSTB you do: ping6 HOSTA -s2000 (MTU is 1500) Incoming echo requests will be filtered out on HOSTA. This issue does not occur with smaller packets than MTU (where fragmentation does not happen) As was discussed previously, the only correct solution seems to be to use reassembled skb instead of separete frags. Doing this has positive side effects in reducing sk_buff by one pointer (nfct_reasm) and also the reams dances in ipvs and conntrack can be removed. Future plan is to remove net/ipv6/netfilter/nf_conntrack_reasm.c entirely and use code in net/ipv6/reassembly.c instead. Signed-off-by: Jiri Pirko Acked-by: Julian Anastasov Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/netfilter/ipvs/ip_vs_core.c | 55 +-------------------------------------- net/netfilter/ipvs/ip_vs_pe_sip.c | 8 +----- 2 files changed, 2 insertions(+), 61 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 34fda62f40f6..4f26ee46b51f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1139,12 +1139,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iph_skb(af, skb, &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (!iph.fragoffs && skb_nfct_reasm(skb)) { - struct sk_buff *reasm = skb_nfct_reasm(skb); - /* Save fw mark for coming frags */ - reasm->ipvs_property = 1; - reasm->mark = skb->mark; - } if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; int verdict = ip_vs_out_icmp_v6(skb, &related, @@ -1614,12 +1608,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (!iph.fragoffs && skb_nfct_reasm(skb)) { - struct sk_buff *reasm = skb_nfct_reasm(skb); - /* Save fw mark for coming frags. */ - reasm->ipvs_property = 1; - reasm->mark = skb->mark; - } if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum, @@ -1671,9 +1659,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) /* sorry, all this trouble for a no-hit :) */ IP_VS_DBG_PKT(12, af, pp, skb, 0, "ip_vs_in: packet continues traversal as normal"); - if (iph.fragoffs && !skb_nfct_reasm(skb)) { + if (iph.fragoffs) { /* Fragment that couldn't be mapped to a conn entry - * and don't have any pointer to a reasm skb * is missing module nf_defrag_ipv6 */ IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); @@ -1755,38 +1742,6 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 -/* - * AF_INET6 fragment handling - * Copy info from first fragment, to the rest of them. - */ -static unsigned int -ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct sk_buff *reasm = skb_nfct_reasm(skb); - struct net *net; - - /* Skip if not a "replay" from nf_ct_frag6_output or first fragment. - * ipvs_property is set when checking first fragment - * in ip_vs_in() and ip_vs_out(). - */ - if (reasm) - IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property); - if (!reasm || !reasm->ipvs_property) - return NF_ACCEPT; - - net = skb_net(skb); - if (!net_ipvs(net)->enable) - return NF_ACCEPT; - - /* Copy stored fw mark, saved in ip_vs_{in,out} */ - skb->mark = reasm->mark; - - return NF_ACCEPT; -} - /* * AF_INET6 handler in NF_INET_LOCAL_IN chain * Schedule and forward packets from remote clients @@ -1924,14 +1879,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .priority = 100, }, #ifdef CONFIG_IP_VS_IPV6 - /* After mangle & nat fetch 2:nd fragment and following */ - { - .hook = ip_vs_preroute_frag6, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP6_PRI_NAT_DST + 1, - }, /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply6, diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 9ef22bdce9f1..bed5f7042529 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -65,7 +65,6 @@ static int get_callid(const char *dptr, unsigned int dataoff, static int ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) { - struct sk_buff *reasm = skb_nfct_reasm(skb); struct ip_vs_iphdr iph; unsigned int dataoff, datalen, matchoff, matchlen; const char *dptr; @@ -79,15 +78,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) /* todo: IPv6 fragments: * I think this only should be done for the first fragment. /HS */ - if (reasm) { - skb = reasm; - dataoff = iph.thoff_reasm + sizeof(struct udphdr); - } else - dataoff = iph.len + sizeof(struct udphdr); + dataoff = iph.len + sizeof(struct udphdr); if (dataoff >= skb->len) return -EINVAL; - /* todo: Check if this will mess-up the reasm skb !!! /HS */ retc = skb_linearize(skb); if (retc < 0) return retc; -- cgit v1.2.3