diff options
author | David S. Miller <davem@davemloft.net> | 2021-07-07 14:00:14 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-07-07 14:00:14 -0700 |
commit | d7fba8ff3e50fb25ffe583bf945df052f6caffa2 (patch) | |
tree | cc04bf37e26e2b4210fc8b27f3e7c75ffbf37624 /net | |
parent | 0e02bf5de46ae30074a2e1a8194a422a84482a1a (diff) | |
parent | d322957ebfb9c21c2c72b66680f7c3ccd724e081 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
Pablo Neira Ayuso says:
====================
Netfilter fixes for net
The following patchset contains Netfilter fixes for net:
1) Do not refresh timeout in SYN_SENT for syn retransmissions.
Add selftest for unreplied TCP connection, from Florian Westphal.
2) Fix null dereference from error path with hardware offload
in nftables.
3) Remove useless nf_ct_gre_keymap_flush() from netns exit path,
from Vasily Averin.
4) Missing rcu read-lock side in ctnetlink helper info dump,
also from Vasily.
5) Do not mark RST in the reply direction coming after SYN packet
for an out-of-sync entry, from Ali Abdallah and Florian Westphal.
6) Add tcp_ignore_invalid_rst sysctl to allow to disable out of
segment RSTs, from Ali.
7) KCSAN fix for nf_conntrack_all_lock(), from Manfred Spraul.
8) Honor NFTA_LAST_SET in nft_last.
9) Fix incorrect arithmetics when restore last_jiffies in nft_last.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/netfilter/nf_conntrack_core.c | 11 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_netlink.c | 3 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_proto.c | 7 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_proto_gre.c | 13 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_proto_tcp.c | 69 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_standalone.c | 10 | ||||
-rw-r--r-- | net/netfilter/nf_tables_api.c | 3 | ||||
-rw-r--r-- | net/netfilter/nft_last.c | 12 |
8 files changed, 84 insertions, 44 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 96ba19fc8155..83c52df85870 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -149,7 +149,15 @@ static void nf_conntrack_all_lock(void) spin_lock(&nf_conntrack_locks_all_lock); - nf_conntrack_locks_all = true; + /* For nf_contrack_locks_all, only the latest time when another + * CPU will see an update is controlled, by the "release" of the + * spin_lock below. + * The earliest time is not controlled, an thus KCSAN could detect + * a race when nf_conntract_lock() reads the variable. + * WRITE_ONCE() is used to ensure the compiler will not + * optimize the write. + */ + WRITE_ONCE(nf_conntrack_locks_all, true); for (i = 0; i < CONNTRACK_LOCKS; i++) { spin_lock(&nf_conntrack_locks[i]); @@ -2457,7 +2465,6 @@ i_see_dead_people: } list_for_each_entry(net, net_exit_list, exit_list) { - nf_conntrack_proto_pernet_fini(net); nf_conntrack_ecache_pernet_fini(net); nf_conntrack_expect_pernet_fini(net); free_percpu(net->ct.stat); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 4e1a9dba7077..e81af33b233b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -218,6 +218,7 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb, if (!help) return 0; + rcu_read_lock(); helper = rcu_dereference(help->helper); if (!helper) goto out; @@ -233,9 +234,11 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb, nla_nest_end(skb, nest_helper); out: + rcu_read_unlock(); return 0; nla_put_failure: + rcu_read_unlock(); return -1; } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 55647409a9be..8f7a9837349c 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -697,13 +697,6 @@ void nf_conntrack_proto_pernet_init(struct net *net) #endif } -void nf_conntrack_proto_pernet_fini(struct net *net) -{ -#ifdef CONFIG_NF_CT_PROTO_GRE - nf_ct_gre_keymap_flush(net); -#endif -} - module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index db11e403d818..728eeb0aea87 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -55,19 +55,6 @@ static inline struct nf_gre_net *gre_pernet(struct net *net) return &net->ct.nf_ct_proto.gre; } -void nf_ct_gre_keymap_flush(struct net *net) -{ - struct nf_gre_net *net_gre = gre_pernet(net); - struct nf_ct_gre_keymap *km, *tmp; - - spin_lock_bh(&keymap_lock); - list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) { - list_del_rcu(&km->list); - kfree_rcu(km, rcu); - } - spin_unlock_bh(&keymap_lock); -} - static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, const struct nf_conntrack_tuple *t) { diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index f7e8baf59b51..3259416f2ea4 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -823,6 +823,22 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } +static bool tcp_can_early_drop(const struct nf_conn *ct) +{ + switch (ct->proto.tcp.state) { + case TCP_CONNTRACK_FIN_WAIT: + case TCP_CONNTRACK_LAST_ACK: + case TCP_CONNTRACK_TIME_WAIT: + case TCP_CONNTRACK_CLOSE: + case TCP_CONNTRACK_CLOSE_WAIT: + return true; + default: + break; + } + + return false; +} + /* Returns verdict for packet, or -1 for invalid. */ int nf_conntrack_tcp_packet(struct nf_conn *ct, struct sk_buff *skb, @@ -1030,10 +1046,30 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, if (index != TCP_RST_SET) break; - if (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) { + /* If we are closing, tuple might have been re-used already. + * last_index, last_ack, and all other ct fields used for + * sequence/window validation are outdated in that case. + * + * As the conntrack can already be expired by GC under pressure, + * just skip validation checks. + */ + if (tcp_can_early_drop(ct)) + goto in_window; + + /* td_maxack might be outdated if we let a SYN through earlier */ + if ((ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) && + ct->proto.tcp.last_index != TCP_SYN_SET) { u32 seq = ntohl(th->seq); - if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) { + /* If we are not in established state and SEQ=0 this is most + * likely an answer to a SYN we let go through above (last_index + * can be updated due to out-of-order ACKs). + */ + if (seq == 0 && !nf_conntrack_tcp_established(ct)) + break; + + if (before(seq, ct->proto.tcp.seen[!dir].td_maxack) && + !tn->tcp_ignore_invalid_rst) { /* Invalid RST */ spin_unlock_bh(&ct->lock); nf_ct_l4proto_log_invalid(skb, ct, state, "invalid rst"); @@ -1134,6 +1170,16 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } + + if (index == TCP_SYN_SET && old_state == TCP_CONNTRACK_SYN_SENT) { + /* do not renew timeout on SYN retransmit. + * + * Else port reuse by client or NAT middlebox can keep + * entry alive indefinitely (including nat info). + */ + return NF_ACCEPT; + } + /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection * pickup with loose=1. Avoid large ESTABLISHED timeout. */ @@ -1155,22 +1201,6 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, return NF_ACCEPT; } -static bool tcp_can_early_drop(const struct nf_conn *ct) -{ - switch (ct->proto.tcp.state) { - case TCP_CONNTRACK_FIN_WAIT: - case TCP_CONNTRACK_LAST_ACK: - case TCP_CONNTRACK_TIME_WAIT: - case TCP_CONNTRACK_CLOSE: - case TCP_CONNTRACK_CLOSE_WAIT: - return true; - default: - break; - } - - return false; -} - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> @@ -1437,6 +1467,9 @@ void nf_conntrack_tcp_init_net(struct net *net) */ tn->tcp_be_liberal = 0; + /* If it's non-zero, we turn off RST sequence number check */ + tn->tcp_ignore_invalid_rst = 0; + /* Max number of the retransmitted packets without receiving an (acceptable) * ACK from the destination. If this number is reached, a shorter timer * will be started. diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index f57a951c9b5e..214d9f9e499b 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -579,6 +579,7 @@ enum nf_ct_sysctl_index { #endif NF_SYSCTL_CT_PROTO_TCP_LOOSE, NF_SYSCTL_CT_PROTO_TCP_LIBERAL, + NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST, NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS, NF_SYSCTL_CT_PROTO_TIMEOUT_UDP, NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM, @@ -798,6 +799,14 @@ static struct ctl_table nf_ct_sysctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + [NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST] = { + .procname = "nf_conntrack_tcp_ignore_invalid_rst", + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, [NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = { .procname = "nf_conntrack_tcp_max_retrans", .maxlen = sizeof(u8), @@ -1004,6 +1013,7 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net, XASSIGN(LOOSE, &tn->tcp_loose); XASSIGN(LIBERAL, &tn->tcp_be_liberal); XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans); + XASSIGN(IGNORE_INVALID_RST, &tn->tcp_ignore_invalid_rst); #undef XASSIGN #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 390d4466567f..de182d1f7c4e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3446,7 +3446,8 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, return 0; err_destroy_flow_rule: - nft_flow_rule_destroy(flow); + if (flow) + nft_flow_rule_destroy(flow); err_release_rule: nf_tables_rule_release(&ctx, rule); err_release_expr: diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 913ac45167f2..8088b99f2ee3 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -23,15 +23,21 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr, { struct nft_last_priv *priv = nft_expr_priv(expr); u64 last_jiffies; + u32 last_set = 0; int err; - if (tb[NFTA_LAST_MSECS]) { + if (tb[NFTA_LAST_SET]) { + last_set = ntohl(nla_get_be32(tb[NFTA_LAST_SET])); + if (last_set == 1) + priv->last_set = 1; + } + + if (last_set && tb[NFTA_LAST_MSECS]) { err = nf_msecs_to_jiffies64(tb[NFTA_LAST_MSECS], &last_jiffies); if (err < 0) return err; - priv->last_jiffies = jiffies + (unsigned long)last_jiffies; - priv->last_set = 1; + priv->last_jiffies = jiffies - (unsigned long)last_jiffies; } return 0; |