diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 5 | ||||
-rw-r--r-- | net/ipv4/fib_lookup.h | 3 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 4 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 27 | ||||
-rw-r--r-- | net/ipv4/gre_offload.c | 15 | ||||
-rw-r--r-- | net/ipv4/ip_input.c | 1 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 1 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel.c | 16 | ||||
-rw-r--r-- | net/ipv4/ipconfig.c | 21 | ||||
-rw-r--r-- | net/ipv4/nexthop.c | 270 | ||||
-rw-r--r-- | net/ipv4/proc.c | 50 | ||||
-rw-r--r-- | net/ipv4/route.c | 13 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 9 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 14 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_recovery.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 18 | ||||
-rw-r--r-- | net/ipv4/udp.c | 6 | ||||
-rw-r--r-- | net/ipv4/udp_offload.c | 69 |
20 files changed, 409 insertions, 145 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b94fa8eb831b..2ff5d8058ce6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1419,7 +1419,6 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, out: return segs; } -EXPORT_SYMBOL(inet_gso_segment); static struct sk_buff *ipip_gso_segment(struct sk_buff *skb, netdev_features_t features) @@ -1550,7 +1549,6 @@ out: return pp; } -EXPORT_SYMBOL(inet_gro_receive); static struct sk_buff *ipip_gro_receive(struct list_head *head, struct sk_buff *skb) @@ -1636,7 +1634,6 @@ out_unlock: return err; } -EXPORT_SYMBOL(inet_gro_complete); static int ipip_gro_complete(struct sk_buff *skb, int nhoff) { @@ -1871,6 +1868,8 @@ static __net_init int inet_init_net(struct net *net) net->ipv4.sysctl_igmp_llm_reports = 1; net->ipv4.sysctl_igmp_qrv = 2; + net->ipv4.sysctl_fib_notify_on_flag_change = 0; + return 0; } diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 818916b2a04d..aff454ef0fa3 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -39,9 +39,10 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, struct netlink_ext_ack *extack); bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi); int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - struct fib_rt_info *fri, unsigned int flags); + const struct fib_rt_info *fri, unsigned int flags); void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, const struct nl_info *info, unsigned int nlm_flags); +size_t fib_nlmsg_size(struct fib_info *fi); static inline void fib_result_assign(struct fib_result *res, struct fib_info *fi) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b5400cec4f69..4c38facf91c0 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -452,7 +452,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) return -1; } -static inline size_t fib_nlmsg_size(struct fib_info *fi) +size_t fib_nlmsg_size(struct fib_info *fi) { size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(4) /* RTA_TABLE */ @@ -1733,7 +1733,7 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) #endif int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, - struct fib_rt_info *fri, unsigned int flags) + const struct fib_rt_info *fri, unsigned int flags) { unsigned int nhs = fib_info_num_path(fri->fi); struct fib_info *fi = fri->fi; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 28117c05dc35..60559b708158 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1038,6 +1038,8 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri) void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri) { struct fib_alias *fa_match; + struct sk_buff *skb; + int err; rcu_read_lock(); @@ -1045,9 +1047,34 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri) if (!fa_match) goto out; + if (fa_match->offload == fri->offload && fa_match->trap == fri->trap) + goto out; + fa_match->offload = fri->offload; fa_match->trap = fri->trap; + if (!net->ipv4.sysctl_fib_notify_on_flag_change) + goto out; + + skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC); + if (!skb) { + err = -ENOBUFS; + goto errout; + } + + err = fib_dump_info(skb, 0, 0, RTM_NEWROUTE, fri, 0); + if (err < 0) { + /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_ROUTE, NULL, GFP_ATOMIC); + goto out; + +errout: + rtnl_set_sk_err(net, RTNLGRP_IPV4_ROUTE, err); out: rcu_read_unlock(); } diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 10bc49bde9a1..1121a9d5fed9 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -15,10 +15,10 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, netdev_features_t features) { int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); + bool need_csum, offload_csum, gso_partial, need_ipsec; struct sk_buff *segs = ERR_PTR(-EINVAL); u16 mac_offset = skb->mac_header; __be16 protocol = skb->protocol; - bool need_csum, gso_partial; u16 mac_len = skb->mac_len; int gre_offset, outer_hlen; @@ -47,6 +47,11 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, if (need_csum) features &= ~NETIF_F_SCTP_CRC; + need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb)); + /* Try to offload checksum if possible */ + offload_csum = !!(need_csum && !need_ipsec && + (skb->dev->features & NETIF_F_HW_CSUM)); + /* segment inner packet. */ segs = skb_mac_gso_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { @@ -100,7 +105,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, } *(pcsum + 1) = 0; - *pcsum = gso_make_checksum(skb, 0); + if (skb->encapsulation || !offload_csum) { + *pcsum = gso_make_checksum(skb, 0); + } else { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = sizeof(*greh); + } } while ((skb = skb->next)); out: return segs; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index b0c244af1e4d..3a025c011971 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -253,6 +253,7 @@ int ip_local_deliver(struct sk_buff *skb) net, NULL, skb, skb->dev, NULL, ip_local_deliver_finish); } +EXPORT_SYMBOL(ip_local_deliver); static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 959b94e32f2b..3aab53beb4ea 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -434,6 +434,7 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } +EXPORT_SYMBOL(ip_output); /* * copy saddr and daddr, possibly using 64bit load/stores diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 64594aa755f0..76a420c76f16 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -317,7 +317,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) } dev->needed_headroom = t_hlen + hlen; - mtu -= (dev->hard_header_len + t_hlen); + mtu -= t_hlen; if (mtu < IPV4_MIN_MTU) mtu = IPV4_MIN_MTU; @@ -347,7 +347,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, nt = netdev_priv(dev); t_hlen = nt->hlen + sizeof(struct iphdr); dev->min_mtu = ETH_MIN_MTU; - dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen; + dev->max_mtu = IP_MAX_MTU - t_hlen; ip_tunnel_add(itn, nt); return nt; @@ -488,11 +488,10 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, int mtu; tunnel_hlen = md ? tunnel_hlen : tunnel->hlen; - pkt_size = skb->len - tunnel_hlen - dev->hard_header_len; + pkt_size = skb->len - tunnel_hlen; if (df) - mtu = dst_mtu(&rt->dst) - dev->hard_header_len - - sizeof(struct iphdr) - tunnel_hlen; + mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen); else mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; @@ -972,7 +971,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); - int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen; + int max_mtu = IP_MAX_MTU - t_hlen; if (new_mtu < ETH_MIN_MTU) return -EINVAL; @@ -1149,10 +1148,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], mtu = ip_tunnel_bind_dev(dev); if (tb[IFLA_MTU]) { - unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen; + unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr)); - mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, - (unsigned int)(max - sizeof(struct iphdr))); + mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max); } err = dev_set_mtu(dev, mtu); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 3cd13e1bc6a7..f9ab1fb219ec 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -61,7 +61,6 @@ #include <linux/export.h> #include <net/net_namespace.h> #include <net/arp.h> -#include <net/dsa.h> #include <net/ip.h> #include <net/ipconfig.h> #include <net/route.h> @@ -218,9 +217,9 @@ static int __init ic_open_devs(void) last = &ic_first_dev; rtnl_lock(); - /* bring loopback and DSA master network devices up first */ + /* bring loopback device up first */ for_each_netdev(&init_net, dev) { - if (!(dev->flags & IFF_LOOPBACK) && !netdev_uses_dsa(dev)) + if (!(dev->flags & IFF_LOOPBACK)) continue; if (dev_change_flags(dev, dev->flags | IFF_UP, NULL) < 0) pr_err("IP-Config: Failed to open %s\n", dev->name); @@ -305,6 +304,9 @@ have_carrier: return 0; } +/* Close all network interfaces except the one we've autoconfigured, and its + * lowers, in case it's a stacked virtual interface. + */ static void __init ic_close_devs(void) { struct ic_device *d, *next; @@ -313,9 +315,20 @@ static void __init ic_close_devs(void) rtnl_lock(); next = ic_first_dev; while ((d = next)) { + bool bring_down = (d != ic_dev); + struct net_device *lower_dev; + struct list_head *iter; + next = d->next; dev = d->dev; - if (d != ic_dev && !netdev_uses_dsa(dev)) { + + netdev_for_each_lower_dev(ic_dev->dev, lower_dev, iter) { + if (dev == lower_dev) { + bring_down = false; + break; + } + } + if (bring_down) { pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags, NULL); } diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index e6dfca426242..f1c6cbdb9e43 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -71,6 +71,7 @@ __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info, static int nh_notifier_single_info_init(struct nh_notifier_info *info, const struct nexthop *nh) { + info->type = NH_NOTIFIER_INFO_TYPE_SINGLE; info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL); if (!info->nh) return -ENOMEM; @@ -85,13 +86,13 @@ static void nh_notifier_single_info_fini(struct nh_notifier_info *info) kfree(info->nh); } -static int nh_notifier_grp_info_init(struct nh_notifier_info *info, - const struct nexthop *nh) +static int nh_notifier_mp_info_init(struct nh_notifier_info *info, + struct nh_group *nhg) { - struct nh_group *nhg = rtnl_dereference(nh->nh_grp); u16 num_nh = nhg->num_nh; int i; + info->type = NH_NOTIFIER_INFO_TYPE_GRP; info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh), GFP_KERNEL); if (!info->nh_grp) @@ -112,27 +113,41 @@ static int nh_notifier_grp_info_init(struct nh_notifier_info *info, return 0; } -static void nh_notifier_grp_info_fini(struct nh_notifier_info *info) +static int nh_notifier_grp_info_init(struct nh_notifier_info *info, + const struct nexthop *nh) +{ + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + + if (nhg->mpath) + return nh_notifier_mp_info_init(info, nhg); + return -EINVAL; +} + +static void nh_notifier_grp_info_fini(struct nh_notifier_info *info, + const struct nexthop *nh) { - kfree(info->nh_grp); + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + + if (nhg->mpath) + kfree(info->nh_grp); } static int nh_notifier_info_init(struct nh_notifier_info *info, const struct nexthop *nh) { info->id = nh->id; - info->is_grp = nh->is_group; - if (info->is_grp) + if (nh->is_group) return nh_notifier_grp_info_init(info, nh); else return nh_notifier_single_info_init(info, nh); } -static void nh_notifier_info_fini(struct nh_notifier_info *info) +static void nh_notifier_info_fini(struct nh_notifier_info *info, + const struct nexthop *nh) { - if (info->is_grp) - nh_notifier_grp_info_fini(info); + if (nh->is_group) + nh_notifier_grp_info_fini(info, nh); else nh_notifier_single_info_fini(info); } @@ -161,7 +176,7 @@ static int call_nexthop_notifiers(struct net *net, err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, event_type, &info); - nh_notifier_info_fini(&info); + nh_notifier_info_fini(&info, nh); return notifier_to_errno(err); } @@ -182,7 +197,7 @@ static int call_nexthop_notifier(struct notifier_block *nb, struct net *net, return err; err = nb->notifier_call(nb, event_type, &info); - nh_notifier_info_fini(&info); + nh_notifier_info_fini(&info, nh); return notifier_to_errno(err); } @@ -209,7 +224,7 @@ static void nexthop_devhash_add(struct net *net, struct nh_info *nhi) hlist_add_head(&nhi->dev_hash, head); } -static void nexthop_free_mpath(struct nexthop *nh) +static void nexthop_free_group(struct nexthop *nh) { struct nh_group *nhg; int i; @@ -249,7 +264,7 @@ void nexthop_free_rcu(struct rcu_head *head) struct nexthop *nh = container_of(head, struct nexthop, rcu); if (nh->is_group) - nexthop_free_mpath(nh); + nexthop_free_group(nh); else nexthop_free_single(nh); @@ -680,21 +695,16 @@ static bool ipv4_good_nh(const struct fib_nh *nh) return !!(state & NUD_VALID); } -struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) +static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash) { struct nexthop *rc = NULL; - struct nh_group *nhg; int i; - if (!nh->is_group) - return nh; - - nhg = rcu_dereference(nh->nh_grp); for (i = 0; i < nhg->num_nh; ++i) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; struct nh_info *nhi; - if (hash > atomic_read(&nhge->upper_bound)) + if (hash > atomic_read(&nhge->mpath.upper_bound)) continue; nhi = rcu_dereference(nhge->nh->nh_info); @@ -721,6 +731,21 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) return rc; } + +struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) +{ + struct nh_group *nhg; + + if (!nh->is_group) + return nh; + + nhg = rcu_dereference(nh->nh_grp); + if (nhg->mpath) + return nexthop_select_path_mp(nhg, hash); + + /* Unreachable. */ + return NULL; +} EXPORT_SYMBOL_GPL(nexthop_select_path); int nexthop_for_each_fib6_nh(struct nexthop *nh, @@ -914,7 +939,7 @@ static void nh_group_rebalance(struct nh_group *nhg) w += nhge->weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1; - atomic_set(&nhge->upper_bound, upper_bound); + atomic_set(&nhge->mpath.upper_bound, upper_bound); } } @@ -1456,10 +1481,13 @@ static struct nexthop *nexthop_create_group(struct net *net, nhg->nh_entries[i].nh_parent = nh; } - if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) { + if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) nhg->mpath = 1; + + WARN_ON_ONCE(nhg->mpath != 1); + + if (nhg->mpath) nh_group_rebalance(nhg); - } if (cfg->nh_fdb) nhg->fdb_nh = 1; @@ -1844,37 +1872,44 @@ static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } -static int nh_valid_get_del_req(struct nlmsghdr *nlh, u32 *id, - struct netlink_ext_ack *extack) +static int __nh_valid_get_del_req(const struct nlmsghdr *nlh, + struct nlattr **tb, u32 *id, + struct netlink_ext_ack *extack) { struct nhmsg *nhm = nlmsg_data(nlh); - struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)]; - int err; - err = nlmsg_parse(nlh, sizeof(*nhm), tb, - ARRAY_SIZE(rtm_nh_policy_get) - 1, - rtm_nh_policy_get, extack); - if (err < 0) - return err; - - err = -EINVAL; if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { NL_SET_ERR_MSG(extack, "Invalid values in header"); - goto out; + return -EINVAL; } if (!tb[NHA_ID]) { NL_SET_ERR_MSG(extack, "Nexthop id is missing"); - goto out; + return -EINVAL; } *id = nla_get_u32(tb[NHA_ID]); - if (!(*id)) + if (!(*id)) { NL_SET_ERR_MSG(extack, "Invalid nexthop id"); - else - err = 0; -out: - return err; + return -EINVAL; + } + + return 0; +} + +static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)]; + int err; + + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_get) - 1, + rtm_nh_policy_get, extack); + if (err < 0) + return err; + + return __nh_valid_get_del_req(nlh, tb, id, extack); } /* rtnl */ @@ -1943,16 +1978,23 @@ errout_free: goto out; } -static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx, - bool group_filter, u8 family) +struct nh_dump_filter { + int dev_idx; + int master_idx; + bool group_filter; + bool fdb_filter; +}; + +static bool nh_dump_filtered(struct nexthop *nh, + struct nh_dump_filter *filter, u8 family) { const struct net_device *dev; const struct nh_info *nhi; - if (group_filter && !nh->is_group) + if (filter->group_filter && !nh->is_group) return true; - if (!dev_idx && !master_idx && !family) + if (!filter->dev_idx && !filter->master_idx && !family) return false; if (nh->is_group) @@ -1963,46 +2005,37 @@ static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx, return true; dev = nhi->fib_nhc.nhc_dev; - if (dev_idx && (!dev || dev->ifindex != dev_idx)) + if (filter->dev_idx && (!dev || dev->ifindex != filter->dev_idx)) return true; - if (master_idx) { + if (filter->master_idx) { struct net_device *master; if (!dev) return true; master = netdev_master_upper_dev_get((struct net_device *)dev); - if (!master || master->ifindex != master_idx) + if (!master || master->ifindex != filter->master_idx) return true; } return false; } -static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, - int *master_idx, bool *group_filter, - bool *fdb_filter, struct netlink_callback *cb) +static int __nh_valid_dump_req(const struct nlmsghdr *nlh, struct nlattr **tb, + struct nh_dump_filter *filter, + struct netlink_ext_ack *extack) { - struct netlink_ext_ack *extack = cb->extack; - struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)]; struct nhmsg *nhm; - int err; u32 idx; - err = nlmsg_parse(nlh, sizeof(*nhm), tb, - ARRAY_SIZE(rtm_nh_policy_dump) - 1, - rtm_nh_policy_dump, NULL); - if (err < 0) - return err; - if (tb[NHA_OIF]) { idx = nla_get_u32(tb[NHA_OIF]); if (idx > INT_MAX) { NL_SET_ERR_MSG(extack, "Invalid device index"); return -EINVAL; } - *dev_idx = idx; + filter->dev_idx = idx; } if (tb[NHA_MASTER]) { idx = nla_get_u32(tb[NHA_MASTER]); @@ -2010,10 +2043,10 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, NL_SET_ERR_MSG(extack, "Invalid master device index"); return -EINVAL; } - *master_idx = idx; + filter->master_idx = idx; } - *group_filter = nla_get_flag(tb[NHA_GROUPS]); - *fdb_filter = nla_get_flag(tb[NHA_FDB]); + filter->group_filter = nla_get_flag(tb[NHA_GROUPS]); + filter->fdb_filter = nla_get_flag(tb[NHA_FDB]); nhm = nlmsg_data(nlh); if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { @@ -2024,24 +2057,49 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, return 0; } -/* rtnl */ -static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) +static int nh_valid_dump_req(const struct nlmsghdr *nlh, + struct nh_dump_filter *filter, + struct netlink_callback *cb) { - bool group_filter = false, fdb_filter = false; - struct nhmsg *nhm = nlmsg_data(cb->nlh); - int dev_filter_idx = 0, master_idx = 0; - struct net *net = sock_net(skb->sk); - struct rb_root *root = &net->nexthop.rb_root; - struct rb_node *node; - int idx = 0, s_idx; + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)]; int err; - err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx, - &group_filter, &fdb_filter, cb); + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_dump) - 1, + rtm_nh_policy_dump, cb->extack); if (err < 0) return err; - s_idx = cb->args[0]; + return __nh_valid_dump_req(nlh, tb, filter, cb->extack); +} + +struct rtm_dump_nh_ctx { + u32 idx; +}; + +static struct rtm_dump_nh_ctx * +rtm_dump_nh_ctx(struct netlink_callback *cb) +{ + struct rtm_dump_nh_ctx *ctx = (void *)cb->ctx; + + BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); + return ctx; +} + +static int rtm_dump_walk_nexthops(struct sk_buff *skb, + struct netlink_callback *cb, + struct rb_root *root, + struct rtm_dump_nh_ctx *ctx, + int (*nh_cb)(struct sk_buff *skb, + struct netlink_callback *cb, + struct nexthop *nh, void *data), + void *data) +{ + struct rb_node *node; + int idx = 0, s_idx; + int err; + + s_idx = ctx->idx; for (node = rb_first(root); node; node = rb_next(node)) { struct nexthop *nh; @@ -2049,30 +2107,58 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) goto cont; nh = rb_entry(node, struct nexthop, rb_node); - if (nh_dump_filtered(nh, dev_filter_idx, master_idx, - group_filter, nhm->nh_family)) - goto cont; - - err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI); - if (err < 0) { - if (likely(skb->len)) - goto out; - - goto out_err; - } + ctx->idx = idx; + err = nh_cb(skb, cb, nh, data); + if (err) + return err; cont: idx++; } + ctx->idx = idx; + return 0; +} + +static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb, + struct nexthop *nh, void *data) +{ + struct nhmsg *nhm = nlmsg_data(cb->nlh); + struct nh_dump_filter *filter = data; + + if (nh_dump_filtered(nh, filter, nhm->nh_family)) + return 0; + + return nh_fill_node(skb, nh, RTM_NEWNEXTHOP, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI); +} + +/* rtnl */ +static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb); + struct net *net = sock_net(skb->sk); + struct rb_root *root = &net->nexthop.rb_root; + struct nh_dump_filter filter = {}; + int err; + + err = nh_valid_dump_req(cb->nlh, &filter, cb); + if (err < 0) + return err; + + err = rtm_dump_walk_nexthops(skb, cb, root, ctx, + &rtm_dump_nexthop_cb, &filter); + if (err < 0) { + if (likely(skb->len)) + goto out; + goto out_err; + } + out: err = skb->len; out_err: - cb->args[0] = idx; cb->seq = net->nexthop.seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); - return err; } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 63cd370ea29d..6d46297a99f8 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -464,30 +464,52 @@ static int snmp_seq_show(struct seq_file *seq, void *v) */ static int netstat_seq_show(struct seq_file *seq, void *v) { - int i; + const int ip_cnt = ARRAY_SIZE(snmp4_ipextstats_list) - 1; + const int tcp_cnt = ARRAY_SIZE(snmp4_net_list) - 1; struct net *net = seq->private; + unsigned long *buff; + int i; seq_puts(seq, "TcpExt:"); - for (i = 0; snmp4_net_list[i].name; i++) + for (i = 0; i < tcp_cnt; i++) seq_printf(seq, " %s", snmp4_net_list[i].name); seq_puts(seq, "\nTcpExt:"); - for (i = 0; snmp4_net_list[i].name; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.net_statistics, - snmp4_net_list[i].entry)); - + buff = kzalloc(max(tcp_cnt * sizeof(long), ip_cnt * sizeof(u64)), + GFP_KERNEL); + if (buff) { + snmp_get_cpu_field_batch(buff, snmp4_net_list, + net->mib.net_statistics); + for (i = 0; i < tcp_cnt; i++) + seq_printf(seq, " %lu", buff[i]); + } else { + for (i = 0; i < tcp_cnt; i++) + seq_printf(seq, " %lu", + snmp_fold_field(net->mib.net_statistics, + snmp4_net_list[i].entry)); + } seq_puts(seq, "\nIpExt:"); - for (i = 0; snmp4_ipextstats_list[i].name; i++) + for (i = 0; i < ip_cnt; i++) seq_printf(seq, " %s", snmp4_ipextstats_list[i].name); seq_puts(seq, "\nIpExt:"); - for (i = 0; snmp4_ipextstats_list[i].name; i++) - seq_printf(seq, " %llu", - snmp_fold_field64(net->mib.ip_statistics, - snmp4_ipextstats_list[i].entry, - offsetof(struct ipstats_mib, syncp))); - + if (buff) { + u64 *buff64 = (u64 *)buff; + + memset(buff64, 0, ip_cnt * sizeof(u64)); + snmp_get_cpu_field64_batch(buff64, snmp4_ipextstats_list, + net->mib.ip_statistics, + offsetof(struct ipstats_mib, syncp)); + for (i = 0; i < ip_cnt; i++) + seq_printf(seq, " %llu", buff64[i]); + } else { + for (i = 0; i < ip_cnt; i++) + seq_printf(seq, " %llu", + snmp_fold_field64(net->mib.ip_statistics, + snmp4_ipextstats_list[i].entry, + offsetof(struct ipstats_mib, syncp))); + } + kfree(buff); seq_putc(seq, '\n'); mptcp_seq_show(seq); return 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e26652ff7059..be31e2446470 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -133,9 +133,11 @@ static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; * Interface to generic destination cache. */ -static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); +INDIRECT_CALLABLE_SCOPE +struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ipv4_default_advmss(const struct dst_entry *dst); -static unsigned int ipv4_mtu(const struct dst_entry *dst); +INDIRECT_CALLABLE_SCOPE +unsigned int ipv4_mtu(const struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, @@ -1187,7 +1189,8 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) } EXPORT_SYMBOL_GPL(ipv4_sk_redirect); -static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) +INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst, + u32 cookie) { struct rtable *rt = (struct rtable *) dst; @@ -1203,6 +1206,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) return NULL; return dst; } +EXPORT_INDIRECT_CALLABLE(ipv4_dst_check); static void ipv4_send_dest_unreach(struct sk_buff *skb) { @@ -1311,7 +1315,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) return min(advmss, IPV4_MAX_PMTU - header_size); } -static unsigned int ipv4_mtu(const struct dst_entry *dst) +INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst) { const struct rtable *rt = (const struct rtable *)dst; unsigned int mtu = rt->rt_pmtu; @@ -1333,6 +1337,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } +EXPORT_INDIRECT_CALLABLE(ipv4_mtu); static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr) { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3e5f4f2e705e..e5798b3b59d2 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1354,6 +1354,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE }, + { + .procname = "fib_notify_on_flag_change", + .data = &init_net.ipv4.sysctl_fib_notify_on_flag_change, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, { } }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d4f66aba9fd8..a8f8f9815953 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2859,7 +2859,8 @@ static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag) } else if (tcp_is_rack(sk)) { u32 prior_retrans = tp->retrans_out; - tcp_rack_mark_lost(sk); + if (tcp_rack_mark_lost(sk)) + *ack_flag &= ~FLAG_SET_XMIT_TIMER; if (prior_retrans > tp->retrans_out) *ack_flag |= FLAG_LOST_RETRANS; } @@ -3392,8 +3393,8 @@ static void tcp_ack_probe(struct sock *sk) } else { unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX); - tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - when, TCP_RTO_MAX); + when = tcp_clamp_probe0_to_user_timeout(sk, when); + tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX); } } @@ -3816,9 +3817,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); - /* If needed, reset TLP/RTO timer; RACK may later override this. */ - if (flag & FLAG_SET_XMIT_TIMER) - tcp_set_xmit_timer(sk); if (tcp_ack_is_dubious(sk, flag)) { if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) { @@ -3831,6 +3829,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) &rexmit); } + /* If needed, reset TLP/RTO timer when RACK doesn't set. */ + if (flag & FLAG_SET_XMIT_TIMER) + tcp_set_xmit_timer(sk); + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) sk_dst_confirm(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 777306b5bc22..611039207d30 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1649,6 +1649,8 @@ u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, return mss; } +INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, + u32)); /* The socket must have it's spinlock held when we get * here, unless it is a TCP_LISTEN socket. * @@ -1668,7 +1670,8 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) sk_mark_napi_id(sk, skb); if (dst) { if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || - !dst->ops->check(dst, 0)) { + !INDIRECT_CALL_1(dst->ops->check, ipv4_dst_check, + dst, 0)) { dst_release(dst); sk->sk_rx_dst = NULL; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b0a6b0b30a6b..fbf140a770d8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -4099,6 +4099,8 @@ void tcp_send_probe0(struct sock *sk) */ timeout = TCP_RESOURCE_PROBE_INTERVAL; } + + timeout = tcp_clamp_probe0_to_user_timeout(sk, timeout); tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX); } diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 177307a3081f..6f1b4ac7fe99 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -96,13 +96,13 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) } } -void tcp_rack_mark_lost(struct sock *sk) +bool tcp_rack_mark_lost(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); u32 timeout; if (!tp->rack.advanced) - return; + return false; /* Reset the advanced flag to avoid unnecessary queue scanning */ tp->rack.advanced = 0; @@ -112,6 +112,7 @@ void tcp_rack_mark_lost(struct sock *sk) inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT, timeout, inet_csk(sk)->icsk_rto); } + return !!timeout; } /* Record the most recently (re)sent time among the (s)acked packets diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index faa92948441b..4ef08079ccfa 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -40,6 +40,24 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining)); } +u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + u32 remaining; + s32 elapsed; + + if (!icsk->icsk_user_timeout || !icsk->icsk_probes_tstamp) + return when; + + elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp; + if (unlikely(elapsed < 0)) + elapsed = 0; + remaining = msecs_to_jiffies(icsk->icsk_user_timeout) - elapsed; + remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN); + + return min_t(u32, remaining, when); +} + /** * tcp_write_err() - close socket and save error info * @sk: The socket the error has appeared on. diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 69ea76578abb..48208fb4e895 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -596,6 +596,12 @@ void udp_encap_enable(void) } EXPORT_SYMBOL(udp_encap_enable); +void udp_encap_disable(void) +{ + static_branch_dec(&udp_encap_needed_key); +} +EXPORT_SYMBOL(udp_encap_disable); + /* Handler for tunnels with arbitrary destination ports: no socket lookup, go * through error handlers in encapsulations looking for a match. */ diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 41249705d9e9..b76c48efd37e 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -187,8 +187,67 @@ out_unlock: } EXPORT_SYMBOL(skb_udp_tunnel_segment); +static void __udpv4_gso_segment_csum(struct sk_buff *seg, + __be32 *oldip, __be32 *newip, + __be16 *oldport, __be16 *newport) +{ + struct udphdr *uh; + struct iphdr *iph; + + if (*oldip == *newip && *oldport == *newport) + return; + + uh = udp_hdr(seg); + iph = ip_hdr(seg); + + if (uh->check) { + inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip, + true); + inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport, + false); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + *oldport = *newport; + + csum_replace4(&iph->check, *oldip, *newip); + *oldip = *newip; +} + +static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs) +{ + struct sk_buff *seg; + struct udphdr *uh, *uh2; + struct iphdr *iph, *iph2; + + seg = segs; + uh = udp_hdr(seg); + iph = ip_hdr(seg); + + if ((udp_hdr(seg)->dest == udp_hdr(seg->next)->dest) && + (udp_hdr(seg)->source == udp_hdr(seg->next)->source) && + (ip_hdr(seg)->daddr == ip_hdr(seg->next)->daddr) && + (ip_hdr(seg)->saddr == ip_hdr(seg->next)->saddr)) + return segs; + + while ((seg = seg->next)) { + uh2 = udp_hdr(seg); + iph2 = ip_hdr(seg); + + __udpv4_gso_segment_csum(seg, + &iph2->saddr, &iph->saddr, + &uh2->source, &uh->source); + __udpv4_gso_segment_csum(seg, + &iph2->daddr, &iph->daddr, + &uh2->dest, &uh->dest); + } + + return segs; +} + static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, - netdev_features_t features) + netdev_features_t features, + bool is_ipv6) { unsigned int mss = skb_shinfo(skb)->gso_size; @@ -198,11 +257,11 @@ static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss); - return skb; + return is_ipv6 ? skb : __udpv4_gso_segment_list_csum(skb); } struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, - netdev_features_t features) + netdev_features_t features, bool is_ipv6) { struct sock *sk = gso_skb->sk; unsigned int sum_truesize = 0; @@ -214,7 +273,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, __be16 newlen; if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) - return __udp_gso_segment_list(gso_skb, features); + return __udp_gso_segment_list(gso_skb, features, is_ipv6); mss = skb_shinfo(gso_skb)->gso_size; if (gso_skb->len <= sizeof(*uh) + mss) @@ -328,7 +387,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) - return __udp_gso_segment(skb, features); + return __udp_gso_segment(skb, features, false); mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) |