diff options
Diffstat (limited to 'include/net')
-rw-r--r-- | include/net/checksum.h | 4 | ||||
-rw-r--r-- | include/net/geneve.h | 5 | ||||
-rw-r--r-- | include/net/inet_connection_sock.h | 5 | ||||
-rw-r--r-- | include/net/inet_frag.h | 2 | ||||
-rw-r--r-- | include/net/inet_hashtables.h | 49 | ||||
-rw-r--r-- | include/net/ip.h | 16 | ||||
-rw-r--r-- | include/net/ip6_fib.h | 12 | ||||
-rw-r--r-- | include/net/ip6_route.h | 21 | ||||
-rw-r--r-- | include/net/ipv6.h | 5 | ||||
-rw-r--r-- | include/net/net_namespace.h | 1 | ||||
-rw-r--r-- | include/net/netns/ipv4.h | 3 | ||||
-rw-r--r-- | include/net/sock.h | 9 | ||||
-rw-r--r-- | include/net/switchdev.h | 50 | ||||
-rw-r--r-- | include/net/tcp.h | 50 |
14 files changed, 153 insertions, 79 deletions
diff --git a/include/net/checksum.h b/include/net/checksum.h index 0a55ac715077..2d1d73cb773e 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -122,7 +122,9 @@ static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum) static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { - *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), from), to)); + __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); + + *sum = csum_fold(csum_add(tmp, (__force __wsum)to)); } /* Implements RFC 1624 (Incremental Internet Checksum) diff --git a/include/net/geneve.h b/include/net/geneve.h index 14fb8d3390b4..2a0543a1899d 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -62,6 +62,11 @@ struct genevehdr { struct geneve_opt options[]; }; +static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) +{ + return (struct genevehdr *)(udp_hdr(skb) + 1); +} + #ifdef CONFIG_INET struct geneve_sock; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 48a815823587..497bc14cdb85 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -129,9 +129,10 @@ struct inet_connection_sock { u32 probe_timestamp; } icsk_mtup; - u32 icsk_ca_priv[16]; u32 icsk_user_timeout; -#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) + + u64 icsk_ca_priv[64 / sizeof(u64)]; +#define ICSK_CA_PRIV_SIZE (8 * sizeof(u64)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 8d1765577acc..e1300b3dd597 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -43,7 +43,7 @@ enum { * @len: total length of the original datagram * @meat: length of received fragments so far * @flags: fragment queue flags - * @max_size: (ipv4 only) maximum received fragment size with IP_DF set + * @max_size: maximum received fragment size * @net: namespace that this frag belongs to */ struct inet_frag_queue { diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 73fe0f9525d9..b73c88a19dd4 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -24,7 +24,6 @@ #include <linux/spinlock.h> #include <linux/types.h> #include <linux/wait.h> -#include <linux/vmalloc.h> #include <net/inet_connection_sock.h> #include <net/inet_sock.h> @@ -148,8 +147,6 @@ struct inet_hashinfo { */ struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] ____cacheline_aligned_in_smp; - - atomic_t bsockets; }; static inline struct inet_ehash_bucket *inet_ehash_bucket( @@ -166,52 +163,12 @@ static inline spinlock_t *inet_ehash_lockp( return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask]; } -static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) -{ - unsigned int i, size = 256; -#if defined(CONFIG_PROVE_LOCKING) - unsigned int nr_pcpus = 2; -#else - unsigned int nr_pcpus = num_possible_cpus(); -#endif - if (nr_pcpus >= 4) - size = 512; - if (nr_pcpus >= 8) - size = 1024; - if (nr_pcpus >= 16) - size = 2048; - if (nr_pcpus >= 32) - size = 4096; - if (sizeof(spinlock_t) != 0) { -#ifdef CONFIG_NUMA - if (size * sizeof(spinlock_t) > PAGE_SIZE) - hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t)); - else -#endif - hashinfo->ehash_locks = kmalloc(size * sizeof(spinlock_t), - GFP_KERNEL); - if (!hashinfo->ehash_locks) - return ENOMEM; - for (i = 0; i < size; i++) - spin_lock_init(&hashinfo->ehash_locks[i]); - } - hashinfo->ehash_locks_mask = size - 1; - return 0; -} +int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo); static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) { - if (hashinfo->ehash_locks) { -#ifdef CONFIG_NUMA - unsigned int size = (hashinfo->ehash_locks_mask + 1) * - sizeof(spinlock_t); - if (size > PAGE_SIZE) - vfree(hashinfo->ehash_locks); - else -#endif - kfree(hashinfo->ehash_locks); - hashinfo->ehash_locks = NULL; - } + kvfree(hashinfo->ehash_locks); + hashinfo->ehash_locks = NULL; } struct inet_bind_bucket * diff --git a/include/net/ip.h b/include/net/ip.h index 0ed6d768e606..9b976cf99122 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -45,6 +45,7 @@ struct inet_skb_parm { #define IPSKB_FRAG_COMPLETE BIT(3) #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) +#define IPSKB_FRAG_PMTU BIT(6) u16 frag_max_size; }; @@ -108,9 +109,8 @@ int ip_local_deliver(struct sk_buff *skb); int ip_mr_input(struct sk_buff *skb); int ip_output(struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct sock *sk, struct sk_buff *skb); -int ip_fragment(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)); -int ip_do_nat(struct sk_buff *skb); +int ip_do_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)); void ip_send_check(struct iphdr *ip); int __ip_local_out(struct sk_buff *skb); int ip_local_out_sk(struct sock *sk, struct sk_buff *skb); @@ -478,6 +478,16 @@ enum ip_defrag_users { IP_DEFRAG_MACVLAN, }; +/* Return true if the value of 'user' is between 'lower_bond' + * and 'upper_bond' inclusively. + */ +static inline bool ip_defrag_user_in_between(u32 user, + enum ip_defrag_users lower_bond, + enum ip_defrag_users upper_bond) +{ + return user >= lower_bond && user <= upper_bond; +} + int ip_defrag(struct sk_buff *skb, u32 user); #ifdef CONFIG_INET struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index e00018011028..3b76849c190f 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -120,7 +120,11 @@ struct rt6_info { struct rt6key rt6i_src; struct rt6key rt6i_prefsrc; + struct list_head rt6i_uncached; + struct uncached_list *rt6i_uncached_list; + struct inet6_dev *rt6i_idev; + struct rt6_info * __percpu *rt6i_pcpu; u32 rt6i_metric; u32 rt6i_pmtu; @@ -159,6 +163,14 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) rt0->rt6i_flags |= RTF_EXPIRES; } +static inline u32 rt6_get_cookie(const struct rt6_info *rt) +{ + if (rt->rt6i_flags & RTF_PCPU || unlikely(rt->dst.flags & DST_NOCACHE)) + rt = (struct rt6_info *)(rt->dst.from); + + return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; +} + static inline void ip6_rt_put(struct rt6_info *rt) { /* dst_release() accepts a NULL parameter. diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 5e192068e6cb..297629aadb19 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -145,7 +145,7 @@ static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_IPV6_SUBTREES np->saddr_cache = saddr; #endif - np->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; + np->dst_cookie = rt6_get_cookie(rt); } static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, @@ -163,11 +163,14 @@ static inline bool ipv6_unicast_destination(const struct sk_buff *skb) return rt->rt6i_flags & RTF_LOCAL; } -static inline bool ipv6_anycast_destination(const struct sk_buff *skb) +static inline bool ipv6_anycast_destination(const struct dst_entry *dst, + const struct in6_addr *daddr) { - struct rt6_info *rt = (struct rt6_info *) skb_dst(skb); + struct rt6_info *rt = (struct rt6_info *)dst; - return rt->rt6i_flags & RTF_ANYCAST; + return rt->rt6i_flags & RTF_ANYCAST || + (rt->rt6i_dst.plen != 128 && + ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); } int ip6_fragment(struct sock *sk, struct sk_buff *skb, @@ -194,9 +197,15 @@ static inline bool ip6_sk_ignore_df(const struct sock *sk) inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; } -static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt) +static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, + struct in6_addr *daddr) { - return &rt->rt6i_gateway; + if (rt->rt6i_flags & RTF_GATEWAY) + return &rt->rt6i_gateway; + else if (unlikely(rt->rt6i_flags & RTF_CACHE)) + return &rt->rt6i_dst.addr; + else + return daddr; } #endif diff --git a/include/net/ipv6.h b/include/net/ipv6.h index aab8190d16e8..35d485c78080 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -671,8 +671,9 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } -void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr, - struct rt6_info *rt); +__be32 ipv6_select_ident(struct net *net, + const struct in6_addr *daddr, + const struct in6_addr *saddr); void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 3f850acc844e..72eb23723294 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -58,6 +58,7 @@ struct net { struct list_head exit_list; /* Use only net_mutex */ struct user_namespace *user_ns; /* Owning user namespace */ + spinlock_t nsid_lock; struct idr netns_ids; struct ns_common ns; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 614a49be68a9..c68926b4899c 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -19,6 +19,7 @@ struct sock; struct local_ports { seqlock_t lock; int range[2]; + bool warned; }; struct ping_group_range { @@ -77,6 +78,8 @@ struct netns_ipv4 { struct local_ports ip_local_ports; int sysctl_tcp_ecn; + int sysctl_tcp_ecn_fallback; + int sysctl_ip_no_pmtu_disc; int sysctl_ip_fwd_use_pmtu; int sysctl_ip_nonlocal_bind; diff --git a/include/net/sock.h b/include/net/sock.h index d882f4c8e438..26c1c3171e00 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1368,7 +1368,7 @@ static inline struct inode *SOCK_INODE(struct socket *socket) * Functions for memory accounting */ int __sk_mem_schedule(struct sock *sk, int size, int kind); -void __sk_mem_reclaim(struct sock *sk); +void __sk_mem_reclaim(struct sock *sk, int amount); #define SK_MEM_QUANTUM ((int)PAGE_SIZE) #define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM) @@ -1409,7 +1409,7 @@ static inline void sk_mem_reclaim(struct sock *sk) if (!sk_has_account(sk)) return; if (sk->sk_forward_alloc >= SK_MEM_QUANTUM) - __sk_mem_reclaim(sk); + __sk_mem_reclaim(sk, sk->sk_forward_alloc); } static inline void sk_mem_reclaim_partial(struct sock *sk) @@ -1417,7 +1417,7 @@ static inline void sk_mem_reclaim_partial(struct sock *sk) if (!sk_has_account(sk)) return; if (sk->sk_forward_alloc > SK_MEM_QUANTUM) - __sk_mem_reclaim(sk); + __sk_mem_reclaim(sk, sk->sk_forward_alloc - 1); } static inline void sk_mem_charge(struct sock *sk, int size) @@ -2025,7 +2025,8 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk) } } -struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp); +struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, + bool force_schedule); /** * sk_page_frag - return an appropriate page_frag diff --git a/include/net/switchdev.h b/include/net/switchdev.h index ea5b1c230d3d..437f8fe75705 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -47,11 +47,13 @@ enum switchdev_obj_id { SWITCHDEV_OBJ_UNDEFINED, SWITCHDEV_OBJ_PORT_VLAN, SWITCHDEV_OBJ_IPV4_FIB, + SWITCHDEV_OBJ_PORT_FDB, }; struct switchdev_obj { enum switchdev_obj_id id; enum switchdev_trans trans; + int (*cb)(struct net_device *dev, struct switchdev_obj *obj); union { struct switchdev_obj_vlan { /* PORT_VLAN */ u16 flags; @@ -67,6 +69,10 @@ struct switchdev_obj { u32 nlflags; u32 tb_id; } ipv4_fib; + struct switchdev_obj_fdb { /* PORT_FDB */ + const unsigned char *addr; + u16 vid; + } fdb; } u; }; @@ -80,6 +86,8 @@ struct switchdev_obj { * @switchdev_port_obj_add: Add an object to port (see switchdev_obj). * * @switchdev_port_obj_del: Delete an object from port (see switchdev_obj). + * + * @switchdev_port_obj_dump: Dump port objects (see switchdev_obj). */ struct switchdev_ops { int (*switchdev_port_attr_get)(struct net_device *dev, @@ -90,6 +98,8 @@ struct switchdev_ops { struct switchdev_obj *obj); int (*switchdev_port_obj_del)(struct net_device *dev, struct switchdev_obj *obj); + int (*switchdev_port_obj_dump)(struct net_device *dev, + struct switchdev_obj *obj); }; enum switchdev_notifier_type { @@ -121,6 +131,7 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr); int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj); int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj); +int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj); int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); int call_switchdev_notifiers(unsigned long val, struct net_device *dev, @@ -137,6 +148,15 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 tb_id); void switchdev_fib_ipv4_abort(struct fib_info *fi); +int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, + u16 vid, u16 nlm_flags); +int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, + u16 vid); +int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, + struct net_device *filter_dev, int idx); #else @@ -164,6 +184,12 @@ static inline int switchdev_port_obj_del(struct net_device *dev, return -EOPNOTSUPP; } +static inline int switchdev_port_obj_dump(struct net_device *dev, + struct switchdev_obj *obj) +{ + return -EOPNOTSUPP; +} + static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; @@ -221,6 +247,30 @@ static inline void switchdev_fib_ipv4_abort(struct fib_info *fi) { } +static inline int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, + u16 vid, u16 nlm_flags) +{ + return -EOPNOTSUPP; +} + +static inline int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline int switchdev_port_fdb_dump(struct sk_buff *skb, + struct netlink_callback *cb, + struct net_device *dev, + struct net_device *filter_dev, + int idx) +{ + return -EOPNOTSUPP; +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/include/net/tcp.h b/include/net/tcp.h index b8ea12880fd9..2bb2bad21d5c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -286,6 +286,14 @@ extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; extern int tcp_memory_pressure; +/* optimized version of sk_under_memory_pressure() for TCP sockets */ +static inline bool tcp_under_memory_pressure(const struct sock *sk) +{ + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + return !!sk->sk_cgrp->memory_pressure; + + return tcp_memory_pressure; +} /* * The next routines deal with comparing 32 bit unsigned ints * and worry about wraparound (automatic with unsigned arithmetic). @@ -311,6 +319,8 @@ static inline bool tcp_out_of_memory(struct sock *sk) return false; } +void sk_forced_mem_schedule(struct sock *sk, int size); + static inline bool tcp_too_many_orphans(struct sock *sk, int shift) { struct percpu_counter *ocp = sk->sk_prot->orphan_count; @@ -326,18 +336,6 @@ static inline bool tcp_too_many_orphans(struct sock *sk, int shift) bool tcp_check_oom(struct sock *sk, int shift); -/* syncookies: remember time of last synqueue overflow */ -static inline void tcp_synq_overflow(struct sock *sk) -{ - tcp_sk(sk)->rx_opt.ts_recent_stamp = jiffies; -} - -/* syncookies: no recent synqueue overflow on this listening socket? */ -static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) -{ - unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; - return time_after(jiffies, last_overflow + TCP_TIMEOUT_FALLBACK); -} extern struct proto tcp_prot; @@ -483,13 +481,35 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); * i.e. a sent cookie is valid only at most for 2*60 seconds (or less if * the counter advances immediately after a cookie is generated). */ -#define MAX_SYNCOOKIE_AGE 2 +#define MAX_SYNCOOKIE_AGE 2 +#define TCP_SYNCOOKIE_PERIOD (60 * HZ) +#define TCP_SYNCOOKIE_VALID (MAX_SYNCOOKIE_AGE * TCP_SYNCOOKIE_PERIOD) + +/* syncookies: remember time of last synqueue overflow + * But do not dirty this field too often (once per second is enough) + */ +static inline void tcp_synq_overflow(struct sock *sk) +{ + unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + unsigned long now = jiffies; + + if (time_after(now, last_overflow + HZ)) + tcp_sk(sk)->rx_opt.ts_recent_stamp = now; +} + +/* syncookies: no recent synqueue overflow on this listening socket? */ +static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) +{ + unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + + return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID); +} static inline u32 tcp_cookie_time(void) { u64 val = get_jiffies_64(); - do_div(val, 60 * HZ); + do_div(val, TCP_SYNCOOKIE_PERIOD); return val; } @@ -692,6 +712,8 @@ static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) #define TCPHDR_ECE 0x40 #define TCPHDR_CWR 0x80 +#define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR) + /* This is what the send packet queuing engine uses to pass * TCP per-packet control information to the transmission code. * We also store the host-order sequence numbers in here too. |