summaryrefslogtreecommitdiff
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r--include/net/checksum.h4
-rw-r--r--include/net/geneve.h5
-rw-r--r--include/net/inet_connection_sock.h5
-rw-r--r--include/net/inet_frag.h2
-rw-r--r--include/net/inet_hashtables.h49
-rw-r--r--include/net/ip.h16
-rw-r--r--include/net/ip6_fib.h12
-rw-r--r--include/net/ip6_route.h21
-rw-r--r--include/net/ipv6.h5
-rw-r--r--include/net/net_namespace.h1
-rw-r--r--include/net/netns/ipv4.h3
-rw-r--r--include/net/sock.h9
-rw-r--r--include/net/switchdev.h50
-rw-r--r--include/net/tcp.h50
14 files changed, 153 insertions, 79 deletions
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 0a55ac715077..2d1d73cb773e 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -122,7 +122,9 @@ static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum)
static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
{
- *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), from), to));
+ __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from);
+
+ *sum = csum_fold(csum_add(tmp, (__force __wsum)to));
}
/* Implements RFC 1624 (Incremental Internet Checksum)
diff --git a/include/net/geneve.h b/include/net/geneve.h
index 14fb8d3390b4..2a0543a1899d 100644
--- a/include/net/geneve.h
+++ b/include/net/geneve.h
@@ -62,6 +62,11 @@ struct genevehdr {
struct geneve_opt options[];
};
+static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
+{
+ return (struct genevehdr *)(udp_hdr(skb) + 1);
+}
+
#ifdef CONFIG_INET
struct geneve_sock;
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 48a815823587..497bc14cdb85 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -129,9 +129,10 @@ struct inet_connection_sock {
u32 probe_timestamp;
} icsk_mtup;
- u32 icsk_ca_priv[16];
u32 icsk_user_timeout;
-#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32))
+
+ u64 icsk_ca_priv[64 / sizeof(u64)];
+#define ICSK_CA_PRIV_SIZE (8 * sizeof(u64))
};
#define ICSK_TIME_RETRANS 1 /* Retransmit timer */
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 8d1765577acc..e1300b3dd597 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -43,7 +43,7 @@ enum {
* @len: total length of the original datagram
* @meat: length of received fragments so far
* @flags: fragment queue flags
- * @max_size: (ipv4 only) maximum received fragment size with IP_DF set
+ * @max_size: maximum received fragment size
* @net: namespace that this frag belongs to
*/
struct inet_frag_queue {
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 73fe0f9525d9..b73c88a19dd4 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -24,7 +24,6 @@
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>
-#include <linux/vmalloc.h>
#include <net/inet_connection_sock.h>
#include <net/inet_sock.h>
@@ -148,8 +147,6 @@ struct inet_hashinfo {
*/
struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE]
____cacheline_aligned_in_smp;
-
- atomic_t bsockets;
};
static inline struct inet_ehash_bucket *inet_ehash_bucket(
@@ -166,52 +163,12 @@ static inline spinlock_t *inet_ehash_lockp(
return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask];
}
-static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
-{
- unsigned int i, size = 256;
-#if defined(CONFIG_PROVE_LOCKING)
- unsigned int nr_pcpus = 2;
-#else
- unsigned int nr_pcpus = num_possible_cpus();
-#endif
- if (nr_pcpus >= 4)
- size = 512;
- if (nr_pcpus >= 8)
- size = 1024;
- if (nr_pcpus >= 16)
- size = 2048;
- if (nr_pcpus >= 32)
- size = 4096;
- if (sizeof(spinlock_t) != 0) {
-#ifdef CONFIG_NUMA
- if (size * sizeof(spinlock_t) > PAGE_SIZE)
- hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t));
- else
-#endif
- hashinfo->ehash_locks = kmalloc(size * sizeof(spinlock_t),
- GFP_KERNEL);
- if (!hashinfo->ehash_locks)
- return ENOMEM;
- for (i = 0; i < size; i++)
- spin_lock_init(&hashinfo->ehash_locks[i]);
- }
- hashinfo->ehash_locks_mask = size - 1;
- return 0;
-}
+int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo);
static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
{
- if (hashinfo->ehash_locks) {
-#ifdef CONFIG_NUMA
- unsigned int size = (hashinfo->ehash_locks_mask + 1) *
- sizeof(spinlock_t);
- if (size > PAGE_SIZE)
- vfree(hashinfo->ehash_locks);
- else
-#endif
- kfree(hashinfo->ehash_locks);
- hashinfo->ehash_locks = NULL;
- }
+ kvfree(hashinfo->ehash_locks);
+ hashinfo->ehash_locks = NULL;
}
struct inet_bind_bucket *
diff --git a/include/net/ip.h b/include/net/ip.h
index 0ed6d768e606..9b976cf99122 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -45,6 +45,7 @@ struct inet_skb_parm {
#define IPSKB_FRAG_COMPLETE BIT(3)
#define IPSKB_REROUTED BIT(4)
#define IPSKB_DOREDIRECT BIT(5)
+#define IPSKB_FRAG_PMTU BIT(6)
u16 frag_max_size;
};
@@ -108,9 +109,8 @@ int ip_local_deliver(struct sk_buff *skb);
int ip_mr_input(struct sk_buff *skb);
int ip_output(struct sock *sk, struct sk_buff *skb);
int ip_mc_output(struct sock *sk, struct sk_buff *skb);
-int ip_fragment(struct sock *sk, struct sk_buff *skb,
- int (*output)(struct sock *, struct sk_buff *));
-int ip_do_nat(struct sk_buff *skb);
+int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct sock *, struct sk_buff *));
void ip_send_check(struct iphdr *ip);
int __ip_local_out(struct sk_buff *skb);
int ip_local_out_sk(struct sock *sk, struct sk_buff *skb);
@@ -478,6 +478,16 @@ enum ip_defrag_users {
IP_DEFRAG_MACVLAN,
};
+/* Return true if the value of 'user' is between 'lower_bond'
+ * and 'upper_bond' inclusively.
+ */
+static inline bool ip_defrag_user_in_between(u32 user,
+ enum ip_defrag_users lower_bond,
+ enum ip_defrag_users upper_bond)
+{
+ return user >= lower_bond && user <= upper_bond;
+}
+
int ip_defrag(struct sk_buff *skb, u32 user);
#ifdef CONFIG_INET
struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user);
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index e00018011028..3b76849c190f 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -120,7 +120,11 @@ struct rt6_info {
struct rt6key rt6i_src;
struct rt6key rt6i_prefsrc;
+ struct list_head rt6i_uncached;
+ struct uncached_list *rt6i_uncached_list;
+
struct inet6_dev *rt6i_idev;
+ struct rt6_info * __percpu *rt6i_pcpu;
u32 rt6i_metric;
u32 rt6i_pmtu;
@@ -159,6 +163,14 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
rt0->rt6i_flags |= RTF_EXPIRES;
}
+static inline u32 rt6_get_cookie(const struct rt6_info *rt)
+{
+ if (rt->rt6i_flags & RTF_PCPU || unlikely(rt->dst.flags & DST_NOCACHE))
+ rt = (struct rt6_info *)(rt->dst.from);
+
+ return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
+}
+
static inline void ip6_rt_put(struct rt6_info *rt)
{
/* dst_release() accepts a NULL parameter.
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 5e192068e6cb..297629aadb19 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -145,7 +145,7 @@ static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst,
#ifdef CONFIG_IPV6_SUBTREES
np->saddr_cache = saddr;
#endif
- np->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
+ np->dst_cookie = rt6_get_cookie(rt);
}
static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
@@ -163,11 +163,14 @@ static inline bool ipv6_unicast_destination(const struct sk_buff *skb)
return rt->rt6i_flags & RTF_LOCAL;
}
-static inline bool ipv6_anycast_destination(const struct sk_buff *skb)
+static inline bool ipv6_anycast_destination(const struct dst_entry *dst,
+ const struct in6_addr *daddr)
{
- struct rt6_info *rt = (struct rt6_info *) skb_dst(skb);
+ struct rt6_info *rt = (struct rt6_info *)dst;
- return rt->rt6i_flags & RTF_ANYCAST;
+ return rt->rt6i_flags & RTF_ANYCAST ||
+ (rt->rt6i_dst.plen != 128 &&
+ ipv6_addr_equal(&rt->rt6i_dst.addr, daddr));
}
int ip6_fragment(struct sock *sk, struct sk_buff *skb,
@@ -194,9 +197,15 @@ static inline bool ip6_sk_ignore_df(const struct sock *sk)
inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;
}
-static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt)
+static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
+ struct in6_addr *daddr)
{
- return &rt->rt6i_gateway;
+ if (rt->rt6i_flags & RTF_GATEWAY)
+ return &rt->rt6i_gateway;
+ else if (unlikely(rt->rt6i_flags & RTF_CACHE))
+ return &rt->rt6i_dst.addr;
+ else
+ return daddr;
}
#endif
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index aab8190d16e8..35d485c78080 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -671,8 +671,9 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add
return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr));
}
-void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr,
- struct rt6_info *rt);
+__be32 ipv6_select_ident(struct net *net,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr);
void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb);
int ip6_dst_hoplimit(struct dst_entry *dst);
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 3f850acc844e..72eb23723294 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -58,6 +58,7 @@ struct net {
struct list_head exit_list; /* Use only net_mutex */
struct user_namespace *user_ns; /* Owning user namespace */
+ spinlock_t nsid_lock;
struct idr netns_ids;
struct ns_common ns;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 614a49be68a9..c68926b4899c 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -19,6 +19,7 @@ struct sock;
struct local_ports {
seqlock_t lock;
int range[2];
+ bool warned;
};
struct ping_group_range {
@@ -77,6 +78,8 @@ struct netns_ipv4 {
struct local_ports ip_local_ports;
int sysctl_tcp_ecn;
+ int sysctl_tcp_ecn_fallback;
+
int sysctl_ip_no_pmtu_disc;
int sysctl_ip_fwd_use_pmtu;
int sysctl_ip_nonlocal_bind;
diff --git a/include/net/sock.h b/include/net/sock.h
index d882f4c8e438..26c1c3171e00 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1368,7 +1368,7 @@ static inline struct inode *SOCK_INODE(struct socket *socket)
* Functions for memory accounting
*/
int __sk_mem_schedule(struct sock *sk, int size, int kind);
-void __sk_mem_reclaim(struct sock *sk);
+void __sk_mem_reclaim(struct sock *sk, int amount);
#define SK_MEM_QUANTUM ((int)PAGE_SIZE)
#define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM)
@@ -1409,7 +1409,7 @@ static inline void sk_mem_reclaim(struct sock *sk)
if (!sk_has_account(sk))
return;
if (sk->sk_forward_alloc >= SK_MEM_QUANTUM)
- __sk_mem_reclaim(sk);
+ __sk_mem_reclaim(sk, sk->sk_forward_alloc);
}
static inline void sk_mem_reclaim_partial(struct sock *sk)
@@ -1417,7 +1417,7 @@ static inline void sk_mem_reclaim_partial(struct sock *sk)
if (!sk_has_account(sk))
return;
if (sk->sk_forward_alloc > SK_MEM_QUANTUM)
- __sk_mem_reclaim(sk);
+ __sk_mem_reclaim(sk, sk->sk_forward_alloc - 1);
}
static inline void sk_mem_charge(struct sock *sk, int size)
@@ -2025,7 +2025,8 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk)
}
}
-struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp);
+struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+ bool force_schedule);
/**
* sk_page_frag - return an appropriate page_frag
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index ea5b1c230d3d..437f8fe75705 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -47,11 +47,13 @@ enum switchdev_obj_id {
SWITCHDEV_OBJ_UNDEFINED,
SWITCHDEV_OBJ_PORT_VLAN,
SWITCHDEV_OBJ_IPV4_FIB,
+ SWITCHDEV_OBJ_PORT_FDB,
};
struct switchdev_obj {
enum switchdev_obj_id id;
enum switchdev_trans trans;
+ int (*cb)(struct net_device *dev, struct switchdev_obj *obj);
union {
struct switchdev_obj_vlan { /* PORT_VLAN */
u16 flags;
@@ -67,6 +69,10 @@ struct switchdev_obj {
u32 nlflags;
u32 tb_id;
} ipv4_fib;
+ struct switchdev_obj_fdb { /* PORT_FDB */
+ const unsigned char *addr;
+ u16 vid;
+ } fdb;
} u;
};
@@ -80,6 +86,8 @@ struct switchdev_obj {
* @switchdev_port_obj_add: Add an object to port (see switchdev_obj).
*
* @switchdev_port_obj_del: Delete an object from port (see switchdev_obj).
+ *
+ * @switchdev_port_obj_dump: Dump port objects (see switchdev_obj).
*/
struct switchdev_ops {
int (*switchdev_port_attr_get)(struct net_device *dev,
@@ -90,6 +98,8 @@ struct switchdev_ops {
struct switchdev_obj *obj);
int (*switchdev_port_obj_del)(struct net_device *dev,
struct switchdev_obj *obj);
+ int (*switchdev_port_obj_dump)(struct net_device *dev,
+ struct switchdev_obj *obj);
};
enum switchdev_notifier_type {
@@ -121,6 +131,7 @@ int switchdev_port_attr_set(struct net_device *dev,
struct switchdev_attr *attr);
int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj);
int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj);
+int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj);
int register_switchdev_notifier(struct notifier_block *nb);
int unregister_switchdev_notifier(struct notifier_block *nb);
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
@@ -137,6 +148,15 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
u8 tos, u8 type, u32 tb_id);
void switchdev_fib_ipv4_abort(struct fib_info *fi);
+int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev, const unsigned char *addr,
+ u16 vid, u16 nlm_flags);
+int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev, const unsigned char *addr,
+ u16 vid);
+int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct net_device *dev,
+ struct net_device *filter_dev, int idx);
#else
@@ -164,6 +184,12 @@ static inline int switchdev_port_obj_del(struct net_device *dev,
return -EOPNOTSUPP;
}
+static inline int switchdev_port_obj_dump(struct net_device *dev,
+ struct switchdev_obj *obj)
+{
+ return -EOPNOTSUPP;
+}
+
static inline int register_switchdev_notifier(struct notifier_block *nb)
{
return 0;
@@ -221,6 +247,30 @@ static inline void switchdev_fib_ipv4_abort(struct fib_info *fi)
{
}
+static inline int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr,
+ u16 vid, u16 nlm_flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int switchdev_port_fdb_dump(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct net_device *dev,
+ struct net_device *filter_dev,
+ int idx)
+{
+ return -EOPNOTSUPP;
+}
+
#endif
#endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b8ea12880fd9..2bb2bad21d5c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -286,6 +286,14 @@ extern atomic_long_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
extern int tcp_memory_pressure;
+/* optimized version of sk_under_memory_pressure() for TCP sockets */
+static inline bool tcp_under_memory_pressure(const struct sock *sk)
+{
+ if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+ return !!sk->sk_cgrp->memory_pressure;
+
+ return tcp_memory_pressure;
+}
/*
* The next routines deal with comparing 32 bit unsigned ints
* and worry about wraparound (automatic with unsigned arithmetic).
@@ -311,6 +319,8 @@ static inline bool tcp_out_of_memory(struct sock *sk)
return false;
}
+void sk_forced_mem_schedule(struct sock *sk, int size);
+
static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
{
struct percpu_counter *ocp = sk->sk_prot->orphan_count;
@@ -326,18 +336,6 @@ static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
bool tcp_check_oom(struct sock *sk, int shift);
-/* syncookies: remember time of last synqueue overflow */
-static inline void tcp_synq_overflow(struct sock *sk)
-{
- tcp_sk(sk)->rx_opt.ts_recent_stamp = jiffies;
-}
-
-/* syncookies: no recent synqueue overflow on this listening socket? */
-static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
-{
- unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
- return time_after(jiffies, last_overflow + TCP_TIMEOUT_FALLBACK);
-}
extern struct proto tcp_prot;
@@ -483,13 +481,35 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
* i.e. a sent cookie is valid only at most for 2*60 seconds (or less if
* the counter advances immediately after a cookie is generated).
*/
-#define MAX_SYNCOOKIE_AGE 2
+#define MAX_SYNCOOKIE_AGE 2
+#define TCP_SYNCOOKIE_PERIOD (60 * HZ)
+#define TCP_SYNCOOKIE_VALID (MAX_SYNCOOKIE_AGE * TCP_SYNCOOKIE_PERIOD)
+
+/* syncookies: remember time of last synqueue overflow
+ * But do not dirty this field too often (once per second is enough)
+ */
+static inline void tcp_synq_overflow(struct sock *sk)
+{
+ unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+ unsigned long now = jiffies;
+
+ if (time_after(now, last_overflow + HZ))
+ tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
+}
+
+/* syncookies: no recent synqueue overflow on this listening socket? */
+static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
+{
+ unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+
+ return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID);
+}
static inline u32 tcp_cookie_time(void)
{
u64 val = get_jiffies_64();
- do_div(val, 60 * HZ);
+ do_div(val, TCP_SYNCOOKIE_PERIOD);
return val;
}
@@ -692,6 +712,8 @@ static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
#define TCPHDR_ECE 0x40
#define TCPHDR_CWR 0x80
+#define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR)
+
/* This is what the send packet queuing engine uses to pass
* TCP per-packet control information to the transmission code.
* We also store the host-order sequence numbers in here too.