summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/ip-sysctl.txt13
-rw-r--r--include/net/ip.h33
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--include/net/route.h19
-rw-r--r--net/ipv4/ip_forward.c7
-rw-r--r--net/ipv4/ip_output.c8
-rw-r--r--net/ipv4/route.c3
-rw-r--r--net/ipv4/sysctl_net_ipv4.c7
8 files changed, 67 insertions, 24 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 7373115407e4..0d71fa962d8a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -32,6 +32,19 @@ ip_no_pmtu_disc - INTEGER
min_pmtu - INTEGER
default 552 - minimum discovered Path MTU
+ip_forward_use_pmtu - BOOLEAN
+ By default we don't trust protocol path MTUs while forwarding
+ because they could be easily forged and can lead to unwanted
+ fragmentation by the router.
+ You only need to enable this if you have user-space software
+ which tries to discover path mtus by itself and depends on the
+ kernel honoring this information. This is normally not the
+ case.
+ Default: 0 (disabled)
+ Possible values:
+ 0 - disabled
+ 1 - enabled
+
route/max_size - INTEGER
Maximum number of routes allowed in the kernel. Increase
this when using large numbers of interfaces and/or routes.
diff --git a/include/net/ip.h b/include/net/ip.h
index 535664477c4a..0dab95c2e4d5 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -263,6 +263,39 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst)
!(dst_metric_locked(dst, RTAX_MTU)));
}
+static inline bool ip_sk_accept_pmtu(const struct sock *sk)
+{
+ return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE;
+}
+
+static inline bool ip_sk_use_pmtu(const struct sock *sk)
+{
+ return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE;
+}
+
+static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
+ bool forwarding)
+{
+ struct net *net = dev_net(dst->dev);
+
+ if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
+ dst_metric_locked(dst, RTAX_MTU) ||
+ !forwarding)
+ return dst_mtu(dst);
+
+ return min(dst->dev->mtu, IP_MAX_MTU);
+}
+
+static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb)
+{
+ if (!skb->sk || ip_sk_use_pmtu(skb->sk)) {
+ bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
+ return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding);
+ } else {
+ return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU);
+ }
+}
+
void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);
static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 929a668e91a9..80f500a29498 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -70,6 +70,7 @@ struct netns_ipv4 {
int sysctl_tcp_ecn;
int sysctl_ip_no_pmtu_disc;
+ int sysctl_ip_fwd_use_pmtu;
kgid_t sysctl_ping_group_range[2];
diff --git a/include/net/route.h b/include/net/route.h
index 638e3ebf76f3..9d1f423d5944 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -36,6 +36,9 @@
#include <linux/cache.h>
#include <linux/security.h>
+/* IPv4 datagram length is stored into 16bit field (tot_len) */
+#define IP_MAX_MTU 0xFFFFU
+
#define RTO_ONLINK 0x01
#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
@@ -311,20 +314,4 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
return hoplimit;
}
-static inline bool ip_sk_accept_pmtu(const struct sock *sk)
-{
- return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE;
-}
-
-static inline bool ip_sk_use_pmtu(const struct sock *sk)
-{
- return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE;
-}
-
-static inline int ip_skb_dst_mtu(const struct sk_buff *skb)
-{
- return (!skb->sk || ip_sk_use_pmtu(skb->sk)) ?
- dst_mtu(skb_dst(skb)) : skb_dst(skb)->dev->mtu;
-}
-
#endif /* _ROUTE_H */
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 694de3b7aebf..e9f1217a8afd 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -54,6 +54,7 @@ static int ip_forward_finish(struct sk_buff *skb)
int ip_forward(struct sk_buff *skb)
{
+ u32 mtu;
struct iphdr *iph; /* Our header */
struct rtable *rt; /* Route we use */
struct ip_options *opt = &(IPCB(skb)->opt);
@@ -88,11 +89,13 @@ int ip_forward(struct sk_buff *skb)
if (opt->is_strictroute && rt->rt_uses_gateway)
goto sr_failed;
- if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
+ IPCB(skb)->flags |= IPSKB_FORWARDED;
+ mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
+ if (unlikely(skb->len > mtu && !skb_is_gso(skb) &&
(ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(dst_mtu(&rt->dst)));
+ htonl(mtu));
goto drop;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index df184616493f..9a78804cfe9c 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -449,6 +449,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
__be16 not_last_frag;
struct rtable *rt = skb_rtable(skb);
int err = 0;
+ bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
dev = rt->dst.dev;
@@ -458,12 +459,13 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
iph = ip_hdr(skb);
+ mtu = ip_dst_mtu_maybe_forward(&rt->dst, forwarding);
if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) ||
(IPCB(skb)->frag_max_size &&
- IPCB(skb)->frag_max_size > dst_mtu(&rt->dst)))) {
+ IPCB(skb)->frag_max_size > mtu))) {
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(ip_skb_dst_mtu(skb)));
+ htonl(mtu));
kfree_skb(skb);
return -EMSGSIZE;
}
@@ -473,7 +475,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
*/
hlen = iph->ihl * 4;
- mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */
+ mtu = mtu - hlen; /* Size of data space */
#ifdef CONFIG_BRIDGE_NETFILTER
if (skb->nf_bridge)
mtu -= nf_bridge_mtu_reduction(skb);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f8da28278014..25071b48921c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -112,9 +112,6 @@
#define RT_FL_TOS(oldflp4) \
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
-/* IPv4 datagram length is stored into 16bit field (tot_len) */
-#define IP_MAX_MTU 0xFFFF
-
#define RT_GC_TIMEOUT (300*HZ)
static int ip_rt_max_size;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 1d2480ac2bb6..44eba052b43d 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -831,6 +831,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "ip_forward_use_pmtu",
+ .data = &init_net.ipv4.sysctl_ip_fwd_use_pmtu,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{ }
};