summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-03-13 15:29:23 -0700
committerDavid S. Miller <davem@davemloft.net>2017-03-13 15:29:23 -0700
commit6a019c5c5059aa0d14788d015aba2f5934aa16ec (patch)
treef048a169201c566cd931aa29d17b0ce75c79e2aa
parentb66239b6824f387361bf18747a7e93760ac3c0fb (diff)
parenta59166e470868d92f0813977817e99e699398af5 (diff)
Merge branch 'mpls-ttl-propagation'
Robert Shearman says: ==================== mpls: allow TTL propagation from IP packets to be configured Allow TTL propagation from IP packets to MPLS packets to be configured. Add a new optional LWT attribute, MPLS_IPTUNNEL_TTL, which allows the TTL to be set in the resulting MPLS packet, with the value of 0 having the semantics of enabling propagation of the TTL from the IP header (i.e. non-zero values disable propagation). Also allow the configuration to be overridden globally by reusing the same sysctl to control whether the TTL is propagated from IP packets into the MPLS header. If the per-LWT attribute is set then it overrides the global configuration. If the TTL isn't propagated then a default TTL value is used which can be configured via a new sysctl, "net.mpls.default_ttl". This is kept separate from the configuration of whether IP TTL propagation is enabled as it can be used in the future when non-IP payloads are supported (i.e. where there is no payload TTL that can be propagated). ==================== Signed-off-by: Robert Shearman <rshearma@brocade.com> Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/mpls-sysctl.txt19
-rw-r--r--include/net/mpls_iptunnel.h2
-rw-r--r--include/net/netns/mpls.h3
-rw-r--r--include/uapi/linux/mpls_iptunnel.h2
-rw-r--r--include/uapi/linux/rtnetlink.h1
-rw-r--r--net/mpls/af_mpls.c98
-rw-r--r--net/mpls/internal.h7
-rw-r--r--net/mpls/mpls_iptunnel.c73
8 files changed, 184 insertions, 21 deletions
diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt
index 15d8d16934fd..2f24a1912a48 100644
--- a/Documentation/networking/mpls-sysctl.txt
+++ b/Documentation/networking/mpls-sysctl.txt
@@ -19,6 +19,25 @@ platform_labels - INTEGER
Possible values: 0 - 1048575
Default: 0
+ip_ttl_propagate - BOOL
+ Control whether TTL is propagated from the IPv4/IPv6 header to
+ the MPLS header on imposing labels and propagated from the
+ MPLS header to the IPv4/IPv6 header on popping the last label.
+
+ If disabled, the MPLS transport network will appear as a
+ single hop to transit traffic.
+
+ 0 - disabled / RFC 3443 [Short] Pipe Model
+ 1 - enabled / RFC 3443 Uniform Model (default)
+
+default_ttl - BOOL
+ Default TTL value to use for MPLS packets where it cannot be
+ propagated from an IP header, either because one isn't present
+ or ip_ttl_propagate has been disabled.
+
+ Possible values: 1 - 255
+ Default: 255
+
conf/<interface>/input - BOOL
Control whether packets can be input on this interface.
diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h
index 179253f9dcfd..a18af6a16eb5 100644
--- a/include/net/mpls_iptunnel.h
+++ b/include/net/mpls_iptunnel.h
@@ -19,6 +19,8 @@
struct mpls_iptunnel_encap {
u32 label[MAX_NEW_LABELS];
u8 labels;
+ u8 ttl_propagate;
+ u8 default_ttl;
};
static inline struct mpls_iptunnel_encap *mpls_lwtunnel_encap(struct lwtunnel_state *lwtstate)
diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h
index d29203651c01..6608b3693385 100644
--- a/include/net/netns/mpls.h
+++ b/include/net/netns/mpls.h
@@ -9,8 +9,11 @@ struct mpls_route;
struct ctl_table_header;
struct netns_mpls {
+ int ip_ttl_propagate;
+ int default_ttl;
size_t platform_labels;
struct mpls_route __rcu * __rcu *platform_label;
+
struct ctl_table_header *ctl;
};
diff --git a/include/uapi/linux/mpls_iptunnel.h b/include/uapi/linux/mpls_iptunnel.h
index d80a0498f77e..f5e45095b0bb 100644
--- a/include/uapi/linux/mpls_iptunnel.h
+++ b/include/uapi/linux/mpls_iptunnel.h
@@ -16,11 +16,13 @@
/* MPLS tunnel attributes
* [RTA_ENCAP] = {
* [MPLS_IPTUNNEL_DST]
+ * [MPLS_IPTUNNEL_TTL]
* }
*/
enum {
MPLS_IPTUNNEL_UNSPEC,
MPLS_IPTUNNEL_DST,
+ MPLS_IPTUNNEL_TTL,
__MPLS_IPTUNNEL_MAX,
};
#define MPLS_IPTUNNEL_MAX (__MPLS_IPTUNNEL_MAX - 1)
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 75fcf5eff093..3dd72aee4d32 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -319,6 +319,7 @@ enum rtattr_type_t {
RTA_EXPIRES,
RTA_PAD,
RTA_UID,
+ RTA_TTL_PROPAGATE,
__RTA_MAX
};
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 3818686182b2..0c5d111abe36 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -32,7 +32,9 @@
#define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)
static int zero = 0;
+static int one = 1;
static int label_limit = (1 << 20) - 1;
+static int ttl_max = 255;
static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
struct nlmsghdr *nlh, struct net *net, u32 portid,
@@ -220,8 +222,8 @@ out:
return &rt->rt_nh[nh_index];
}
-static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
- struct mpls_entry_decoded dec)
+static bool mpls_egress(struct net *net, struct mpls_route *rt,
+ struct sk_buff *skb, struct mpls_entry_decoded dec)
{
enum mpls_payload_type payload_type;
bool success = false;
@@ -246,22 +248,46 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
switch (payload_type) {
case MPT_IPV4: {
struct iphdr *hdr4 = ip_hdr(skb);
+ u8 new_ttl;
skb->protocol = htons(ETH_P_IP);
+
+ /* If propagating TTL, take the decremented TTL from
+ * the incoming MPLS header, otherwise decrement the
+ * TTL, but only if not 0 to avoid underflow.
+ */
+ if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
+ (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+ net->mpls.ip_ttl_propagate))
+ new_ttl = dec.ttl;
+ else
+ new_ttl = hdr4->ttl ? hdr4->ttl - 1 : 0;
+
csum_replace2(&hdr4->check,
htons(hdr4->ttl << 8),
- htons(dec.ttl << 8));
- hdr4->ttl = dec.ttl;
+ htons(new_ttl << 8));
+ hdr4->ttl = new_ttl;
success = true;
break;
}
case MPT_IPV6: {
struct ipv6hdr *hdr6 = ipv6_hdr(skb);
skb->protocol = htons(ETH_P_IPV6);
- hdr6->hop_limit = dec.ttl;
+
+ /* If propagating TTL, take the decremented TTL from
+ * the incoming MPLS header, otherwise decrement the
+ * hop limit, but only if not 0 to avoid underflow.
+ */
+ if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
+ (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+ net->mpls.ip_ttl_propagate))
+ hdr6->hop_limit = dec.ttl;
+ else if (hdr6->hop_limit)
+ hdr6->hop_limit = hdr6->hop_limit - 1;
success = true;
break;
}
case MPT_UNSPEC:
+ /* Should have decided which protocol it is by now */
break;
}
@@ -361,7 +387,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
if (unlikely(!new_header_size && dec.bos)) {
/* Penultimate hop popping */
- if (!mpls_egress(rt, skb, dec))
+ if (!mpls_egress(dev_net(out_dev), rt, skb, dec))
goto err;
} else {
bool bos;
@@ -412,6 +438,7 @@ static struct packet_type mpls_packet_type __read_mostly = {
static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
[RTA_DST] = { .type = NLA_U32 },
[RTA_OIF] = { .type = NLA_U32 },
+ [RTA_TTL_PROPAGATE] = { .type = NLA_U8 },
};
struct mpls_route_config {
@@ -421,6 +448,7 @@ struct mpls_route_config {
u8 rc_via_alen;
u8 rc_via[MAX_VIA_ALEN];
u32 rc_label;
+ u8 rc_ttl_propagate;
u8 rc_output_labels;
u32 rc_output_label[MAX_NEW_LABELS];
u32 rc_nlflags;
@@ -856,6 +884,7 @@ static int mpls_route_add(struct mpls_route_config *cfg)
rt->rt_protocol = cfg->rc_protocol;
rt->rt_payload_type = cfg->rc_payload_type;
+ rt->rt_ttl_propagate = cfg->rc_ttl_propagate;
if (cfg->rc_mp)
err = mpls_nh_build_multi(cfg, rt);
@@ -1576,6 +1605,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->rc_label = LABEL_NOT_SPECIFIED;
cfg->rc_protocol = rtm->rtm_protocol;
cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC;
+ cfg->rc_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
cfg->rc_nlflags = nlh->nlmsg_flags;
cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->rc_nlinfo.nlh = nlh;
@@ -1622,6 +1652,17 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->rc_mp_len = nla_len(nla);
break;
}
+ case RTA_TTL_PROPAGATE:
+ {
+ u8 ttl_propagate = nla_get_u8(nla);
+
+ if (ttl_propagate > 1)
+ goto errout;
+ cfg->rc_ttl_propagate = ttl_propagate ?
+ MPLS_TTL_PROP_ENABLED :
+ MPLS_TTL_PROP_DISABLED;
+ break;
+ }
default:
/* Unsupported attribute */
goto errout;
@@ -1682,6 +1723,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
if (nla_put_labels(skb, RTA_DST, 1, &label))
goto nla_put_failure;
+
+ if (rt->rt_ttl_propagate != MPLS_TTL_PROP_DEFAULT) {
+ bool ttl_propagate =
+ rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED;
+
+ if (nla_put_u8(skb, RTA_TTL_PROPAGATE,
+ ttl_propagate))
+ goto nla_put_failure;
+ }
if (rt->rt_nhn == 1) {
const struct mpls_nh *nh = rt->rt_nh;
@@ -1792,7 +1842,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
{
size_t payload =
NLMSG_ALIGN(sizeof(struct rtmsg))
- + nla_total_size(4); /* RTA_DST */
+ + nla_total_size(4) /* RTA_DST */
+ + nla_total_size(1); /* RTA_TTL_PROPAGATE */
if (rt->rt_nhn == 1) {
struct mpls_nh *nh = rt->rt_nh;
@@ -1876,6 +1927,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
rt0->rt_protocol = RTPROT_KERNEL;
rt0->rt_payload_type = MPT_IPV4;
+ rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
rt0->rt_nh->nh_via_alen = lo->addr_len;
memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
@@ -1889,6 +1941,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
rt2->rt_protocol = RTPROT_KERNEL;
rt2->rt_payload_type = MPT_IPV6;
+ rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
rt2->rt_nh->nh_via_alen = lo->addr_len;
memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
@@ -1970,6 +2023,9 @@ static int mpls_platform_labels(struct ctl_table *table, int write,
return ret;
}
+#define MPLS_NS_SYSCTL_OFFSET(field) \
+ (&((struct net *)0)->field)
+
static const struct ctl_table mpls_table[] = {
{
.procname = "platform_labels",
@@ -1978,21 +2034,47 @@ static const struct ctl_table mpls_table[] = {
.mode = 0644,
.proc_handler = mpls_platform_labels,
},
+ {
+ .procname = "ip_ttl_propagate",
+ .data = MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate),
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .procname = "default_ttl",
+ .data = MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl),
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &ttl_max,
+ },
{ }
};
static int mpls_net_init(struct net *net)
{
struct ctl_table *table;
+ int i;
net->mpls.platform_labels = 0;
net->mpls.platform_label = NULL;
+ net->mpls.ip_ttl_propagate = 1;
+ net->mpls.default_ttl = 255;
table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
if (table == NULL)
return -ENOMEM;
- table[0].data = net;
+ /* Table data contains only offsets relative to the base of
+ * the mdev at this point, so make them absolute.
+ */
+ for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++)
+ table[i].data = (char *)net + (uintptr_t)table[i].data;
+
net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
if (net->mpls.ctl == NULL) {
kfree(table);
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 76360d8b9579..62928d8fabd1 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -90,6 +90,12 @@ struct mpls_nh { /* next hop label forwarding entry */
u8 nh_via_table;
};
+enum mpls_ttl_propagation {
+ MPLS_TTL_PROP_DEFAULT,
+ MPLS_TTL_PROP_ENABLED,
+ MPLS_TTL_PROP_DISABLED,
+};
+
/* The route, nexthops and vias are stored together in the same memory
* block:
*
@@ -116,6 +122,7 @@ struct mpls_route { /* next hop label forwarding entry */
u8 rt_protocol;
u8 rt_payload_type;
u8 rt_max_alen;
+ u8 rt_ttl_propagate;
unsigned int rt_nhn;
unsigned int rt_nhn_alive;
struct mpls_nh rt_nh[0];
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index e4e4424f9eb1..22f71fce0bfb 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -29,6 +29,7 @@
static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = {
[MPLS_IPTUNNEL_DST] = { .type = NLA_U32 },
+ [MPLS_IPTUNNEL_TTL] = { .type = NLA_U8 },
};
static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en)
@@ -49,6 +50,7 @@ static int mpls_xmit(struct sk_buff *skb)
struct rtable *rt = NULL;
struct rt6_info *rt6 = NULL;
struct mpls_dev *out_mdev;
+ struct net *net;
int err = 0;
bool bos;
int i;
@@ -56,17 +58,7 @@ static int mpls_xmit(struct sk_buff *skb)
/* Find the output device */
out_dev = dst->dev;
-
- /* Obtain the ttl */
- if (dst->ops->family == AF_INET) {
- ttl = ip_hdr(skb)->ttl;
- rt = (struct rtable *)dst;
- } else if (dst->ops->family == AF_INET6) {
- ttl = ipv6_hdr(skb)->hop_limit;
- rt6 = (struct rt6_info *)dst;
- } else {
- goto drop;
- }
+ net = dev_net(out_dev);
skb_orphan(skb);
@@ -78,6 +70,38 @@ static int mpls_xmit(struct sk_buff *skb)
tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate);
+ /* Obtain the ttl using the following set of rules.
+ *
+ * LWT ttl propagation setting:
+ * - disabled => use default TTL value from LWT
+ * - enabled => use TTL value from IPv4/IPv6 header
+ * - default =>
+ * Global ttl propagation setting:
+ * - disabled => use default TTL value from global setting
+ * - enabled => use TTL value from IPv4/IPv6 header
+ */
+ if (dst->ops->family == AF_INET) {
+ if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
+ ttl = tun_encap_info->default_ttl;
+ else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+ !net->mpls.ip_ttl_propagate)
+ ttl = net->mpls.default_ttl;
+ else
+ ttl = ip_hdr(skb)->ttl;
+ rt = (struct rtable *)dst;
+ } else if (dst->ops->family == AF_INET6) {
+ if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
+ ttl = tun_encap_info->default_ttl;
+ else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
+ !net->mpls.ip_ttl_propagate)
+ ttl = net->mpls.default_ttl;
+ else
+ ttl = ipv6_hdr(skb)->hop_limit;
+ rt6 = (struct rt6_info *)dst;
+ } else {
+ goto drop;
+ }
+
/* Verify the destination can hold the packet */
new_header_size = mpls_encap_size(tun_encap_info);
mtu = mpls_dev_mtu(out_dev);
@@ -160,6 +184,17 @@ static int mpls_build_state(struct nlattr *nla,
&tun_encap_info->labels, tun_encap_info->label);
if (ret)
goto errout;
+
+ tun_encap_info->ttl_propagate = MPLS_TTL_PROP_DEFAULT;
+
+ if (tb[MPLS_IPTUNNEL_TTL]) {
+ tun_encap_info->default_ttl = nla_get_u8(tb[MPLS_IPTUNNEL_TTL]);
+ /* TTL 0 implies propagate from IP header */
+ tun_encap_info->ttl_propagate = tun_encap_info->default_ttl ?
+ MPLS_TTL_PROP_DISABLED :
+ MPLS_TTL_PROP_ENABLED;
+ }
+
newts->type = LWTUNNEL_ENCAP_MPLS;
newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
newts->headroom = mpls_encap_size(tun_encap_info);
@@ -186,6 +221,10 @@ static int mpls_fill_encap_info(struct sk_buff *skb,
tun_encap_info->label))
goto nla_put_failure;
+ if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT &&
+ nla_put_u8(skb, MPLS_IPTUNNEL_TTL, tun_encap_info->default_ttl))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -195,10 +234,16 @@ nla_put_failure:
static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate)
{
struct mpls_iptunnel_encap *tun_encap_info;
+ int nlsize;
tun_encap_info = mpls_lwtunnel_encap(lwtstate);
- return nla_total_size(tun_encap_info->labels * 4);
+ nlsize = nla_total_size(tun_encap_info->labels * 4);
+
+ if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT)
+ nlsize += nla_total_size(1);
+
+ return nlsize;
}
static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
@@ -207,7 +252,9 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(b);
int l;
- if (a_hdr->labels != b_hdr->labels)
+ if (a_hdr->labels != b_hdr->labels ||
+ a_hdr->ttl_propagate != b_hdr->ttl_propagate ||
+ a_hdr->default_ttl != b_hdr->default_ttl)
return 1;
for (l = 0; l < MAX_NEW_LABELS; l++)