diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/openvswitch/actions.c | 30 | ||||
-rw-r--r-- | net/sched/Kconfig | 11 | ||||
-rw-r--r-- | net/sched/Makefile | 1 | ||||
-rw-r--r-- | net/sched/act_bpf.c | 5 | ||||
-rw-r--r-- | net/sched/act_skbmod.c | 301 | ||||
-rw-r--r-- | net/sched/act_tunnel_key.c | 17 | ||||
-rw-r--r-- | net/sched/cls_bpf.c | 3 | ||||
-rw-r--r-- | net/sched/cls_flower.c | 21 |
8 files changed, 358 insertions, 31 deletions
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 4fe9032b1160..863e992dfbc0 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -71,6 +71,8 @@ struct ovs_frag_data { static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage); #define DEFERRED_ACTION_FIFO_SIZE 10 +#define OVS_RECURSION_LIMIT 5 +#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2) struct action_fifo { int head; int tail; @@ -78,7 +80,12 @@ struct action_fifo { struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE]; }; +struct recirc_keys { + struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD]; +}; + static struct action_fifo __percpu *action_fifos; +static struct recirc_keys __percpu *recirc_keys; static DEFINE_PER_CPU(int, exec_actions_level); static void action_fifo_init(struct action_fifo *fifo) @@ -1020,6 +1027,7 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb, const struct nlattr *a, int rem) { struct deferred_action *da; + int level; if (!is_flow_key_valid(key)) { int err; @@ -1043,6 +1051,18 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb, return 0; } + level = this_cpu_read(exec_actions_level); + if (level <= OVS_DEFERRED_ACTION_THRESHOLD) { + struct recirc_keys *rks = this_cpu_ptr(recirc_keys); + struct sw_flow_key *recirc_key = &rks->key[level - 1]; + + *recirc_key = *key; + recirc_key->recirc_id = nla_get_u32(a); + ovs_dp_process_packet(skb, recirc_key); + + return 0; + } + da = add_deferred_actions(skb, key, NULL); if (da) { da->pkt_key.recirc_id = nla_get_u32(a); @@ -1209,11 +1229,10 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, const struct sw_flow_actions *acts, struct sw_flow_key *key) { - static const int ovs_recursion_limit = 5; int err, level; level = __this_cpu_inc_return(exec_actions_level); - if (unlikely(level > ovs_recursion_limit)) { + if (unlikely(level > OVS_RECURSION_LIMIT)) { net_crit_ratelimited("ovs: recursion limit reached on datapath 
%s, probable configuration error\n", ovs_dp_name(dp)); kfree_skb(skb); @@ -1238,10 +1257,17 @@ int action_fifos_init(void) if (!action_fifos) return -ENOMEM; + recirc_keys = alloc_percpu(struct recirc_keys); + if (!recirc_keys) { + free_percpu(action_fifos); + return -ENOMEM; + } + return 0; } void action_fifos_exit(void) { free_percpu(action_fifos); + free_percpu(recirc_keys); } diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 72e3426fa48f..7795d5a3f79a 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -749,6 +749,17 @@ config NET_ACT_CONNMARK To compile this code as a module, choose M here: the module will be called act_connmark. +config NET_ACT_SKBMOD + tristate "skb data modification action" + depends on NET_CLS_ACT + ---help--- + Say Y here to allow modification of skb data + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_skbmod. + config NET_ACT_IFE tristate "Inter-FE action based on IETF ForCES InterFE LFB" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index b9d046b9535a..148ae0d5ac2c 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o +obj-$(CONFIG_NET_ACT_SKBMOD) += act_skbmod.o obj-$(CONFIG_NET_ACT_IFE) += act_ife.o obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index bfa870731e74..1d3960033f61 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -39,13 +39,10 @@ static struct tc_action_ops act_bpf_ops; static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, struct tcf_result *res) { + bool at_ingress = skb_at_tc_ingress(skb); struct tcf_bpf *prog = to_bpf(act); struct bpf_prog *filter; int action, filter_res; - bool at_ingress = 
G_TC_AT(skb->tc_verd) & AT_INGRESS; - - if (unlikely(!skb_mac_header_was_set(skb))) - return TC_ACT_UNSPEC; tcf_lastuse_update(&prog->tcf_tm); bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb); diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c new file mode 100644 index 000000000000..e7d96381c908 --- /dev/null +++ b/net/sched/act_skbmod.c @@ -0,0 +1,301 @@ +/* + * net/sched/act_skbmod.c skb data modifier + * + * Copyright (c) 2016 Jamal Hadi Salim <jhs@mojatatu.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> + +#include <linux/tc_act/tc_skbmod.h> +#include <net/tc_act/tc_skbmod.h> + +#define SKBMOD_TAB_MASK 15 + +static int skbmod_net_id; +static struct tc_action_ops act_skbmod_ops; + +#define MAX_EDIT_LEN ETH_HLEN +static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_skbmod *d = to_skbmod(a); + int action; + struct tcf_skbmod_params *p; + u64 flags; + int err; + + tcf_lastuse_update(&d->tcf_tm); + bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); + + /* XXX: if you are going to edit more fields beyond ethernet header + * (example when you add IP header replacement or vlan swap) + * then MAX_EDIT_LEN needs to change appropriately + */ + err = skb_ensure_writable(skb, MAX_EDIT_LEN); + if (unlikely(err)) { /* best policy is to drop on the floor */ + qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); + return TC_ACT_SHOT; + } + + rcu_read_lock(); + action = READ_ONCE(d->tcf_action); + if (unlikely(action == TC_ACT_SHOT)) { + 
qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); + rcu_read_unlock(); + return action; + } + + p = rcu_dereference(d->skbmod_p); + flags = p->flags; + if (flags & SKBMOD_F_DMAC) + ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst); + if (flags & SKBMOD_F_SMAC) + ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src); + if (flags & SKBMOD_F_ETYPE) + eth_hdr(skb)->h_proto = p->eth_type; + rcu_read_unlock(); + + if (flags & SKBMOD_F_SWAPMAC) { + u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */ + /*XXX: I am sure we can come up with more efficient swapping*/ + ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest); + ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source); + ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr); + } + + return action; +} + +static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { + [TCA_SKBMOD_PARMS] = { .len = sizeof(struct tc_skbmod) }, + [TCA_SKBMOD_DMAC] = { .len = ETH_ALEN }, + [TCA_SKBMOD_SMAC] = { .len = ETH_ALEN }, + [TCA_SKBMOD_ETYPE] = { .type = NLA_U16 }, +}; + +static int tcf_skbmod_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, + int ovr, int bind) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + struct nlattr *tb[TCA_SKBMOD_MAX + 1]; + struct tcf_skbmod_params *p, *p_old; + struct tc_skbmod *parm; + struct tcf_skbmod *d; + bool exists = false; + u8 *daddr = NULL; + u8 *saddr = NULL; + u16 eth_type = 0; + u32 lflags = 0; + int ret = 0, err; + + if (!nla) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy); + if (err < 0) + return err; + + if (!tb[TCA_SKBMOD_PARMS]) + return -EINVAL; + + if (tb[TCA_SKBMOD_DMAC]) { + daddr = nla_data(tb[TCA_SKBMOD_DMAC]); + lflags |= SKBMOD_F_DMAC; + } + + if (tb[TCA_SKBMOD_SMAC]) { + saddr = nla_data(tb[TCA_SKBMOD_SMAC]); + lflags |= SKBMOD_F_SMAC; + } + + if (tb[TCA_SKBMOD_ETYPE]) { + eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]); + lflags |= SKBMOD_F_ETYPE; + } + + 
parm = nla_data(tb[TCA_SKBMOD_PARMS]); + if (parm->flags & SKBMOD_F_SWAPMAC) + lflags = SKBMOD_F_SWAPMAC; + + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + if (!lflags) + return -EINVAL; + + if (!exists) { + ret = tcf_hash_create(tn, parm->index, est, a, + &act_skbmod_ops, bind, true); + if (ret) + return ret; + + ret = ACT_P_CREATED; + } else { + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; + } + + d = to_skbmod(*a); + + ASSERT_RTNL(); + p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); + if (unlikely(!p)) { + if (ovr) + tcf_hash_release(*a, bind); + return -ENOMEM; + } + + p->flags = lflags; + d->tcf_action = parm->action; + + p_old = rtnl_dereference(d->skbmod_p); + + if (ovr) + spin_lock_bh(&d->tcf_lock); + + if (lflags & SKBMOD_F_DMAC) + ether_addr_copy(p->eth_dst, daddr); + if (lflags & SKBMOD_F_SMAC) + ether_addr_copy(p->eth_src, saddr); + if (lflags & SKBMOD_F_ETYPE) + p->eth_type = htons(eth_type); + + rcu_assign_pointer(d->skbmod_p, p); + if (ovr) + spin_unlock_bh(&d->tcf_lock); + + if (p_old) + kfree_rcu(p_old, rcu); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(tn, *a); + return ret; +} + +static void tcf_skbmod_cleanup(struct tc_action *a, int bind) +{ + struct tcf_skbmod *d = to_skbmod(a); + struct tcf_skbmod_params *p; + + p = rcu_dereference_protected(d->skbmod_p, 1); + kfree_rcu(p, rcu); +} + +static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + struct tcf_skbmod *d = to_skbmod(a); + unsigned char *b = skb_tail_pointer(skb); + struct tcf_skbmod_params *p = rtnl_dereference(d->skbmod_p); + struct tc_skbmod opt = { + .index = d->tcf_index, + .refcnt = d->tcf_refcnt - ref, + .bindcnt = d->tcf_bindcnt - bind, + .action = d->tcf_action, + }; + struct tcf_t t; + + opt.flags = p->flags; + if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_DMAC) && + nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, 
p->eth_dst)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_SMAC) && + nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, p->eth_src)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_ETYPE) && + nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(p->eth_type))) + goto nla_put_failure; + + tcf_tm_dump(&t, &d->tcf_tm); + if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD)) + goto nla_put_failure; + + return skb->len; +nla_put_failure: + rcu_read_unlock(); + nlmsg_trim(skb, b); + return -1; +} + +static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops); +} + +static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tcf_hash_search(tn, a, index); +} + +static struct tc_action_ops act_skbmod_ops = { + .kind = "skbmod", + .type = TCA_ACT_SKBMOD, + .owner = THIS_MODULE, + .act = tcf_skbmod_run, + .dump = tcf_skbmod_dump, + .init = tcf_skbmod_init, + .cleanup = tcf_skbmod_cleanup, + .walk = tcf_skbmod_walker, + .lookup = tcf_skbmod_search, + .size = sizeof(struct tcf_skbmod), +}; + +static __net_init int skbmod_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK); +} + +static void __net_exit skbmod_exit_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + tc_action_net_exit(tn); +} + +static struct pernet_operations skbmod_net_ops = { + .init = skbmod_init_net, + .exit = skbmod_exit_net, + .id = &skbmod_net_id, + .size = sizeof(struct tc_action_net), +}; + +MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>"); +MODULE_DESCRIPTION("SKB data mod-ing"); +MODULE_LICENSE("GPL"); + +static int __init skbmod_init_module(void) +{ + return 
tcf_register_action(&act_skbmod_ops, &skbmod_net_ops); +} + +static void __exit skbmod_cleanup_module(void) +{ + tcf_unregister_action(&act_skbmod_ops, &skbmod_net_ops); +} + +module_init(skbmod_init_module); +module_exit(skbmod_cleanup_module); diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index dceff7412dc3..af47bdf2f483 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -194,15 +194,12 @@ static void tunnel_key_release(struct tc_action *a, int bind) struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params; - rcu_read_lock(); - params = rcu_dereference(t->params); + params = rcu_dereference_protected(t->params, 1); if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) dst_release(&params->tcft_enc_metadata->dst); kfree_rcu(params, rcu); - - rcu_read_unlock(); } static int tunnel_key_dump_addresses(struct sk_buff *skb, @@ -245,10 +242,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, .bindcnt = t->tcf_bindcnt - bind, }; struct tcf_t tm; - int ret = -1; - rcu_read_lock(); - params = rcu_dereference(t->params); + params = rtnl_dereference(t->params); opt.t_action = params->tcft_action; opt.action = params->action; @@ -272,15 +267,11 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, &tm, TCA_TUNNEL_KEY_PAD)) goto nla_put_failure; - ret = skb->len; - goto out; + return skb->len; nla_put_failure: nlmsg_trim(skb, b); -out: - rcu_read_unlock(); - - return ret; + return -1; } static int tunnel_key_walker(struct net *net, struct sk_buff *skb, diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 4742f415ee5b..1d92d4d3f222 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -83,9 +83,6 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_bpf_prog *prog; int ret = -1; - if (unlikely(!skb_mac_header_was_set(skb))) - return -1; - /* Needed here for accessing maps. 
*/ rcu_read_lock(); list_for_each_entry_rcu(prog, &head->plist, link) { diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index b084b2aab2d7..a3f4c706dfaa 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -33,7 +33,6 @@ struct fl_flow_key { struct flow_dissector_key_basic basic; struct flow_dissector_key_eth_addrs eth; struct flow_dissector_key_vlan vlan; - struct flow_dissector_key_addrs ipaddrs; union { struct flow_dissector_key_ipv4_addrs ipv4; struct flow_dissector_key_ipv6_addrs ipv6; @@ -335,6 +334,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) }, [TCA_FLOWER_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, [TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NLA_U16 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -432,17 +435,17 @@ static int fl_set_key(struct net *net, struct nlattr **tb, if (key->basic.ip_proto == IPPROTO_TCP) { fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK, sizeof(key->tp.src)); fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK, sizeof(key->tp.dst)); } else if (key->basic.ip_proto == IPPROTO_UDP) { fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK, sizeof(key->tp.src)); fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, sizeof(key->tp.dst)); } @@ -877,18 +880,18 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, 
unsigned long fh, if (key->basic.ip_proto == IPPROTO_TCP && (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK, sizeof(key->tp.src)) || fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK, sizeof(key->tp.dst)))) goto nla_put_failure; else if (key->basic.ip_proto == IPPROTO_UDP && (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK, sizeof(key->tp.src)) || fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, sizeof(key->tp.dst)))) goto nla_put_failure; |