diff options
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r-- | drivers/net/tun.c | 172 |
1 files changed, 144 insertions, 28 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 4f4a842a1c9c..e7c5f4b2a9a6 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -180,6 +180,7 @@ struct tun_file { struct list_head next; struct tun_struct *detached; struct skb_array tx_array; + struct xdp_rxq_info xdp_rxq; }; struct tun_flow_entry { @@ -195,6 +196,11 @@ struct tun_flow_entry { #define TUN_NUM_FLOW_ENTRIES 1024 +struct tun_steering_prog { + struct rcu_head rcu; + struct bpf_prog *prog; +}; + /* Since the socket were moved to tun_file, to preserve the behavior of persist * device, socket filter, sndbuf and vnet header size were restore when the * file were attached to a persist device. @@ -232,6 +238,7 @@ struct tun_struct { u32 rx_batched; struct tun_pcpu_stats __percpu *pcpu_stats; struct bpf_prog __rcu *xdp_prog; + struct tun_steering_prog __rcu *steering_prog; }; static int tun_napi_receive(struct napi_struct *napi, int budget) @@ -537,15 +544,12 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) * different rxq no. here. If we could not get rxhash, then we would * hope the rxq no. may help here. */ -static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv, select_queue_fallback_t fallback) +static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb) { - struct tun_struct *tun = netdev_priv(dev); struct tun_flow_entry *e; u32 txq = 0; u32 numqueues = 0; - rcu_read_lock(); numqueues = READ_ONCE(tun->numqueues); txq = __skb_get_hash_symmetric(skb); @@ -563,10 +567,37 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, txq -= numqueues; } - rcu_read_unlock(); return txq; } +static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb) +{ + struct tun_steering_prog *prog; + u16 ret = 0; + + prog = rcu_dereference(tun->steering_prog); + if (prog) + ret = bpf_prog_run_clear_cb(prog->prog, skb); + + return ret % tun->numqueues; +} + +static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback) +{ + struct tun_struct *tun = netdev_priv(dev); + u16 ret; + + rcu_read_lock(); + if (rcu_dereference(tun->steering_prog)) + ret = tun_ebpf_select_queue(tun, skb); + else + ret = tun_automq_select_queue(tun, skb); + rcu_read_unlock(); + + return ret; +} + static inline bool tun_not_capable(struct tun_struct *tun) { const struct cred *cred = current_cred(); @@ -657,8 +688,10 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } - if (tun) + if (tun) { skb_array_cleanup(&tfile->tx_array); + xdp_rxq_info_unreg(&tfile->xdp_rxq); + } sock_put(&tfile->sk); } } @@ -673,7 +706,6 @@ static void tun_detach(struct tun_file *tfile, bool clean) static void tun_detach_all(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); - struct bpf_prog *xdp_prog = rtnl_dereference(tun->xdp_prog); struct tun_file *tfile, *tmp; int i, n = tun->numqueues; @@ -699,18 +731,17 @@ static void tun_detach_all(struct net_device *dev) tun_napi_del(tun, tfile); /* Drop read queue */ tun_queue_purge(tfile); + xdp_rxq_info_unreg(&tfile->xdp_rxq); sock_put(&tfile->sk); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { tun_enable_queue(tfile); tun_queue_purge(tfile); + xdp_rxq_info_unreg(&tfile->xdp_rxq); sock_put(&tfile->sk); } BUG_ON(tun->numdisabled != 0); - if (xdp_prog) - bpf_prog_put(xdp_prog); - if (tun->flags & IFF_PERSIST) module_put(THIS_MODULE); } @@ -758,6 +789,22 @@ static int tun_attach(struct tun_struct *tun, struct file *file, tfile->queue_index = tun->numqueues; tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN; + + if (tfile->detached) { + /* Re-attach detached tfile, updating XDP queue_index */ + WARN_ON(!xdp_rxq_info_is_reg(&tfile->xdp_rxq)); + + if (tfile->xdp_rxq.queue_index != tfile->queue_index) + tfile->xdp_rxq.queue_index = tfile->queue_index; + } else { + /* Setup XDP RX-queue info, for new tfile getting attached */ + err = xdp_rxq_info_reg(&tfile->xdp_rxq, + tun->dev, tfile->queue_index); + if (err < 0) + goto out; + err = 0; + } + rcu_assign_pointer(tfile->tun, tun); rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); tun->numqueues++; @@ -937,23 +984,10 @@ static int tun_net_close(struct net_device *dev) } /* Net device start xmit */ -static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) +static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb) { - struct tun_struct *tun = netdev_priv(dev); - int txq = skb->queue_mapping; - struct tun_file *tfile; - u32 numqueues = 0; - - rcu_read_lock(); - tfile = rcu_dereference(tun->tfiles[txq]); - numqueues = READ_ONCE(tun->numqueues); - - /* Drop packet if interface is not attached */ - if (txq >= numqueues) - goto drop; - #ifdef CONFIG_RPS - if (numqueues == 1 && static_key_false(&rps_needed)) { + if (tun->numqueues == 1 && static_key_false(&rps_needed)) { /* Select queue was not called for the skbuff, so we extract the * RPS hash and save it into the flow_table here. */ @@ -969,6 +1003,24 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) } } #endif +} + +/* Net device start xmit */ +static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct tun_struct *tun = netdev_priv(dev); + int txq = skb->queue_mapping; + struct tun_file *tfile; + + rcu_read_lock(); + tfile = rcu_dereference(tun->tfiles[txq]); + + /* Drop packet if interface is not attached */ + if (txq >= tun->numqueues) + goto drop; + + if (!rcu_dereference(tun->steering_prog)) + tun_automq_xmit(tun, skb); tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len); @@ -1477,6 +1529,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, xdp.data = buf + pad; xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + len; + xdp.rxq = &tfile->xdp_rxq; orig_data = xdp.data; act = bpf_prog_run_xdp(xdp_prog, &xdp); @@ -1551,7 +1604,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, int copylen; bool zerocopy = false; int err; - u32 rxhash; + u32 rxhash = 0; int skb_xdp = 1; bool frags = tun_napi_frags_enabled(tun); @@ -1739,7 +1792,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, rcu_read_unlock(); } - rxhash = __skb_get_hash_symmetric(skb); + rcu_read_lock(); + if (!rcu_dereference(tun->steering_prog)) + rxhash = __skb_get_hash_symmetric(skb); + rcu_read_unlock(); if (frags) { /* Exercise flow dissector code path. */ @@ -1783,7 +1839,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, u64_stats_update_end(&stats->syncp); put_cpu_ptr(stats); - tun_flow_update(tun, rxhash, tfile); + if (rxhash) + tun_flow_update(tun, rxhash, tfile); + return total_len; } @@ -1991,6 +2049,39 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) return ret; } +static void tun_steering_prog_free(struct rcu_head *rcu) +{ + struct tun_steering_prog *prog = container_of(rcu, + struct tun_steering_prog, rcu); + + bpf_prog_destroy(prog->prog); + kfree(prog); +} + +static int __tun_set_steering_ebpf(struct tun_struct *tun, + struct bpf_prog *prog) +{ + struct tun_steering_prog *old, *new = NULL; + + if (prog) { + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return -ENOMEM; + new->prog = prog; + } + + spin_lock_bh(&tun->lock); + old = rcu_dereference_protected(tun->steering_prog, + lockdep_is_held(&tun->lock)); + rcu_assign_pointer(tun->steering_prog, new); + spin_unlock_bh(&tun->lock); + + if (old) + call_rcu(&old->rcu, tun_steering_prog_free); + + return 0; +} + static void tun_free_netdev(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); @@ -1999,6 +2090,7 @@ static void tun_free_netdev(struct net_device *dev) free_percpu(tun->pcpu_stats); tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); + __tun_set_steering_ebpf(tun, NULL); } static void tun_setup(struct net_device *dev) @@ -2287,6 +2379,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) tun->filter_attached = false; tun->sndbuf = tfile->socket.sk->sk_sndbuf; tun->rx_batched = 0; + RCU_INIT_POINTER(tun->steering_prog, NULL); tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats); if (!tun->pcpu_stats) { @@ -2479,6 +2572,25 @@ unlock: return ret; } +static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data) +{ + struct bpf_prog *prog; + int fd; + + if (copy_from_user(&fd, data, sizeof(fd))) + return -EFAULT; + + if (fd == -1) { + prog = NULL; + } else { + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); + if (IS_ERR(prog)) + return PTR_ERR(prog); + } + + return __tun_set_steering_ebpf(tun, prog); +} + static long __tun_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg, int ifreq_len) { @@ -2755,6 +2867,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, ret = 0; break; + case TUNSETSTEERINGEBPF: + ret = tun_set_steering_ebpf(tun, argp); + break; + default: ret = -EINVAL; break; |