From d47effe1be0c4fc983306a9c704632e3a087eed8 Mon Sep 17 00:00:00 2001
From: Krishna Kumar <krkumar2@in.ibm.com>
Date: Tue, 1 Mar 2011 17:06:37 +0530
Subject: vhost: Cleanup vhost.c and net.c

Minor cleanup of vhost.c and net.c to match coding style.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/net.c   | 19 +++++++++++-------
 drivers/vhost/vhost.c | 53 +++++++++++++++++++++++++++++++++++----------------
 2 files changed, 49 insertions(+), 23 deletions(-)

(limited to 'drivers')

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f616cefc95ba..59dad9fe52dd 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -60,6 +60,7 @@ static int move_iovec_hdr(struct iovec *from, struct iovec *to,
 {
 	int seg = 0;
 	size_t size;
+
 	while (len && seg < iov_count) {
 		size = min(from->iov_len, len);
 		to->iov_base = from->iov_base;
@@ -79,6 +80,7 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
 {
 	int seg = 0;
 	size_t size;
+
 	while (len && seg < iovcount) {
 		size = min(from->iov_len, len);
 		to->iov_base = from->iov_base;
@@ -296,17 +298,16 @@ static void handle_rx_big(struct vhost_net *net)
 		.msg_iov = vq->iov,
 		.msg_flags = MSG_DONTWAIT,
 	};
-
 	struct virtio_net_hdr hdr = {
 		.flags = 0,
 		.gso_type = VIRTIO_NET_HDR_GSO_NONE
 	};
-
 	size_t len, total_len = 0;
 	int err;
 	size_t hdr_size;
 	/* TODO: check that we are running from vhost_worker? */
 	struct socket *sock = rcu_dereference_check(vq->private_data, 1);
+
 	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
 		return;
 
@@ -405,18 +406,17 @@ static void handle_rx_mergeable(struct vhost_net *net)
 		.msg_iov = vq->iov,
 		.msg_flags = MSG_DONTWAIT,
 	};
-
 	struct virtio_net_hdr_mrg_rxbuf hdr = {
 		.hdr.flags = 0,
 		.hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
 	};
-
 	size_t total_len = 0;
 	int err, headcount;
 	size_t vhost_hlen, sock_hlen;
 	size_t vhost_len, sock_len;
 	/* TODO: check that we are running from vhost_worker? */
 	struct socket *sock = rcu_dereference_check(vq->private_data, 1);
+
 	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
 		return;
 
@@ -654,6 +654,7 @@ static struct socket *get_raw_socket(int fd)
 	} uaddr;
 	int uaddr_len = sizeof uaddr, r;
 	struct socket *sock = sockfd_lookup(fd, &r);
+
 	if (!sock)
 		return ERR_PTR(-ENOTSOCK);
 
@@ -682,6 +683,7 @@ static struct socket *get_tap_socket(int fd)
 {
 	struct file *file = fget(fd);
 	struct socket *sock;
+
 	if (!file)
 		return ERR_PTR(-EBADF);
 	sock = tun_get_socket(file);
@@ -696,6 +698,7 @@ static struct socket *get_tap_socket(int fd)
 static struct socket *get_socket(int fd)
 {
 	struct socket *sock;
+
 	/* special case to disable backend */
 	if (fd == -1)
 		return NULL;
@@ -741,9 +744,9 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 	oldsock = rcu_dereference_protected(vq->private_data,
 					    lockdep_is_held(&vq->mutex));
 	if (sock != oldsock) {
-                vhost_net_disable_vq(n, vq);
-                rcu_assign_pointer(vq->private_data, sock);
-                vhost_net_enable_vq(n, vq);
+		vhost_net_disable_vq(n, vq);
+		rcu_assign_pointer(vq->private_data, sock);
+		vhost_net_enable_vq(n, vq);
 	}
 
 	mutex_unlock(&vq->mutex);
@@ -768,6 +771,7 @@ static long vhost_net_reset_owner(struct vhost_net *n)
 	struct socket *tx_sock = NULL;
 	struct socket *rx_sock = NULL;
 	long err;
+
 	mutex_lock(&n->dev.mutex);
 	err = vhost_dev_check_owner(&n->dev);
 	if (err)
@@ -829,6 +833,7 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
 	struct vhost_vring_file backend;
 	u64 features;
 	int r;
+
 	switch (ioctl) {
 	case VHOST_NET_SET_BACKEND:
 		if (copy_from_user(&backend, argp, sizeof backend))
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index ade0568c07a4..b0cc7f8ca4de 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -41,8 +41,8 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 			    poll_table *pt)
 {
 	struct vhost_poll *poll;
-	poll = container_of(pt, struct vhost_poll, table);
 
+	poll = container_of(pt, struct vhost_poll, table);
 	poll->wqh = wqh;
 	add_wait_queue(wqh, &poll->wait);
 }
@@ -85,6 +85,7 @@ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
 void vhost_poll_start(struct vhost_poll *poll, struct file *file)
 {
 	unsigned long mask;
+
 	mask = file->f_op->poll(file, &poll->table);
 	if (mask)
 		vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
@@ -101,6 +102,7 @@ static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
 				unsigned seq)
 {
 	int left;
+
 	spin_lock_irq(&dev->work_lock);
 	left = seq - work->done_seq;
 	spin_unlock_irq(&dev->work_lock);
@@ -222,6 +224,7 @@ static int vhost_worker(void *data)
 static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
 {
 	int i;
+
 	for (i = 0; i < dev->nvqs; ++i) {
 		dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect *
 					       UIO_MAXIOV, GFP_KERNEL);
@@ -235,6 +238,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
 			goto err_nomem;
 	}
 	return 0;
+
 err_nomem:
 	for (; i >= 0; --i) {
 		kfree(dev->vqs[i].indirect);
@@ -247,6 +251,7 @@ err_nomem:
 static void vhost_dev_free_iovecs(struct vhost_dev *dev)
 {
 	int i;
+
 	for (i = 0; i < dev->nvqs; ++i) {
 		kfree(dev->vqs[i].indirect);
 		dev->vqs[i].indirect = NULL;
@@ -296,26 +301,28 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
 }
 
 struct vhost_attach_cgroups_struct {
-        struct vhost_work work;
-        struct task_struct *owner;
-        int ret;
+	struct vhost_work work;
+	struct task_struct *owner;
+	int ret;
 };
 
 static void vhost_attach_cgroups_work(struct vhost_work *work)
 {
-        struct vhost_attach_cgroups_struct *s;
-        s = container_of(work, struct vhost_attach_cgroups_struct, work);
-        s->ret = cgroup_attach_task_all(s->owner, current);
+	struct vhost_attach_cgroups_struct *s;
+
+	s = container_of(work, struct vhost_attach_cgroups_struct, work);
+	s->ret = cgroup_attach_task_all(s->owner, current);
 }
 
 static int vhost_attach_cgroups(struct vhost_dev *dev)
 {
-        struct vhost_attach_cgroups_struct attach;
-        attach.owner = current;
-        vhost_work_init(&attach.work, vhost_attach_cgroups_work);
-        vhost_work_queue(dev, &attach.work);
-        vhost_work_flush(dev, &attach.work);
-        return attach.ret;
+	struct vhost_attach_cgroups_struct attach;
+
+	attach.owner = current;
+	vhost_work_init(&attach.work, vhost_attach_cgroups_work);
+	vhost_work_queue(dev, &attach.work);
+	vhost_work_flush(dev, &attach.work);
+	return attach.ret;
 }
 
 /* Caller should have device mutex */
@@ -323,11 +330,13 @@ static long vhost_dev_set_owner(struct vhost_dev *dev)
 {
 	struct task_struct *worker;
 	int err;
+
 	/* Is there an owner already? */
 	if (dev->mm) {
 		err = -EBUSY;
 		goto err_mm;
 	}
+
 	/* No owner, become one */
 	dev->mm = get_task_mm(current);
 	worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
@@ -380,6 +389,7 @@ long vhost_dev_reset_owner(struct vhost_dev *dev)
 void vhost_dev_cleanup(struct vhost_dev *dev)
 {
 	int i;
+
 	for (i = 0; i < dev->nvqs; ++i) {
 		if (dev->vqs[i].kick && dev->vqs[i].handle_kick) {
 			vhost_poll_stop(&dev->vqs[i].poll);
@@ -421,6 +431,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
 {
 	u64 a = addr / VHOST_PAGE_SIZE / 8;
+
 	/* Make sure 64 bit math will not overflow. */
 	if (a > ULONG_MAX - (unsigned long)log_base ||
 	    a + (unsigned long)log_base > ULONG_MAX)
@@ -461,6 +472,7 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
 			    int log_all)
 {
 	int i;
+
 	for (i = 0; i < d->nvqs; ++i) {
 		int ok;
 		mutex_lock(&d->vqs[i].mutex);
@@ -527,6 +539,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
 {
 	struct vhost_memory mem, *newmem, *oldmem;
 	unsigned long size = offsetof(struct vhost_memory, regions);
+
 	if (copy_from_user(&mem, m, size))
 		return -EFAULT;
 	if (mem.padding)
@@ -544,7 +557,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
 		return -EFAULT;
 	}
 
-	if (!memory_access_ok(d, newmem, vhost_has_feature(d, VHOST_F_LOG_ALL))) {
+	if (!memory_access_ok(d, newmem,
+			      vhost_has_feature(d, VHOST_F_LOG_ALL))) {
 		kfree(newmem);
 		return -EFAULT;
 	}
@@ -560,6 +574,7 @@ static int init_used(struct vhost_virtqueue *vq,
 		     struct vring_used __user *used)
 {
 	int r = put_user(vq->used_flags, &used->flags);
+
 	if (r)
 		return r;
 	return get_user(vq->last_used_idx, &used->idx);
@@ -849,6 +864,7 @@ static const struct vhost_memory_region *find_region(struct vhost_memory *mem,
 {
 	struct vhost_memory_region *reg;
 	int i;
+
 	/* linear search is not brilliant, but we really have on the order of 6
 	 * regions in practice */
 	for (i = 0; i < mem->nregions; ++i) {
@@ -871,6 +887,7 @@ static int set_bit_to_user(int nr, void __user *addr)
 	void *base;
 	int bit = nr + (log % PAGE_SIZE) * 8;
 	int r;
+
 	r = get_user_pages_fast(log, 1, 1, &page);
 	if (r < 0)
 		return r;
@@ -888,6 +905,7 @@ static int log_write(void __user *log_base,
 {
 	u64 write_page = write_address / VHOST_PAGE_SIZE;
 	int r;
+
 	if (!write_length)
 		return 0;
 	write_length += write_address % VHOST_PAGE_SIZE;
@@ -1037,8 +1055,8 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 			       i, count);
 			return -EINVAL;
 		}
-		if (unlikely(memcpy_fromiovec((unsigned char *)&desc, vq->indirect,
-					      sizeof desc))) {
+		if (unlikely(memcpy_fromiovec((unsigned char *)&desc,
+					      vq->indirect, sizeof desc))) {
 			vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
 			       i, (size_t)indirect->addr + i * sizeof desc);
 			return -EINVAL;
@@ -1317,6 +1335,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
 void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
 	__u16 flags;
+
 	/* Flush out used index updates. This is paired
 	 * with the barrier that the Guest executes when enabling
 	 * interrupts. */
@@ -1361,6 +1380,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
 {
 	u16 avail_idx;
 	int r;
+
 	if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
 		return false;
 	vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
@@ -1387,6 +1407,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
 void vhost_disable_notify(struct vhost_virtqueue *vq)
 {
 	int r;
+
 	if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
 		return;
 	vq->used_flags |= VRING_USED_F_NO_NOTIFY;
-- 
cgit v1.2.3


From fcc042a2806064ffcaed7a0c5cb710eca0e99108 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 6 Mar 2011 13:33:49 +0200
Subject: vhost: copy_from_user -> __copy_from_user

copy_from_user is pretty high on perf top profile,
replacing it with __copy_from_user helps.
It's also safe because we do access_ok checks during setup.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/vhost.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index b0cc7f8ca4de..2ab291241635 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1171,7 +1171,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 			       i, vq->num, head);
 			return -EINVAL;
 		}
-		ret = copy_from_user(&desc, vq->desc + i, sizeof desc);
+		ret = __copy_from_user(&desc, vq->desc + i, sizeof desc);
 		if (unlikely(ret)) {
 			vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
 			       i, vq->desc + i);
-- 
cgit v1.2.3


From cfbdab951369f15de890597530076bf0119361be Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Mon, 17 Jan 2011 16:10:59 +0800
Subject: vhost-net: check the support of mergeable buffer outside the receive
 loop

No need to check for mergeable buffer support inside the receive
loop, as the whole of handle_rx_xx() is in the read critical region.  So
this patch moves the check ahead of the receiving loop.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/net.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 59dad9fe52dd..9f57cd45fe8f 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -411,7 +411,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
 		.hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
 	};
 	size_t total_len = 0;
-	int err, headcount;
+	int err, headcount, mergeable;
 	size_t vhost_hlen, sock_hlen;
 	size_t vhost_len, sock_len;
 	/* TODO: check that we are running from vhost_worker? */
@@ -427,6 +427,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
 
 	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
 		vq->log : NULL;
+	mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF);
 
 	while ((sock_len = peek_head_len(sock->sk))) {
 		sock_len += sock_hlen;
@@ -476,7 +477,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
 			break;
 		}
 		/* TODO: Should check and handle checksum. */
-		if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) &&
+		if (likely(mergeable) &&
 		    memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount,
 				      offsetof(typeof(hdr), num_buffers),
 				      sizeof hdr.num_buffers)) {
-- 
cgit v1.2.3


From 94249369e9930276e30087da205349a55478cbb5 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Mon, 17 Jan 2011 16:11:08 +0800
Subject: vhost-net: Unify the code of mergeable and big buffer handling

Code duplication was found between the handling of mergeable and big
buffers, so this patch tries to unify them. This can easily be done
by adding a quota to get_rx_bufs(), which is used to limit the
number of buffers it returns (for mergeable buffers, the quota is
simply UIO_MAXIOV; for big buffers, the quota is just 1), and then the
previous handle_rx_mergeable() can be reused for big buffers as well.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/net.c | 128 +++-------------------------------------------------
 1 file changed, 7 insertions(+), 121 deletions(-)

(limited to 'drivers')

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9f57cd45fe8f..0329c411bbf1 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -229,6 +229,7 @@ static int peek_head_len(struct sock *sk)
  * @iovcount	- returned count of io vectors we fill
  * @log		- vhost log
  * @log_num	- log offset
+ * @quota       - headcount quota, 1 for big buffer
  *	returns number of buffer heads allocated, negative on error
  */
 static int get_rx_bufs(struct vhost_virtqueue *vq,
@@ -236,7 +237,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 		       int datalen,
 		       unsigned *iovcount,
 		       struct vhost_log *log,
-		       unsigned *log_num)
+		       unsigned *log_num,
+		       unsigned int quota)
 {
 	unsigned int out, in;
 	int seg = 0;
@@ -244,7 +246,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 	unsigned d;
 	int r, nlogs = 0;
 
-	while (datalen > 0) {
+	while (datalen > 0 && headcount < quota) {
 		if (unlikely(seg >= UIO_MAXIOV)) {
 			r = -ENOBUFS;
 			goto err;
@@ -284,116 +286,7 @@ err:
 
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
-static void handle_rx_big(struct vhost_net *net)
-{
-	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
-	unsigned out, in, log, s;
-	int head;
-	struct vhost_log *vq_log;
-	struct msghdr msg = {
-		.msg_name = NULL,
-		.msg_namelen = 0,
-		.msg_control = NULL, /* FIXME: get and handle RX aux data. */
-		.msg_controllen = 0,
-		.msg_iov = vq->iov,
-		.msg_flags = MSG_DONTWAIT,
-	};
-	struct virtio_net_hdr hdr = {
-		.flags = 0,
-		.gso_type = VIRTIO_NET_HDR_GSO_NONE
-	};
-	size_t len, total_len = 0;
-	int err;
-	size_t hdr_size;
-	/* TODO: check that we are running from vhost_worker? */
-	struct socket *sock = rcu_dereference_check(vq->private_data, 1);
-
-	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
-		return;
-
-	mutex_lock(&vq->mutex);
-	vhost_disable_notify(vq);
-	hdr_size = vq->vhost_hlen;
-
-	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
-		vq->log : NULL;
-
-	for (;;) {
-		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
-					 ARRAY_SIZE(vq->iov),
-					 &out, &in,
-					 vq_log, &log);
-		/* On error, stop handling until the next kick. */
-		if (unlikely(head < 0))
-			break;
-		/* OK, now we need to know about added descriptors. */
-		if (head == vq->num) {
-			if (unlikely(vhost_enable_notify(vq))) {
-				/* They have slipped one in as we were
-				 * doing that: check again. */
-				vhost_disable_notify(vq);
-				continue;
-			}
-			/* Nothing new?  Wait for eventfd to tell us
-			 * they refilled. */
-			break;
-		}
-		/* We don't need to be notified again. */
-		if (out) {
-			vq_err(vq, "Unexpected descriptor format for RX: "
-			       "out %d, int %d\n",
-			       out, in);
-			break;
-		}
-		/* Skip header. TODO: support TSO/mergeable rx buffers. */
-		s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
-		msg.msg_iovlen = in;
-		len = iov_length(vq->iov, in);
-		/* Sanity check */
-		if (!len) {
-			vq_err(vq, "Unexpected header len for RX: "
-			       "%zd expected %zd\n",
-			       iov_length(vq->hdr, s), hdr_size);
-			break;
-		}
-		err = sock->ops->recvmsg(NULL, sock, &msg,
-					 len, MSG_DONTWAIT | MSG_TRUNC);
-		/* TODO: Check specific error and bomb out unless EAGAIN? */
-		if (err < 0) {
-			vhost_discard_vq_desc(vq, 1);
-			break;
-		}
-		/* TODO: Should check and handle checksum. */
-		if (err > len) {
-			pr_debug("Discarded truncated rx packet: "
-				 " len %d > %zd\n", err, len);
-			vhost_discard_vq_desc(vq, 1);
-			continue;
-		}
-		len = err;
-		err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
-		if (err) {
-			vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
-			       vq->iov->iov_base, err);
-			break;
-		}
-		len += hdr_size;
-		vhost_add_used_and_signal(&net->dev, vq, head, len);
-		if (unlikely(vq_log))
-			vhost_log_write(vq, vq_log, log, len);
-		total_len += len;
-		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
-			vhost_poll_queue(&vq->poll);
-			break;
-		}
-	}
-
-	mutex_unlock(&vq->mutex);
-}
-
-/* Expects to be always run from workqueue - which acts as
- * read-size critical section for our kind of RCU. */
-static void handle_rx_mergeable(struct vhost_net *net)
+static void handle_rx(struct vhost_net *net)
 {
 	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
 	unsigned uninitialized_var(in), log;
@@ -433,7 +326,8 @@ static void handle_rx_mergeable(struct vhost_net *net)
 		sock_len += sock_hlen;
 		vhost_len = sock_len + vhost_hlen;
 		headcount = get_rx_bufs(vq, vq->heads, vhost_len,
-					&in, vq_log, &log);
+					&in, vq_log, &log,
+					likely(mergeable) ? UIO_MAXIOV : 1);
 		/* On error, stop handling until the next kick. */
 		if (unlikely(headcount < 0))
 			break;
@@ -499,14 +393,6 @@ static void handle_rx_mergeable(struct vhost_net *net)
 	mutex_unlock(&vq->mutex);
 }
 
-static void handle_rx(struct vhost_net *net)
-{
-	if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF))
-		handle_rx_mergeable(net);
-	else
-		handle_rx_big(net);
-}
-
 static void handle_tx_kick(struct vhost_work *work)
 {
 	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
-- 
cgit v1.2.3


From 783e3988544b94ff3918666b9f36866ac547fba1 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Mon, 17 Jan 2011 16:11:17 +0800
Subject: vhost: lock receive queue, not the socket

vhost takes a sock lock to try and prevent
the skb from being pulled from the receive queue
after skb_peek.  However this is not the right lock to use for that,
sk_receive_queue.lock is. Fix that up.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/net.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 0329c411bbf1..57203014c457 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -213,12 +213,13 @@ static int peek_head_len(struct sock *sk)
 {
 	struct sk_buff *head;
 	int len = 0;
+	unsigned long flags;
 
-	lock_sock(sk);
+	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
 	head = skb_peek(&sk->sk_receive_queue);
-	if (head)
+	if (likely(head))
 		len = head->len;
-	release_sock(sk);
+	spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags);
 	return len;
 }
 
-- 
cgit v1.2.3


From de4d768a428d9de943dd6dc82bcd61742955cb6e Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 13 Mar 2011 23:00:52 +0200
Subject: vhost-net: remove unlocked use of receive_queue

Use of skb_queue_empty(&sock->sk->sk_receive_queue)
without taking the sk_receive_queue.lock is unsafe
or useless. Take it out.

Reported-by:  Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 57203014c457..2f7c76a85e53 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -311,7 +311,7 @@ static void handle_rx(struct vhost_net *net)
 	/* TODO: check that we are running from vhost_worker? */
 	struct socket *sock = rcu_dereference_check(vq->private_data, 1);
 
-	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
+	if (!sock)
 		return;
 
 	mutex_lock(&vq->mutex);
-- 
cgit v1.2.3


From 4363c2fddb1399b728ef21ee8101c148a311ea45 Mon Sep 17 00:00:00 2001
From: Alex Dubov <oakad@yahoo.com>
Date: Wed, 16 Mar 2011 17:57:13 +0000
Subject: gianfar: Fall back to software tcp/udp checksum on older controllers

As specified by errata eTSEC49 of MPC8548 and errata eTSEC12 of MPC83xx,
older revisions of gianfar controllers will be unable to calculate a TCP/UDP
packet checksum for some alignments of the appropriate FCB. This patch checks
for FCB alignment on such controllers and falls back to software checksumming
if the alignment is known to be bad.

Signed-off-by: Alex Dubov <oakad@yahoo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gianfar.c | 16 ++++++++++++++--
 drivers/net/gianfar.h |  1 +
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index ccb231c4d933..2a0ad9a501bb 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -949,6 +949,11 @@ static void gfar_detect_errata(struct gfar_private *priv)
 			(pvr == 0x80861010 && (mod & 0xfff9) == 0x80c0))
 		priv->errata |= GFAR_ERRATA_A002;
 
+	/* MPC8313 Rev < 2.0, MPC8548 rev 2.0 */
+	if ((pvr == 0x80850010 && mod == 0x80b0 && rev < 0x0020) ||
+			(pvr == 0x80210020 && mod == 0x8030 && rev == 0x0020))
+		priv->errata |= GFAR_ERRATA_12;
+
 	if (priv->errata)
 		dev_info(dev, "enabled errata workarounds, flags: 0x%x\n",
 			 priv->errata);
@@ -2154,8 +2159,15 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Set up checksumming */
 	if (CHECKSUM_PARTIAL == skb->ip_summed) {
 		fcb = gfar_add_fcb(skb);
-		lstatus |= BD_LFLAG(TXBD_TOE);
-		gfar_tx_checksum(skb, fcb);
+		/* as specified by errata */
+		if (unlikely(gfar_has_errata(priv, GFAR_ERRATA_12)
+			     && ((unsigned long)fcb % 0x20) > 0x18)) {
+			__skb_pull(skb, GMAC_FCB_LEN);
+			skb_checksum_help(skb);
+		} else {
+			lstatus |= BD_LFLAG(TXBD_TOE);
+			gfar_tx_checksum(skb, fcb);
+		}
 	}
 
 	if (vlan_tx_tag_present(skb)) {
diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h
index 54de4135e932..ec5d595ce2e2 100644
--- a/drivers/net/gianfar.h
+++ b/drivers/net/gianfar.h
@@ -1039,6 +1039,7 @@ enum gfar_errata {
 	GFAR_ERRATA_74		= 0x01,
 	GFAR_ERRATA_76		= 0x02,
 	GFAR_ERRATA_A002	= 0x04,
+	GFAR_ERRATA_12		= 0x08, /* a.k.a errata eTSEC49 */
 };
 
 /* Struct stolen almost completely (and shamelessly) from the FCC enet source
-- 
cgit v1.2.3


From 93d03203d5a165d7a757546245dd1543dfe0ff80 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Fri, 18 Mar 2011 21:53:03 -0700
Subject: ftmac100: use resource_size()

The calculation is off-by-one.  It should be "end - start + 1".  This
patch fixes it to use resource_size() instead.  Oddly, the code already
uses resource size correctly a couple lines earlier when it calls
request_mem_region() for this memory.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ftmac100.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/net/ftmac100.c b/drivers/net/ftmac100.c
index 1d6f4b8d393a..a31661948c42 100644
--- a/drivers/net/ftmac100.c
+++ b/drivers/net/ftmac100.c
@@ -1102,7 +1102,7 @@ static int ftmac100_probe(struct platform_device *pdev)
 		goto err_req_mem;
 	}
 
-	priv->base = ioremap(res->start, res->end - res->start);
+	priv->base = ioremap(res->start, resource_size(res));
 	if (!priv->base) {
 		dev_err(&pdev->dev, "Failed to ioremap ethernet registers\n");
 		err = -EIO;
-- 
cgit v1.2.3


From dadaa10b077133e5c03333131b82ecb13679af2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20de=20Peslo=C3=BCan?= <nicolas.2p.debian@free.fr>
Date: Sat, 19 Mar 2011 13:36:18 -0700
Subject: bonding: fix a typo in a comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Nicolas de Pesloüan <nicolas.2p.debian@free.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1a6e9eb7af43..338bea147c64 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2130,7 +2130,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 }
 
 /*
-* First release a slave and than destroy the bond if no more slaves are left.
+* First release a slave and then destroy the bond if no more slaves are left.
 * Must be under rtnl_lock when this function is called.
 */
 static int  bond_release_and_destroy(struct net_device *bond_dev,
-- 
cgit v1.2.3


From b26fa4e0275426450238a14158bc1db24bb696e6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 19 Mar 2011 05:39:11 +0000
Subject: r8169: fix a bug in rtl8169_init_phy()

commit 54405cde7624 (r8169: support control of advertising.)
introduced a bug in rtl8169_init_phy()

Reported-by: Piotr Hosowicz <piotr@hosowicz.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Oliver Neukum <oliver@neukum.org>
Cc: Francois Romieu <romieu@fr.zoreil.com>
Tested-by: Anca Emanuel <anca.emanuel@gmail.com>
Tested-by: Piotr Hosowicz <piotr@hosowicz.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/r8169.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 5e403511289d..493b0de3848b 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -2685,9 +2685,9 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 	rtl8169_set_speed(dev, AUTONEG_ENABLE, SPEED_1000, DUPLEX_FULL,
 		ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full |
 		ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full |
-		tp->mii.supports_gmii ?
+		(tp->mii.supports_gmii ?
 			ADVERTISED_1000baseT_Half |
-			ADVERTISED_1000baseT_Full : 0);
+			ADVERTISED_1000baseT_Full : 0));
 
 	if (RTL_R8(PHYstatus) & TBI_Enable)
 		netif_info(tp, link, dev, "TBI auto-negotiating\n");
-- 
cgit v1.2.3


From a769f4968396093d5cc1b1a86204cef579784b24 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 19 Mar 2011 23:06:33 -0700
Subject: niu: Rename NIU parent platform device name to fix conflict.

When the OF device driver bits were converted over to the platform
device infrastructure in commit 74888760d40b3ac9054f9c5fa07b566c0676ba2d
("dt/net: Eliminate users of of_platform_{,un}register_driver") we
inadvertently created probing problems in the OF case.

The NIU driver creates a dummy platform device to represent the
board that contains one or more child NIU devices.  Unfortunately
we use the same name, "niu", as the OF device driver itself uses.

The result is that we try to probe the dummy "niu" parent device we
create, and since it has a NULL ofdevice pointer etc. everything
explodes:

[783019.128243] niu: niu.c:v1.1 (Apr 22, 2010)
[783019.128810] Unable to handle kernel NULL pointer dereference
[783019.128949] tsk->{mm,active_mm}->context = 000000000000039e
[783019.129078] tsk->{mm,active_mm}->pgd = fffff803afc5a000
[783019.129206]               \|/ ____ \|/
[783019.129213]               "@'/ .. \`@"
[783019.129220]               /_| \__/ |_\
[783019.129226]                  \__U_/
[783019.129378] modprobe(2004): Oops [#1]
[783019.129423] TSTATE: 0000000011001602 TPC: 0000000010052ff8 TNPC: 000000000061bbb4 Y: 00000000    Not tainted
[783019.129542] TPC: <niu_of_probe+0x3c/0x2dc [niu]>
[783019.129624] g0: 8080000000000000 g1: 0000000000000000 g2: 0000000010056000 g3: 0000000000000002
[783019.129733] g4: fffff803fc1da0c0 g5: fffff800441e2000 g6: fffff803fba84000 g7: 0000000000000000
[783019.129842] o0: fffff803fe7df010 o1: 0000000010055700 o2: 0000000000000000 o3: fffff803fbacaca0
[783019.129951] o4: 0000000000000080 o5: 0000000000777908 sp: fffff803fba866e1 ret_pc: 0000000010052ff4
[783019.130083] RPC: <niu_of_probe+0x38/0x2dc [niu]>
[783019.130165] l0: fffff803fe7df010 l1: fffff803fbacafc0 l2: fffff803fbacaca0 l3: ffffffffffffffed
[783019.130273] l4: 0000000000000000 l5: 000000007fffffff l6: fffff803fba86f40 l7: 0000000000000001
[783019.130382] i0: fffff803fe7df000 i1: fffff803fc20aba0 i2: 0000000000000000 i3: 0000000000000001
[783019.130490] i4: 0000000000000000 i5: 0000000000000000 i6: fffff803fba867a1 i7: 000000000062038c
[783019.130614] I7: <platform_drv_probe+0xc/0x20>

Fix by simply renaming the parent device to "niu-board".

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/niu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 40fa59e2fd5c..32678b6c6b39 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -9501,7 +9501,7 @@ static struct niu_parent * __devinit niu_new_parent(struct niu *np,
 	struct niu_parent *p;
 	int i;
 
-	plat_dev = platform_device_register_simple("niu", niu_parent_index,
+	plat_dev = platform_device_register_simple("niu-board", niu_parent_index,
 						   NULL, 0);
 	if (IS_ERR(plat_dev))
 		return NULL;
-- 
cgit v1.2.3


From 4f2d56c45fec7c15169599cab05e9f6df18769d0 Mon Sep 17 00:00:00 2001
From: Jan Altenberg <jan@linutronix.de>
Date: Mon, 21 Mar 2011 18:19:26 -0700
Subject: can: c_can: Do basic c_can configuration _before_ enabling the
 interrupts

I ran into some trouble while testing the SocketCAN driver for the BOSCH
C_CAN controller. The interface is not correctly initialized, if I put
some CAN traffic on the line, _while_ the interface is being started
(which means: the interface doesn't come up correctly, if there's some RX
traffic while doing 'ifconfig can0 up').

The current implementation enables the controller interrupts _before_
doing the basic c_can configuration. I think, this should be done the
other way round.

The patch below fixes things for me.

Signed-off-by: Jan Altenberg <jan@linutronix.de>
Acked-by: Kurt Van Dijck <kurt.van.dijck@eia.be>
Acked-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/c_can/c_can.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 14050786218a..110eda01843c 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -633,9 +633,6 @@ static void c_can_start(struct net_device *dev)
 {
 	struct c_can_priv *priv = netdev_priv(dev);
 
-	/* enable status change, error and module interrupts */
-	c_can_enable_all_interrupts(priv, ENABLE_ALL_INTERRUPTS);
-
 	/* basic c_can configuration */
 	c_can_chip_config(dev);
 
@@ -643,6 +640,9 @@ static void c_can_start(struct net_device *dev)
 
 	/* reset tx helper pointers */
 	priv->tx_next = priv->tx_echo = 0;
+
+	/* enable status change, error and module interrupts */
+	c_can_enable_all_interrupts(priv, ENABLE_ALL_INTERRUPTS);
 }
 
 static void c_can_stop(struct net_device *dev)
-- 
cgit v1.2.3


From d5cd92448fded12c91f7574e49747c5f7d975a8d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@aristanetworks.com>
Date: Mon, 21 Mar 2011 18:22:22 -0700
Subject: macvlan: Fix use after free of struct macvlan_port.

When the macvlan driver was extended to call unregister_netdevice_queue
in 23289a37e2b127dfc4de1313fba15bb4c9f0cd5b, a use after free of struct
macvlan_port was introduced.  The code in dellink relied on unregister_netdevice
actually unregistering the net device so it would be safe to free macvlan_port.

Since unregister_netdevice_queue can just queue up the unregister instead of
performing the unregister immediately, we free the macvlan_port too soon and
then the code in macvlan_stop removes the MAC address from the set of MAC addresses
to listen for and uses memory that has already been freed.

To fix this add a reference count to track when it is safe to free the macvlan_port
and move the call of macvlan_port_destroy into macvlan_uninit which is guaranteed
to be called after the final macvlan_port_close.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 5b37d3c191e4..78e34e9e4f00 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -39,8 +39,11 @@ struct macvlan_port {
 	struct list_head	vlans;
 	struct rcu_head		rcu;
 	bool 			passthru;
+	int			count;
 };
 
+static void macvlan_port_destroy(struct net_device *dev);
+
 #define macvlan_port_get_rcu(dev) \
 	((struct macvlan_port *) rcu_dereference(dev->rx_handler_data))
 #define macvlan_port_get(dev) ((struct macvlan_port *) dev->rx_handler_data)
@@ -457,8 +460,13 @@ static int macvlan_init(struct net_device *dev)
 static void macvlan_uninit(struct net_device *dev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct macvlan_port *port = vlan->port;
 
 	free_percpu(vlan->pcpu_stats);
+
+	port->count -= 1;
+	if (!port->count)
+		macvlan_port_destroy(port->dev);
 }
 
 static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev,
@@ -691,12 +699,13 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 		vlan->mode = nla_get_u32(data[IFLA_MACVLAN_MODE]);
 
 	if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
-		if (!list_empty(&port->vlans))
+		if (port->count)
 			return -EINVAL;
 		port->passthru = true;
 		memcpy(dev->dev_addr, lowerdev->dev_addr, ETH_ALEN);
 	}
 
+	port->count += 1;
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto destroy_port;
@@ -707,7 +716,8 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 	return 0;
 
 destroy_port:
-	if (list_empty(&port->vlans))
+	port->count -= 1;
+	if (!port->count)
 		macvlan_port_destroy(lowerdev);
 
 	return err;
@@ -725,13 +735,9 @@ static int macvlan_newlink(struct net *src_net, struct net_device *dev,
 void macvlan_dellink(struct net_device *dev, struct list_head *head)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
-	struct macvlan_port *port = vlan->port;
 
 	list_del(&vlan->list);
 	unregister_netdevice_queue(dev, head);
-
-	if (list_empty(&port->vlans))
-		macvlan_port_destroy(port->dev);
 }
 EXPORT_SYMBOL_GPL(macvlan_dellink);
 
-- 
cgit v1.2.3


From 675071a2ef3f4a6d25ee002a7437d50431168344 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@aristanetworks.com>
Date: Mon, 21 Mar 2011 18:24:53 -0700
Subject: veth: Fix the byte counters

Commit 44540960 "veth: move loopback logic to common location" introduced
a bug in the packet counters.  I don't understand why that happened as it
is not explained in the comments and the MTU check in dev_forward_skb
retains the assumption that skb->len is the total length of the packet.

I just measured this empirically by setting up a veth pair between two
noop network namespaces and attempting a telnet connection between
the two.  I saw three packets in each direction and the byte counters were
exactly 14*3 = 42 bytes high in each direction.  I got the actual
packet lengths with tcpdump.

So remove the extra ETH_HLEN from the veth byte count totals.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/veth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 105d7f0630cc..2de9b90c5f8f 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -171,7 +171,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (skb->ip_summed == CHECKSUM_NONE)
 		skb->ip_summed = rcv_priv->ip_summed;
 
-	length = skb->len + ETH_HLEN;
+	length = skb->len;
 	if (dev_forward_skb(rcv, skb) != NET_RX_SUCCESS)
 		goto rx_drop;
 
-- 
cgit v1.2.3