From 2b5bbe3b8bee8b38bdc27dd9c0270829b6eb7eeb Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:59:17 -0600 Subject: virtio_net: skb_orphan() and nf_reset() in xmit path. The complex transmit free logic was introduced to avoid hangs on removing the ip_conntrack module and also because drivers aren't generally supposed to keep stale skbs for unbounded times. After some debate, it was decided that while doing skb_orphan() generally is a rat's nest, we can do it in this driver. Following patches take advantage of this. Signed-off-by: Rusty Russell --- drivers/net/virtio_net.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 5c498d2b043f..dc4c68718976 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -528,8 +528,12 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); - if (err >= 0 && !vi->free_in_tasklet) + if (err >= 0 && !vi->free_in_tasklet) { + /* Don't wait up for transmitted skbs to be freed. */ + skb_orphan(skb); + nf_reset(skb); mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10)); + } return err; } -- cgit v1.2.3 From 8958f574dbe7e41cc54df0df1accc861bb9f6be8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:59:18 -0600 Subject: virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. This effectively reverts 99ffc696d10b28580fe93441d627cf290ac4484c "virtio: wean net driver off NETDEV_TX_BUSY". The complexity of queuing an skb (setting a tasklet to re-xmit) is questionable, especially once we get rid of the other reason for the tasklet in the next patch. If the skb won't fit in the tx queue, just return NETDEV_TX_BUSY. This is frowned upon, so a followup patch uses a more complex solution. 
Signed-off-by: Rusty Russell Cc: Herbert Xu --- drivers/net/virtio_net.c | 46 ++++++++++------------------------------------ 1 file changed, 10 insertions(+), 36 deletions(-) (limited to 'drivers') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index dc4c68718976..222f3d098ae4 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -48,9 +48,6 @@ struct virtnet_info struct napi_struct napi; unsigned int status; - /* The skb we couldn't send because buffers were full. */ - struct sk_buff *last_xmit_skb; - /* If we need to free in a timer, this is it. */ struct timer_list xmit_free_timer; @@ -120,9 +117,8 @@ static void skb_xmit_done(struct virtqueue *svq) /* We were probably waiting for more output buffers. */ netif_wake_queue(vi->dev); - /* Make sure we re-xmit last_xmit_skb: if there are no more packets - * queued, start_xmit won't be called. */ - tasklet_schedule(&vi->tasklet); + if (vi->free_in_tasklet) + tasklet_schedule(&vi->tasklet); } static void receive_skb(struct net_device *dev, struct sk_buff *skb, @@ -543,12 +539,7 @@ static void xmit_tasklet(unsigned long data) struct virtnet_info *vi = (void *)data; netif_tx_lock_bh(vi->dev); - if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) >= 0) { - vi->svq->vq_ops->kick(vi->svq); - vi->last_xmit_skb = NULL; - } - if (vi->free_in_tasklet) - free_old_xmit_skbs(vi); + free_old_xmit_skbs(vi); netif_tx_unlock_bh(vi->dev); } @@ -560,28 +551,16 @@ again: /* Free up any pending old buffers before queueing new ones. */ free_old_xmit_skbs(vi); - /* If we has a buffer left over from last time, send it now. 
*/ - if (unlikely(vi->last_xmit_skb) && - xmit_skb(vi, vi->last_xmit_skb) < 0) - goto stop_queue; - - vi->last_xmit_skb = NULL; - /* Put new one in send queue and do transmit */ - if (likely(skb)) { - __skb_queue_head(&vi->send, skb); - if (xmit_skb(vi, skb) < 0) { - vi->last_xmit_skb = skb; - skb = NULL; - goto stop_queue; - } + __skb_queue_head(&vi->send, skb); + if (likely(xmit_skb(vi, skb) >= 0)) { + vi->svq->vq_ops->kick(vi->svq); + return NETDEV_TX_OK; } -done: - vi->svq->vq_ops->kick(vi->svq); - return NETDEV_TX_OK; -stop_queue: + /* Ring too full for this packet, remove it from queue again. */ pr_debug("%s: virtio not prepared to send\n", dev->name); + __skb_unlink(skb, &vi->send); netif_stop_queue(dev); /* Activate callback for using skbs: if this returns false it @@ -591,12 +570,7 @@ stop_queue: netif_start_queue(dev); goto again; } - if (skb) { - /* Drop this skb: we only queue one. */ - vi->dev->stats.tx_dropped++; - kfree_skb(skb); - } - goto done; + return NETDEV_TX_BUSY; } static int virtnet_set_mac_address(struct net_device *dev, void *p) -- cgit v1.2.3 From b0c39dbdc204006ef3558a66716ff09797619778 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:59:19 -0600 Subject: virtio_net: don't free buffers in xmit ring The virtio_net driver is complicated by the two methods of freeing old xmit buffers (in addition to freeing old ones at the start of the xmit path). The original code used a 1/10 second timer attached to xmit_free(), reset on every xmit. Before we orphaned skbs on xmit, the transmitting userspace could block with a full socket until the timer fired, the skb destructor was called, and they were re-woken. So we added the VIRTIO_F_NOTIFY_ON_EMPTY feature: supporting devices send an interrupt (even if normally suppressed) on an empty xmit ring which makes us schedule xmit_tasklet(). This was a benchmark win. 
Unfortunately, VIRTIO_F_NOTIFY_ON_EMPTY makes quite a lot of work: a host which is faster than the guest will fire the interrupt every xmit packet (slowing the guest down further). Attempting mitigation in the host adds overhead of userspace timers (possibly with the additional pain of signals), and risks increasing latency anyway if you get it wrong. In practice, this effect was masked by benchmarks which take advantage of GSO (with its inherent transmit batching), but it's still there. Now we orphan xmitted skbs, the pressure is off: remove both paths and no longer request VIRTIO_F_NOTIFY_ON_EMPTY. Note that the current QEMU will notify us even if we don't negotiate this feature (legal, but suboptimal); a patch is outstanding to improve that. Move the skb_orphan/nf_reset to after we've done the send and notified the other end, for a slight optimization. Signed-off-by: Rusty Russell Cc: Mark McLoughlin --- drivers/net/virtio_net.c | 64 ++++-------------------------------------------- 1 file changed, 5 insertions(+), 59 deletions(-) (limited to 'drivers') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 222f3d098ae4..3041e4eddb3b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -48,16 +48,9 @@ struct virtnet_info struct napi_struct napi; unsigned int status; - /* If we need to free in a timer, this is it. */ - struct timer_list xmit_free_timer; - /* Number of input buffers, and max we've ever had. */ unsigned int num, max; - /* For cleaning up after transmission. */ - struct tasklet_struct tasklet; - bool free_in_tasklet; - /* I like... big packets and I cannot lie! */ bool big_packets; @@ -116,9 +109,6 @@ static void skb_xmit_done(struct virtqueue *svq) /* We were probably waiting for more output buffers. 
*/ netif_wake_queue(vi->dev); - - if (vi->free_in_tasklet) - tasklet_schedule(&vi->tasklet); } static void receive_skb(struct net_device *dev, struct sk_buff *skb, @@ -458,25 +448,9 @@ static void free_old_xmit_skbs(struct virtnet_info *vi) } } -/* If the virtio transport doesn't always notify us when all in-flight packets - * are consumed, we fall back to using this function on a timer to free them. */ -static void xmit_free(unsigned long data) -{ - struct virtnet_info *vi = (void *)data; - - netif_tx_lock(vi->dev); - - free_old_xmit_skbs(vi); - - if (!skb_queue_empty(&vi->send)) - mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10)); - - netif_tx_unlock(vi->dev); -} - static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) { - int num, err; + int num; struct scatterlist sg[2+MAX_SKB_FRAGS]; struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); @@ -522,25 +496,7 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) sg_set_buf(sg, hdr, sizeof(*hdr)); num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; - - err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); - if (err >= 0 && !vi->free_in_tasklet) { - /* Don't wait up for transmitted skbs to be freed. */ - skb_orphan(skb); - nf_reset(skb); - mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10)); - } - - return err; -} - -static void xmit_tasklet(unsigned long data) -{ - struct virtnet_info *vi = (void *)data; - - netif_tx_lock_bh(vi->dev); - free_old_xmit_skbs(vi); - netif_tx_unlock_bh(vi->dev); + return vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); } static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) @@ -555,6 +511,9 @@ again: __skb_queue_head(&vi->send, skb); if (likely(xmit_skb(vi, skb) >= 0)) { vi->svq->vq_ops->kick(vi->svq); + /* Don't wait up for transmitted skbs to be freed. 
*/ + skb_orphan(skb); + nf_reset(skb); return NETDEV_TX_OK; } @@ -903,10 +862,6 @@ static int virtnet_probe(struct virtio_device *vdev) vi->pages = NULL; INIT_DELAYED_WORK(&vi->refill, refill_work); - /* If they give us a callback when all buffers are done, we don't need - * the timer. */ - vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY); - /* If we can receive ANY GSO packets, we must allocate large ones. */ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) @@ -938,11 +893,6 @@ static int virtnet_probe(struct virtio_device *vdev) skb_queue_head_init(&vi->recv); skb_queue_head_init(&vi->send); - tasklet_init(&vi->tasklet, xmit_tasklet, (unsigned long)vi); - - if (!vi->free_in_tasklet) - setup_timer(&vi->xmit_free_timer, xmit_free, (unsigned long)vi); - err = register_netdev(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); @@ -983,9 +933,6 @@ static void virtnet_remove(struct virtio_device *vdev) /* Stop all the virtqueues. */ vdev->config->reset(vdev); - if (!vi->free_in_tasklet) - del_timer_sync(&vi->xmit_free_timer); - /* Free our skbs in send and recv queues, if any. */ while ((skb = __skb_dequeue(&vi->recv)) != NULL) { kfree_skb(skb); @@ -1019,7 +966,6 @@ static unsigned int features[] = { VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, - VIRTIO_F_NOTIFY_ON_EMPTY, }; static struct virtio_driver virtio_net = { -- cgit v1.2.3 From b3f24698a7faa6e9d8a14124cfdc25353fc8ca19 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:59:19 -0600 Subject: virtio_net: formalize skb_vnet_hdr We put the virtio_net_hdr into the skb's cb region; turn this into a union to clean up the code slightly and allow future expansion. 
Signed-off-by: Rusty Russell Cc: Mark McLoughlin Cc: Dinesh Subhraveti --- drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 37 deletions(-) (limited to 'drivers') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 3041e4eddb3b..420388a4c5e8 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -68,9 +68,16 @@ struct virtnet_info struct page *pages; }; -static inline void *skb_vnet_hdr(struct sk_buff *skb) +struct skb_vnet_hdr { + union { + struct virtio_net_hdr hdr; + struct virtio_net_hdr_mrg_rxbuf mhdr; + }; +}; + +static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb) { - return (struct virtio_net_hdr *)skb->cb; + return (struct skb_vnet_hdr *)skb->cb; } static void give_a_page(struct virtnet_info *vi, struct page *page) @@ -115,7 +122,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, unsigned len) { struct virtnet_info *vi = netdev_priv(dev); - struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); + struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); int err; int i; @@ -126,7 +133,6 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, } if (vi->mergeable_rx_bufs) { - struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); unsigned int copy; char *p = page_address(skb_shinfo(skb)->frags[0].page); @@ -134,8 +140,8 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, len = PAGE_SIZE; len -= sizeof(struct virtio_net_hdr_mrg_rxbuf); - memcpy(hdr, p, sizeof(*mhdr)); - p += sizeof(*mhdr); + memcpy(&hdr->mhdr, p, sizeof(hdr->mhdr)); + p += sizeof(hdr->mhdr); copy = len; if (copy > skb_tailroom(skb)) @@ -150,13 +156,13 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, skb_shinfo(skb)->nr_frags--; } else { skb_shinfo(skb)->frags[0].page_offset += - sizeof(*mhdr) + copy; + sizeof(hdr->mhdr) + copy; skb_shinfo(skb)->frags[0].size = len; skb->data_len += len; skb->len += len; } - while 
(--mhdr->num_buffers) { + while (--hdr->mhdr.num_buffers) { struct sk_buff *nskb; i = skb_shinfo(skb)->nr_frags; @@ -170,7 +176,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, nskb = vi->rvq->vq_ops->get_buf(vi->rvq, &len); if (!nskb) { pr_debug("%s: rx error: %d buffers missing\n", - dev->name, mhdr->num_buffers); + dev->name, hdr->mhdr.num_buffers); dev->stats.rx_length_errors++; goto drop; } @@ -191,7 +197,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, skb->len += len; } } else { - len -= sizeof(struct virtio_net_hdr); + len -= sizeof(hdr->hdr); if (len <= MAX_PACKET_LEN) trim_pages(vi, skb); @@ -209,9 +215,11 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, dev->stats.rx_bytes += skb->len; dev->stats.rx_packets++; - if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { pr_debug("Needs csum!\n"); - if (!skb_partial_csum_set(skb,hdr->csum_start,hdr->csum_offset)) + if (!skb_partial_csum_set(skb, + hdr->hdr.csum_start, + hdr->hdr.csum_offset)) goto frame_err; } @@ -219,9 +227,9 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, pr_debug("Receiving skb proto 0x%04x len %i type %i\n", ntohs(skb->protocol), skb->len, skb->pkt_type); - if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { pr_debug("GSO!\n"); - switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; break; @@ -234,14 +242,14 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, default: if (net_ratelimit()) printk(KERN_WARNING "%s: bad gso type %u.\n", - dev->name, hdr->gso_type); + dev->name, hdr->hdr.gso_type); goto frame_err; } - if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) + if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - 
skb_shinfo(skb)->gso_size = hdr->gso_size; + skb_shinfo(skb)->gso_size = hdr->hdr.gso_size; if (skb_shinfo(skb)->gso_size == 0) { if (net_ratelimit()) printk(KERN_WARNING "%s: zero gso size.\n", @@ -272,7 +280,7 @@ static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp) sg_init_table(sg, 2+MAX_SKB_FRAGS); for (;;) { - struct virtio_net_hdr *hdr; + struct skb_vnet_hdr *hdr; skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN + NET_IP_ALIGN); if (unlikely(!skb)) { @@ -284,7 +292,7 @@ static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp) skb_put(skb, MAX_PACKET_LEN); hdr = skb_vnet_hdr(skb); - sg_set_buf(sg, hdr, sizeof(*hdr)); + sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr)); if (vi->big_packets) { for (i = 0; i < MAX_SKB_FRAGS; i++) { @@ -452,8 +460,7 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) { int num; struct scatterlist sg[2+MAX_SKB_FRAGS]; - struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); - struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); + struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; sg_init_table(sg, 2+MAX_SKB_FRAGS); @@ -461,39 +468,39 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); if (skb->ip_summed == CHECKSUM_PARTIAL) { - hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - hdr->csum_start = skb->csum_start - skb_headroom(skb); - hdr->csum_offset = skb->csum_offset; + hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + hdr->hdr.csum_start = skb->csum_start - skb_headroom(skb); + hdr->hdr.csum_offset = skb->csum_offset; } else { - hdr->flags = 0; - hdr->csum_offset = hdr->csum_start = 0; + hdr->hdr.flags = 0; + hdr->hdr.csum_offset = hdr->hdr.csum_start = 0; } if (skb_is_gso(skb)) { - hdr->hdr_len = skb_headlen(skb); - hdr->gso_size = skb_shinfo(skb)->gso_size; + hdr->hdr.hdr_len = skb_headlen(skb); + hdr->hdr.gso_size = skb_shinfo(skb)->gso_size; if 
(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) - hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) - hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; + hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP; else BUG(); if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN) - hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; + hdr->hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN; } else { - hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; - hdr->gso_size = hdr->hdr_len = 0; + hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; + hdr->hdr.gso_size = hdr->hdr.hdr_len = 0; } - mhdr->num_buffers = 0; + hdr->mhdr.num_buffers = 0; /* Encode metadata header at front. */ if (vi->mergeable_rx_bufs) - sg_set_buf(sg, mhdr, sizeof(*mhdr)); + sg_set_buf(sg, &hdr->mhdr, sizeof(hdr->mhdr)); else - sg_set_buf(sg, hdr, sizeof(*hdr)); + sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr)); num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; return vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); -- cgit v1.2.3 From 48925e372f04f5e35fec6269127c62b2c71ab794 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:59:20 -0600 Subject: virtio_net: avoid (most) NETDEV_TX_BUSY by stopping queue early. Now we can tell the theoretical capacity remaining in the output queue, virtio_net can waste entries by stopping the queue early. It doesn't work in the case of indirect buffers and kmalloc failure, but that's rare (we could drop the packet in that case, but other drivers return TX_BUSY for similar reasons). For the record, I think this patch reflects poorly on the linux network API. 
Signed-off-by: Rusty Russell Cc: Dinesh Subhraveti --- drivers/net/virtio_net.c | 64 ++++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 24 deletions(-) (limited to 'drivers') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 420388a4c5e8..effe8c685f77 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1,4 +1,4 @@ -/* A simple network driver using virtio. +/* A network driver using virtio. * * Copyright 2007 Rusty Russell IBM Corporation * @@ -73,6 +73,7 @@ struct skb_vnet_hdr { struct virtio_net_hdr hdr; struct virtio_net_hdr_mrg_rxbuf mhdr; }; + unsigned int num_sg; }; static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb) @@ -442,23 +443,24 @@ again: return received; } -static void free_old_xmit_skbs(struct virtnet_info *vi) +static unsigned int free_old_xmit_skbs(struct virtnet_info *vi) { struct sk_buff *skb; - unsigned int len; + unsigned int len, tot_sgs = 0; while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) { pr_debug("Sent skb %p\n", skb); __skb_unlink(skb, &vi->send); vi->dev->stats.tx_bytes += skb->len; vi->dev->stats.tx_packets++; + tot_sgs += skb_vnet_hdr(skb)->num_sg; kfree_skb(skb); } + return tot_sgs; } static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) { - int num; struct scatterlist sg[2+MAX_SKB_FRAGS]; struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; @@ -502,13 +504,14 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) else sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr)); - num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; - return vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); + hdr->num_sg = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; + return vi->svq->vq_ops->add_buf(vi->svq, sg, hdr->num_sg, 0, skb); } static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); + int capacity; again: /* Free 
up any pending old buffers before queueing new ones. */ @@ -516,27 +519,40 @@ again: /* Put new one in send queue and do transmit */ __skb_queue_head(&vi->send, skb); - if (likely(xmit_skb(vi, skb) >= 0)) { - vi->svq->vq_ops->kick(vi->svq); - /* Don't wait up for transmitted skbs to be freed. */ - skb_orphan(skb); - nf_reset(skb); - return NETDEV_TX_OK; + capacity = xmit_skb(vi, skb); + + /* This can happen with OOM and indirect buffers. */ + if (unlikely(capacity < 0)) { + netif_stop_queue(dev); + dev_warn(&dev->dev, "Unexpected full queue\n"); + if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { + vi->svq->vq_ops->disable_cb(vi->svq); + netif_start_queue(dev); + goto again; + } + return NETDEV_TX_BUSY; } - /* Ring too full for this packet, remove it from queue again. */ - pr_debug("%s: virtio not prepared to send\n", dev->name); - __skb_unlink(skb, &vi->send); - netif_stop_queue(dev); - - /* Activate callback for using skbs: if this returns false it - * means some were used in the meantime. */ - if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { - vi->svq->vq_ops->disable_cb(vi->svq); - netif_start_queue(dev); - goto again; + vi->svq->vq_ops->kick(vi->svq); + /* Don't wait up for transmitted skbs to be freed. */ + skb_orphan(skb); + nf_reset(skb); + + /* Apparently nice girls don't return TX_BUSY; stop the queue + * before it gets out of hand. Naturally, this wastes entries. */ + if (capacity < 2+MAX_SKB_FRAGS) { + netif_stop_queue(dev); + if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { + /* More just got used, free them then recheck. 
*/ + capacity += free_old_xmit_skbs(vi); + if (capacity >= 2+MAX_SKB_FRAGS) { + netif_start_queue(dev); + vi->svq->vq_ops->disable_cb(vi->svq); + } + } } - return NETDEV_TX_BUSY; + + return NETDEV_TX_OK; } static int virtnet_set_mac_address(struct net_device *dev, void *p) -- cgit v1.2.3 From 0aea51c37fc5868cd723f670af9056c2ef694fee Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Wed, 26 Aug 2009 14:58:28 +0530 Subject: virtio_net: Check for room in the vq before adding buffer Saves us one cycle of alloc-add-free if the queue was full. Signed-off-by: Amit Shah Signed-off-by: Rusty Russell (modified) --- drivers/net/virtio_net.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index effe8c685f77..d445845f2779 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -280,7 +280,7 @@ static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp) bool oom = false; sg_init_table(sg, 2+MAX_SKB_FRAGS); - for (;;) { + do { struct skb_vnet_hdr *hdr; skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN + NET_IP_ALIGN); @@ -323,7 +323,7 @@ static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp) break; } vi->num++; - } + } while (err >= num); if (unlikely(vi->num > vi->max)) vi->max = vi->num; vi->rvq->vq_ops->kick(vi->rvq); @@ -341,7 +341,7 @@ static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp) if (!vi->mergeable_rx_bufs) return try_fill_recv_maxbufs(vi, gfp); - for (;;) { + do { skb_frag_t *f; skb = netdev_alloc_skb(vi->dev, GOOD_COPY_LEN + NET_IP_ALIGN); @@ -375,7 +375,7 @@ static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp) break; } vi->num++; - } + } while (err > 0); if (unlikely(vi->num > vi->max)) vi->max = vi->num; vi->rvq->vq_ops->kick(vi->rvq); -- cgit v1.2.3 From 43c2ed8e9e2d285be49d7ea1afc05071d5d7b0f9 Mon Sep 17 00:00:00 2001 From: Sriram Date: Thu, 24 Sep 2009 19:15:18 +0000 Subject: TI Davinci EMAC: Fix in vector 
definition for EMAC_VERSION_2 In the emac_poll function when looking for interrupt status masks correct definition must be chosen based on EMAC_VERSION(the bit mask has changed from version 1 to version 2). Signed-off-by: Sriram Acked-by: Chaithrika U S Signed-off-by: David S. Miller --- drivers/net/davinci_emac.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c index d465eaa796c4..65a2d0ba64e2 100644 --- a/drivers/net/davinci_emac.c +++ b/drivers/net/davinci_emac.c @@ -200,6 +200,9 @@ static const char emac_version_string[] = "TI DaVinci EMAC Linux v6.1"; /** NOTE:: For DM646x the IN_VECTOR has changed */ #define EMAC_DM646X_MAC_IN_VECTOR_RX_INT_VEC BIT(EMAC_DEF_RX_CH) #define EMAC_DM646X_MAC_IN_VECTOR_TX_INT_VEC BIT(16 + EMAC_DEF_TX_CH) +#define EMAC_DM646X_MAC_IN_VECTOR_HOST_INT BIT(26) +#define EMAC_DM646X_MAC_IN_VECTOR_STATPEND_INT BIT(27) + /* CPPI bit positions */ #define EMAC_CPPI_SOP_BIT BIT(31) @@ -2167,7 +2170,11 @@ static int emac_poll(struct napi_struct *napi, int budget) emac_int_enable(priv); } - if (unlikely(status & EMAC_DM644X_MAC_IN_VECTOR_HOST_INT)) { + mask = EMAC_DM644X_MAC_IN_VECTOR_HOST_INT; + if (priv->version == EMAC_VERSION_2) + mask = EMAC_DM646X_MAC_IN_VECTOR_HOST_INT; + + if (unlikely(status & mask)) { u32 ch, cause; dev_err(emac_dev, "DaVinci EMAC: Fatal Hardware Error\n"); netif_stop_queue(ndev); -- cgit v1.2.3 From 99c4a6344f6574c97019ac16e8d54bfe5ad21f2d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 25 Sep 2009 12:14:43 -0700 Subject: at91_can: Forgotten git 'add' of at91_can.c Signed-off-by: David S. 
Miller --- drivers/net/can/at91_can.c | 1186 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1186 insertions(+) create mode 100644 drivers/net/can/at91_can.c (limited to 'drivers') diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c new file mode 100644 index 000000000000..f67ae285a35a --- /dev/null +++ b/drivers/net/can/at91_can.c @@ -0,0 +1,1186 @@ +/* + * at91_can.c - CAN network driver for AT91 SoC CAN controller + * + * (C) 2007 by Hans J. Koch + * (C) 2008, 2009 by Marc Kleine-Budde + * + * This software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2 as distributed in the 'COPYING' + * file from the main directory of the linux kernel source. + * + * Send feedback to + * + * + * Your platform definition file should specify something like: + * + * static struct at91_can_data ek_can_data = { + * transceiver_switch = sam9263ek_transceiver_switch, + * }; + * + * at91_add_device_can(&ek_can_data); + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#define DRV_NAME "at91_can" +#define AT91_NAPI_WEIGHT 12 + +/* + * RX/TX Mailbox split + * don't dare to touch + */ +#define AT91_MB_RX_NUM 12 +#define AT91_MB_TX_SHIFT 2 + +#define AT91_MB_RX_FIRST 0 +#define AT91_MB_RX_LAST (AT91_MB_RX_FIRST + AT91_MB_RX_NUM - 1) + +#define AT91_MB_RX_MASK(i) ((1 << (i)) - 1) +#define AT91_MB_RX_SPLIT 8 +#define AT91_MB_RX_LOW_LAST (AT91_MB_RX_SPLIT - 1) +#define AT91_MB_RX_LOW_MASK (AT91_MB_RX_MASK(AT91_MB_RX_SPLIT)) + +#define AT91_MB_TX_NUM (1 << AT91_MB_TX_SHIFT) +#define AT91_MB_TX_FIRST (AT91_MB_RX_LAST + 1) +#define AT91_MB_TX_LAST (AT91_MB_TX_FIRST + AT91_MB_TX_NUM - 1) + +#define AT91_NEXT_PRIO_SHIFT (AT91_MB_TX_SHIFT) +#define AT91_NEXT_PRIO_MASK (0xf << AT91_MB_TX_SHIFT) +#define AT91_NEXT_MB_MASK (AT91_MB_TX_NUM - 1) +#define AT91_NEXT_MASK ((AT91_MB_TX_NUM - 1) | 
AT91_NEXT_PRIO_MASK) + +/* Common registers */ +enum at91_reg { + AT91_MR = 0x000, + AT91_IER = 0x004, + AT91_IDR = 0x008, + AT91_IMR = 0x00C, + AT91_SR = 0x010, + AT91_BR = 0x014, + AT91_TIM = 0x018, + AT91_TIMESTP = 0x01C, + AT91_ECR = 0x020, + AT91_TCR = 0x024, + AT91_ACR = 0x028, +}; + +/* Mailbox registers (0 <= i <= 15) */ +#define AT91_MMR(i) (enum at91_reg)(0x200 + ((i) * 0x20)) +#define AT91_MAM(i) (enum at91_reg)(0x204 + ((i) * 0x20)) +#define AT91_MID(i) (enum at91_reg)(0x208 + ((i) * 0x20)) +#define AT91_MFID(i) (enum at91_reg)(0x20C + ((i) * 0x20)) +#define AT91_MSR(i) (enum at91_reg)(0x210 + ((i) * 0x20)) +#define AT91_MDL(i) (enum at91_reg)(0x214 + ((i) * 0x20)) +#define AT91_MDH(i) (enum at91_reg)(0x218 + ((i) * 0x20)) +#define AT91_MCR(i) (enum at91_reg)(0x21C + ((i) * 0x20)) + +/* Register bits */ +#define AT91_MR_CANEN BIT(0) +#define AT91_MR_LPM BIT(1) +#define AT91_MR_ABM BIT(2) +#define AT91_MR_OVL BIT(3) +#define AT91_MR_TEOF BIT(4) +#define AT91_MR_TTM BIT(5) +#define AT91_MR_TIMFRZ BIT(6) +#define AT91_MR_DRPT BIT(7) + +#define AT91_SR_RBSY BIT(29) + +#define AT91_MMR_PRIO_SHIFT (16) + +#define AT91_MID_MIDE BIT(29) + +#define AT91_MSR_MRTR BIT(20) +#define AT91_MSR_MABT BIT(22) +#define AT91_MSR_MRDY BIT(23) +#define AT91_MSR_MMI BIT(24) + +#define AT91_MCR_MRTR BIT(20) +#define AT91_MCR_MTCR BIT(23) + +/* Mailbox Modes */ +enum at91_mb_mode { + AT91_MB_MODE_DISABLED = 0, + AT91_MB_MODE_RX = 1, + AT91_MB_MODE_RX_OVRWR = 2, + AT91_MB_MODE_TX = 3, + AT91_MB_MODE_CONSUMER = 4, + AT91_MB_MODE_PRODUCER = 5, +}; + +/* Interrupt mask bits */ +#define AT91_IRQ_MB_RX ((1 << (AT91_MB_RX_LAST + 1)) \ + - (1 << AT91_MB_RX_FIRST)) +#define AT91_IRQ_MB_TX ((1 << (AT91_MB_TX_LAST + 1)) \ + - (1 << AT91_MB_TX_FIRST)) +#define AT91_IRQ_MB_ALL (AT91_IRQ_MB_RX | AT91_IRQ_MB_TX) + +#define AT91_IRQ_ERRA (1 << 16) +#define AT91_IRQ_WARN (1 << 17) +#define AT91_IRQ_ERRP (1 << 18) +#define AT91_IRQ_BOFF (1 << 19) +#define AT91_IRQ_SLEEP (1 << 20) +#define 
AT91_IRQ_WAKEUP (1 << 21) +#define AT91_IRQ_TOVF (1 << 22) +#define AT91_IRQ_TSTP (1 << 23) +#define AT91_IRQ_CERR (1 << 24) +#define AT91_IRQ_SERR (1 << 25) +#define AT91_IRQ_AERR (1 << 26) +#define AT91_IRQ_FERR (1 << 27) +#define AT91_IRQ_BERR (1 << 28) + +#define AT91_IRQ_ERR_ALL (0x1fff0000) +#define AT91_IRQ_ERR_FRAME (AT91_IRQ_CERR | AT91_IRQ_SERR | \ + AT91_IRQ_AERR | AT91_IRQ_FERR | AT91_IRQ_BERR) +#define AT91_IRQ_ERR_LINE (AT91_IRQ_ERRA | AT91_IRQ_WARN | \ + AT91_IRQ_ERRP | AT91_IRQ_BOFF) + +#define AT91_IRQ_ALL (0x1fffffff) + +struct at91_priv { + struct can_priv can; /* must be the first member! */ + struct net_device *dev; + struct napi_struct napi; + + void __iomem *reg_base; + + u32 reg_sr; + unsigned int tx_next; + unsigned int tx_echo; + unsigned int rx_next; + + struct clk *clk; + struct at91_can_data *pdata; +}; + +static struct can_bittiming_const at91_bittiming_const = { + .tseg1_min = 4, + .tseg1_max = 16, + .tseg2_min = 2, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 2, + .brp_max = 128, + .brp_inc = 1, +}; + +static inline int get_tx_next_mb(const struct at91_priv *priv) +{ + return (priv->tx_next & AT91_NEXT_MB_MASK) + AT91_MB_TX_FIRST; +} + +static inline int get_tx_next_prio(const struct at91_priv *priv) +{ + return (priv->tx_next >> AT91_NEXT_PRIO_SHIFT) & 0xf; +} + +static inline int get_tx_echo_mb(const struct at91_priv *priv) +{ + return (priv->tx_echo & AT91_NEXT_MB_MASK) + AT91_MB_TX_FIRST; +} + +static inline u32 at91_read(const struct at91_priv *priv, enum at91_reg reg) +{ + return readl(priv->reg_base + reg); +} + +static inline void at91_write(const struct at91_priv *priv, enum at91_reg reg, + u32 value) +{ + writel(value, priv->reg_base + reg); +} + +static inline void set_mb_mode_prio(const struct at91_priv *priv, + unsigned int mb, enum at91_mb_mode mode, int prio) +{ + at91_write(priv, AT91_MMR(mb), (mode << 24) | (prio << 16)); +} + +static inline void set_mb_mode(const struct at91_priv *priv, unsigned int mb, + enum 
at91_mb_mode mode) +{ + set_mb_mode_prio(priv, mb, mode, 0); +} + +static struct sk_buff *alloc_can_skb(struct net_device *dev, + struct can_frame **cf) +{ + struct sk_buff *skb; + + skb = netdev_alloc_skb(dev, sizeof(struct can_frame)); + if (unlikely(!skb)) + return NULL; + + skb->protocol = htons(ETH_P_CAN); + skb->ip_summed = CHECKSUM_UNNECESSARY; + *cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); + + return skb; +} + +static struct sk_buff *alloc_can_err_skb(struct net_device *dev, + struct can_frame **cf) +{ + struct sk_buff *skb; + + skb = alloc_can_skb(dev, cf); + if (unlikely(!skb)) + return NULL; + + memset(*cf, 0, sizeof(struct can_frame)); + (*cf)->can_id = CAN_ERR_FLAG; + (*cf)->can_dlc = CAN_ERR_DLC; + + return skb; +} + +/* + * Switch transceiver on or off + */ +static void at91_transceiver_switch(const struct at91_priv *priv, int on) +{ + if (priv->pdata && priv->pdata->transceiver_switch) + priv->pdata->transceiver_switch(on); +} + +static void at91_setup_mailboxes(struct net_device *dev) +{ + struct at91_priv *priv = netdev_priv(dev); + unsigned int i; + + /* + * The first 12 mailboxes are used as a reception FIFO. The + * last mailbox is configured with overwrite option. The + * overwrite flag indicates a FIFO overflow. + */ + for (i = AT91_MB_RX_FIRST; i < AT91_MB_RX_LAST; i++) + set_mb_mode(priv, i, AT91_MB_MODE_RX); + set_mb_mode(priv, AT91_MB_RX_LAST, AT91_MB_MODE_RX_OVRWR); + + /* The last 4 mailboxes are used for transmitting. 
*/ + for (i = AT91_MB_TX_FIRST; i <= AT91_MB_TX_LAST; i++) + set_mb_mode_prio(priv, i, AT91_MB_MODE_TX, 0); + + /* Reset tx and rx helper pointers */ + priv->tx_next = priv->tx_echo = priv->rx_next = 0; +} + +static int at91_set_bittiming(struct net_device *dev) +{ + const struct at91_priv *priv = netdev_priv(dev); + const struct can_bittiming *bt = &priv->can.bittiming; + u32 reg_br; + + reg_br = ((priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) << 24) | + ((bt->brp - 1) << 16) | ((bt->sjw - 1) << 12) | + ((bt->prop_seg - 1) << 8) | ((bt->phase_seg1 - 1) << 4) | + ((bt->phase_seg2 - 1) << 0); + + dev_info(dev->dev.parent, "writing AT91_BR: 0x%08x\n", reg_br); + + at91_write(priv, AT91_BR, reg_br); + + return 0; +} + +static void at91_chip_start(struct net_device *dev) +{ + struct at91_priv *priv = netdev_priv(dev); + u32 reg_mr, reg_ier; + + /* disable interrupts */ + at91_write(priv, AT91_IDR, AT91_IRQ_ALL); + + /* disable chip */ + reg_mr = at91_read(priv, AT91_MR); + at91_write(priv, AT91_MR, reg_mr & ~AT91_MR_CANEN); + + at91_setup_mailboxes(dev); + at91_transceiver_switch(priv, 1); + + /* enable chip */ + at91_write(priv, AT91_MR, AT91_MR_CANEN); + + priv->can.state = CAN_STATE_ERROR_ACTIVE; + + /* Enable interrupts */ + reg_ier = AT91_IRQ_MB_RX | AT91_IRQ_ERRP | AT91_IRQ_ERR_FRAME; + at91_write(priv, AT91_IDR, AT91_IRQ_ALL); + at91_write(priv, AT91_IER, reg_ier); +} + +static void at91_chip_stop(struct net_device *dev, enum can_state state) +{ + struct at91_priv *priv = netdev_priv(dev); + u32 reg_mr; + + /* disable interrupts */ + at91_write(priv, AT91_IDR, AT91_IRQ_ALL); + + reg_mr = at91_read(priv, AT91_MR); + at91_write(priv, AT91_MR, reg_mr & ~AT91_MR_CANEN); + + at91_transceiver_switch(priv, 0); + priv->can.state = state; +} + +/* + * theory of operation: + * + * According to the datasheet priority 0 is the highest priority, 15 + * is the lowest. 
If two mailboxes have the same priority level the + * message of the mailbox with the lowest number is sent first. + * + * We use the first TX mailbox (AT91_MB_TX_FIRST) with prio 0, then + * the next mailbox with prio 0, and so on, until all mailboxes are + * used. Then we start from the beginning with mailbox + * AT91_MB_TX_FIRST, but with prio 1, mailbox AT91_MB_TX_FIRST + 1 + * prio 1. When we reach the last mailbox with prio 15, we have to + * stop sending, waiting for all messages to be delivered, then start + * again with mailbox AT91_MB_TX_FIRST prio 0. + * + * We use the priv->tx_next as counter for the next transmission + * mailbox, but without the offset AT91_MB_TX_FIRST. The lower bits + * encode the mailbox number, the upper 4 bits the mailbox priority: + * + * priv->tx_next = (prio << AT91_NEXT_PRIO_SHIFT) | + * (mb - AT91_MB_TX_FIRST); + * + */ +static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct at91_priv *priv = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; + struct can_frame *cf = (struct can_frame *)skb->data; + unsigned int mb, prio; + u32 reg_mid, reg_mcr; + + mb = get_tx_next_mb(priv); + prio = get_tx_next_prio(priv); + + if (unlikely(!(at91_read(priv, AT91_MSR(mb)) & AT91_MSR_MRDY))) { + netif_stop_queue(dev); + + dev_err(dev->dev.parent, + "BUG! TX buffer full when queue awake!\n"); + return NETDEV_TX_BUSY; + } + + if (cf->can_id & CAN_EFF_FLAG) + reg_mid = (cf->can_id & CAN_EFF_MASK) | AT91_MID_MIDE; + else + reg_mid = (cf->can_id & CAN_SFF_MASK) << 18; + + reg_mcr = ((cf->can_id & CAN_RTR_FLAG) ? 
AT91_MCR_MRTR : 0) | + (cf->can_dlc << 16) | AT91_MCR_MTCR; + + /* disable MB while writing ID (see datasheet) */ + set_mb_mode(priv, mb, AT91_MB_MODE_DISABLED); + at91_write(priv, AT91_MID(mb), reg_mid); + set_mb_mode_prio(priv, mb, AT91_MB_MODE_TX, prio); + + at91_write(priv, AT91_MDL(mb), *(u32 *)(cf->data + 0)); + at91_write(priv, AT91_MDH(mb), *(u32 *)(cf->data + 4)); + + /* This triggers transmission */ + at91_write(priv, AT91_MCR(mb), reg_mcr); + + stats->tx_bytes += cf->can_dlc; + dev->trans_start = jiffies; + + /* _NOTE_: subtract AT91_MB_TX_FIRST offset from mb! */ + can_put_echo_skb(skb, dev, mb - AT91_MB_TX_FIRST); + + /* + * we have to stop the queue and deliver all messages in case + * of a prio+mb counter wrap around. This is the case if + * tx_next buffer prio and mailbox equals 0. + * + * also stop the queue if next buffer is still in use + * (== not ready) + */ + priv->tx_next++; + if (!(at91_read(priv, AT91_MSR(get_tx_next_mb(priv))) & + AT91_MSR_MRDY) || + (priv->tx_next & AT91_NEXT_MASK) == 0) + netif_stop_queue(dev); + + /* Enable interrupt for this mailbox */ + at91_write(priv, AT91_IER, 1 << mb); + + return NETDEV_TX_OK; +} + +/** + * at91_activate_rx_low - activate lower rx mailboxes + * @priv: at91 context + * + * Reenables the lower mailboxes for reception of new CAN messages + */ +static inline void at91_activate_rx_low(const struct at91_priv *priv) +{ + u32 mask = AT91_MB_RX_LOW_MASK; + at91_write(priv, AT91_TCR, mask); +} + +/** + * at91_activate_rx_mb - reactivate single rx mailbox + * @priv: at91 context + * @mb: mailbox to reactivate + * + * Reenables given mailbox for reception of new CAN messages + */ +static inline void at91_activate_rx_mb(const struct at91_priv *priv, + unsigned int mb) +{ + u32 mask = 1 << mb; + at91_write(priv, AT91_TCR, mask); +} + +/** + * at91_rx_overflow_err - send error frame due to rx overflow + * @dev: net device + */ +static void at91_rx_overflow_err(struct net_device *dev) +{ + struct net_device_stats 
*stats = &dev->stats; + struct sk_buff *skb; + struct can_frame *cf; + + dev_dbg(dev->dev.parent, "RX buffer overflow\n"); + stats->rx_over_errors++; + stats->rx_errors++; + + skb = alloc_can_err_skb(dev, &cf); + if (unlikely(!skb)) + return; + + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; + netif_receive_skb(skb); + + stats->rx_packets++; + stats->rx_bytes += cf->can_dlc; +} + +/** + * at91_read_mb - read CAN msg from mailbox (lowlevel impl) + * @dev: net device + * @mb: mailbox number to read from + * @cf: can frame where to store message + * + * Reads a CAN message from the given mailbox and stores data into + * given can frame. "mb" and "cf" must be valid. + */ +static void at91_read_mb(struct net_device *dev, unsigned int mb, + struct can_frame *cf) +{ + const struct at91_priv *priv = netdev_priv(dev); + u32 reg_msr, reg_mid; + + reg_mid = at91_read(priv, AT91_MID(mb)); + if (reg_mid & AT91_MID_MIDE) + cf->can_id = ((reg_mid >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG; + else + cf->can_id = (reg_mid >> 18) & CAN_SFF_MASK; + + reg_msr = at91_read(priv, AT91_MSR(mb)); + if (reg_msr & AT91_MSR_MRTR) + cf->can_id |= CAN_RTR_FLAG; + cf->can_dlc = min_t(__u8, (reg_msr >> 16) & 0xf, 8); + + *(u32 *)(cf->data + 0) = at91_read(priv, AT91_MDL(mb)); + *(u32 *)(cf->data + 4) = at91_read(priv, AT91_MDH(mb)); + + if (unlikely(mb == AT91_MB_RX_LAST && reg_msr & AT91_MSR_MMI)) + at91_rx_overflow_err(dev); +} + +/** + * at91_read_msg - read CAN message from mailbox + * @dev: net device + * @mb: mail box to read from + * + * Reads a CAN message from given mailbox, and put into linux network + * RX queue, does all housekeeping chores (stats, ...) 
+ */ +static void at91_read_msg(struct net_device *dev, unsigned int mb) +{ + struct net_device_stats *stats = &dev->stats; + struct can_frame *cf; + struct sk_buff *skb; + + skb = alloc_can_skb(dev, &cf); + if (unlikely(!skb)) { + stats->rx_dropped++; + return; + } + + at91_read_mb(dev, mb, cf); + netif_receive_skb(skb); + + stats->rx_packets++; + stats->rx_bytes += cf->can_dlc; +} + +/** + * at91_poll_rx - read multiple CAN messages from mailboxes + * @dev: net device + * @quota: max number of pkgs we're allowed to receive + * + * Theory of Operation: + * + * 12 of the 16 mailboxes on the chip are reserved for RX. we split + * them into 2 groups. The lower group holds 8 and upper 4 mailboxes. + * + * Like it or not, but the chip always saves a received CAN message + * into the first free mailbox it finds (starting with the + * lowest). This makes it very difficult to read the messages in the + * right order from the chip. This is how we work around that problem: + * + * The first message goes into mb nr. 0 and issues an interrupt. All + * rx ints are disabled in the interrupt handler and a napi poll is + * scheduled. We read the mailbox, but do _not_ reenable the mb (to + * receive another message). + * + * lower mbxs upper + * ______^______ __^__ + * / \ / \ + * +-+-+-+-+-+-+-+-++-+-+-+-+ + * |x|x|x|x|x|x|x|x|| | | | | + * +-+-+-+-+-+-+-+-++-+-+-+-+ + * 0 0 0 0 0 0 0 0 0 0 1 1 \ mail + * 0 1 2 3 4 5 6 7 8 9 0 1 / box + * + * The variable priv->rx_next points to the next mailbox to read a + * message from. As long as we're in the lower mailboxes we just read the + * mailbox but not reenable it. + * + * With completion of the last of the lower mailboxes, we reenable the + * whole first group, but continue to look for filled mailboxes in the + * upper mailboxes. Imagine the second group like overflow mailboxes, + * which takes CAN messages if the lower group is full. While in the + * upper group we reenable the mailbox right after reading it. 
Giving + * the chip more room to store messages. + * + * After finishing we look again in the lower group if we've still + * quota. + * + */ +static int at91_poll_rx(struct net_device *dev, int quota) +{ + struct at91_priv *priv = netdev_priv(dev); + u32 reg_sr = at91_read(priv, AT91_SR); + const unsigned long *addr = (unsigned long *)&reg_sr; + unsigned int mb; + int received = 0; + + if (priv->rx_next > AT91_MB_RX_LOW_LAST && + reg_sr & AT91_MB_RX_LOW_MASK) + dev_info(dev->dev.parent, + "order of incoming frames cannot be guaranteed\n"); + + again: + for (mb = find_next_bit(addr, AT91_MB_RX_NUM, priv->rx_next); + mb < AT91_MB_RX_NUM && quota > 0; + reg_sr = at91_read(priv, AT91_SR), + mb = find_next_bit(addr, AT91_MB_RX_NUM, ++priv->rx_next)) { + at91_read_msg(dev, mb); + + /* reactivate mailboxes */ + if (mb == AT91_MB_RX_LOW_LAST) + /* all lower mailboxes, if just finished it */ + at91_activate_rx_low(priv); + else if (mb > AT91_MB_RX_LOW_LAST) + /* only the mailbox we read */ + at91_activate_rx_mb(priv, mb); + + received++; + quota--; + } + + /* upper group completed, look again in lower */ + if (priv->rx_next > AT91_MB_RX_LOW_LAST && + quota > 0 && mb >= AT91_MB_RX_NUM) { + priv->rx_next = 0; + goto again; + } + + return received; +} + +static void at91_poll_err_frame(struct net_device *dev, + struct can_frame *cf, u32 reg_sr) +{ + struct at91_priv *priv = netdev_priv(dev); + + /* CRC error */ + if (reg_sr & AT91_IRQ_CERR) { + dev_dbg(dev->dev.parent, "CERR irq\n"); + dev->stats.rx_errors++; + priv->can.can_stats.bus_error++; + cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; + } + + /* Stuffing Error */ + if (reg_sr & AT91_IRQ_SERR) { + dev_dbg(dev->dev.parent, "SERR irq\n"); + dev->stats.rx_errors++; + priv->can.can_stats.bus_error++; + cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; + cf->data[2] |= CAN_ERR_PROT_STUFF; + } + + /* Acknowledgement Error */ + if (reg_sr & AT91_IRQ_AERR) { + dev_dbg(dev->dev.parent, "AERR irq\n"); + dev->stats.tx_errors++; + 
cf->can_id |= CAN_ERR_ACK; + } + + /* Form error */ + if (reg_sr & AT91_IRQ_FERR) { + dev_dbg(dev->dev.parent, "FERR irq\n"); + dev->stats.rx_errors++; + priv->can.can_stats.bus_error++; + cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; + cf->data[2] |= CAN_ERR_PROT_FORM; + } + + /* Bit Error */ + if (reg_sr & AT91_IRQ_BERR) { + dev_dbg(dev->dev.parent, "BERR irq\n"); + dev->stats.tx_errors++; + priv->can.can_stats.bus_error++; + cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; + cf->data[2] |= CAN_ERR_PROT_BIT; + } +} + +static int at91_poll_err(struct net_device *dev, int quota, u32 reg_sr) +{ + struct sk_buff *skb; + struct can_frame *cf; + + if (quota == 0) + return 0; + + skb = alloc_can_err_skb(dev, &cf); + if (unlikely(!skb)) + return 0; + + at91_poll_err_frame(dev, cf, reg_sr); + netif_receive_skb(skb); + + dev->last_rx = jiffies; + dev->stats.rx_packets++; + dev->stats.rx_bytes += cf->can_dlc; + + return 1; +} + +static int at91_poll(struct napi_struct *napi, int quota) +{ + struct net_device *dev = napi->dev; + const struct at91_priv *priv = netdev_priv(dev); + u32 reg_sr = at91_read(priv, AT91_SR); + int work_done = 0; + + if (reg_sr & AT91_IRQ_MB_RX) + work_done += at91_poll_rx(dev, quota - work_done); + + /* + * The error bits are clear on read, + * so use saved value from irq handler. + */ + reg_sr |= priv->reg_sr; + if (reg_sr & AT91_IRQ_ERR_FRAME) + work_done += at91_poll_err(dev, quota - work_done, reg_sr); + + if (work_done < quota) { + /* enable IRQs for frame errors and all mailboxes >= rx_next */ + u32 reg_ier = AT91_IRQ_ERR_FRAME; + reg_ier |= AT91_IRQ_MB_RX & ~AT91_MB_RX_MASK(priv->rx_next); + + napi_complete(napi); + at91_write(priv, AT91_IER, reg_ier); + } + + return work_done; +} + +/* + * theory of operation: + * + * priv->tx_echo holds the number of the oldest can_frame put for + * transmission into the hardware, but not yet ACKed by the CAN tx + * complete IRQ. 
+ * + * We iterate from priv->tx_echo to priv->tx_next and check if the + * packet has been transmitted, echo it back to the CAN framework. If + * we discover a not yet transmitted packet, stop looking for more. + * + */ +static void at91_irq_tx(struct net_device *dev, u32 reg_sr) +{ + struct at91_priv *priv = netdev_priv(dev); + u32 reg_msr; + unsigned int mb; + + /* masking of reg_sr not needed, already done by at91_irq */ + + for (/* nix */; (priv->tx_next - priv->tx_echo) > 0; priv->tx_echo++) { + mb = get_tx_echo_mb(priv); + + /* no event in mailbox? */ + if (!(reg_sr & (1 << mb))) + break; + + /* Disable irq for this TX mailbox */ + at91_write(priv, AT91_IDR, 1 << mb); + + /* + * only echo if mailbox signals us a transfer + * complete (MSR_MRDY). Otherwise it's a transfer + * abort. "can_bus_off()" takes care of the skbs + * parked in the echo queue. + */ + reg_msr = at91_read(priv, AT91_MSR(mb)); + if (likely(reg_msr & AT91_MSR_MRDY && + ~reg_msr & AT91_MSR_MABT)) { + /* _NOTE_: subtract AT91_MB_TX_FIRST offset from mb! */ + can_get_echo_skb(dev, mb - AT91_MB_TX_FIRST); + dev->stats.tx_packets++; + } + } + + /* + * restart queue if we don't have a wrap around but restart if + * we get a TX int for the last can frame directly before a + * wrap around. 
+ */ + if ((priv->tx_next & AT91_NEXT_MASK) != 0 || + (priv->tx_echo & AT91_NEXT_MASK) == 0) + netif_wake_queue(dev); +} + +static void at91_irq_err_state(struct net_device *dev, + struct can_frame *cf, enum can_state new_state) +{ + struct at91_priv *priv = netdev_priv(dev); + u32 reg_idr, reg_ier, reg_ecr; + u8 tec, rec; + + reg_ecr = at91_read(priv, AT91_ECR); + rec = reg_ecr & 0xff; + tec = reg_ecr >> 16; + + switch (priv->can.state) { + case CAN_STATE_ERROR_ACTIVE: + /* + * from: ERROR_ACTIVE + * to : ERROR_WARNING, ERROR_PASSIVE, BUS_OFF + * => : there was a warning int + */ + if (new_state >= CAN_STATE_ERROR_WARNING && + new_state <= CAN_STATE_BUS_OFF) { + dev_dbg(dev->dev.parent, "Error Warning IRQ\n"); + priv->can.can_stats.error_warning++; + + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] = (tec > rec) ? + CAN_ERR_CRTL_TX_WARNING : + CAN_ERR_CRTL_RX_WARNING; + } + case CAN_STATE_ERROR_WARNING: /* fallthrough */ + /* + * from: ERROR_ACTIVE, ERROR_WARNING + * to : ERROR_PASSIVE, BUS_OFF + * => : error passive int + */ + if (new_state >= CAN_STATE_ERROR_PASSIVE && + new_state <= CAN_STATE_BUS_OFF) { + dev_dbg(dev->dev.parent, "Error Passive IRQ\n"); + priv->can.can_stats.error_passive++; + + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] = (tec > rec) ? + CAN_ERR_CRTL_TX_PASSIVE : + CAN_ERR_CRTL_RX_PASSIVE; + } + break; + case CAN_STATE_BUS_OFF: + /* + * from: BUS_OFF + * to : ERROR_ACTIVE, ERROR_WARNING, ERROR_PASSIVE + */ + if (new_state <= CAN_STATE_ERROR_PASSIVE) { + cf->can_id |= CAN_ERR_RESTARTED; + + dev_dbg(dev->dev.parent, "restarted\n"); + priv->can.can_stats.restarts++; + + netif_carrier_on(dev); + netif_wake_queue(dev); + } + break; + default: + break; + } + + + /* process state changes depending on the new state */ + switch (new_state) { + case CAN_STATE_ERROR_ACTIVE: + /* + * actually we want to enable AT91_IRQ_WARN here, but + * it screws up the system under certain + * circumstances. 
so just enable AT91_IRQ_ERRP, thus + * the "fallthrough" + */ + dev_dbg(dev->dev.parent, "Error Active\n"); + cf->can_id |= CAN_ERR_PROT; + cf->data[2] = CAN_ERR_PROT_ACTIVE; + case CAN_STATE_ERROR_WARNING: /* fallthrough */ + reg_idr = AT91_IRQ_ERRA | AT91_IRQ_WARN | AT91_IRQ_BOFF; + reg_ier = AT91_IRQ_ERRP; + break; + case CAN_STATE_ERROR_PASSIVE: + reg_idr = AT91_IRQ_ERRA | AT91_IRQ_WARN | AT91_IRQ_ERRP; + reg_ier = AT91_IRQ_BOFF; + break; + case CAN_STATE_BUS_OFF: + reg_idr = AT91_IRQ_ERRA | AT91_IRQ_ERRP | + AT91_IRQ_WARN | AT91_IRQ_BOFF; + reg_ier = 0; + + cf->can_id |= CAN_ERR_BUSOFF; + + dev_dbg(dev->dev.parent, "bus-off\n"); + netif_carrier_off(dev); + priv->can.can_stats.bus_off++; + + /* turn off chip, if restart is disabled */ + if (!priv->can.restart_ms) { + at91_chip_stop(dev, CAN_STATE_BUS_OFF); + return; + } + break; + default: + break; + } + + at91_write(priv, AT91_IDR, reg_idr); + at91_write(priv, AT91_IER, reg_ier); +} + +static void at91_irq_err(struct net_device *dev) +{ + struct at91_priv *priv = netdev_priv(dev); + struct sk_buff *skb; + struct can_frame *cf; + enum can_state new_state; + u32 reg_sr; + + reg_sr = at91_read(priv, AT91_SR); + + /* we need to look at the unmasked reg_sr */ + if (unlikely(reg_sr & AT91_IRQ_BOFF)) + new_state = CAN_STATE_BUS_OFF; + else if (unlikely(reg_sr & AT91_IRQ_ERRP)) + new_state = CAN_STATE_ERROR_PASSIVE; + else if (unlikely(reg_sr & AT91_IRQ_WARN)) + new_state = CAN_STATE_ERROR_WARNING; + else if (likely(reg_sr & AT91_IRQ_ERRA)) + new_state = CAN_STATE_ERROR_ACTIVE; + else { + dev_err(dev->dev.parent, "BUG! 
hardware in undefined state\n"); + return; + } + + /* state hasn't changed */ + if (likely(new_state == priv->can.state)) + return; + + skb = alloc_can_err_skb(dev, &cf); + if (unlikely(!skb)) + return; + + at91_irq_err_state(dev, cf, new_state); + netif_rx(skb); + + dev->last_rx = jiffies; + dev->stats.rx_packets++; + dev->stats.rx_bytes += cf->can_dlc; + + priv->can.state = new_state; +} + +/* + * interrupt handler + */ +static irqreturn_t at91_irq(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct at91_priv *priv = netdev_priv(dev); + irqreturn_t handled = IRQ_NONE; + u32 reg_sr, reg_imr; + + reg_sr = at91_read(priv, AT91_SR); + reg_imr = at91_read(priv, AT91_IMR); + + /* Ignore masked interrupts */ + reg_sr &= reg_imr; + if (!reg_sr) + goto exit; + + handled = IRQ_HANDLED; + + /* Receive or error interrupt? -> napi */ + if (reg_sr & (AT91_IRQ_MB_RX | AT91_IRQ_ERR_FRAME)) { + /* + * The error bits are clear on read, + * save for later use. + */ + priv->reg_sr = reg_sr; + at91_write(priv, AT91_IDR, + AT91_IRQ_MB_RX | AT91_IRQ_ERR_FRAME); + napi_schedule(&priv->napi); + } + + /* Transmission complete interrupt */ + if (reg_sr & AT91_IRQ_MB_TX) + at91_irq_tx(dev, reg_sr); + + at91_irq_err(dev); + + exit: + return handled; +} + +static int at91_open(struct net_device *dev) +{ + struct at91_priv *priv = netdev_priv(dev); + int err; + + clk_enable(priv->clk); + + /* check or determine and set bittime */ + err = open_candev(dev); + if (err) + goto out; + + /* register interrupt handler */ + if (request_irq(dev->irq, at91_irq, IRQF_SHARED, + dev->name, dev)) { + err = -EAGAIN; + goto out_close; + } + + /* start chip and queuing */ + at91_chip_start(dev); + napi_enable(&priv->napi); + netif_start_queue(dev); + + return 0; + + out_close: + close_candev(dev); + out: + clk_disable(priv->clk); + + return err; +} + +/* + * stop CAN bus activity + */ +static int at91_close(struct net_device *dev) +{ + struct at91_priv *priv = netdev_priv(dev); + + 
netif_stop_queue(dev); + napi_disable(&priv->napi); + at91_chip_stop(dev, CAN_STATE_STOPPED); + + free_irq(dev->irq, dev); + clk_disable(priv->clk); + + close_candev(dev); + + return 0; +} + +static int at91_set_mode(struct net_device *dev, enum can_mode mode) +{ + switch (mode) { + case CAN_MODE_START: + at91_chip_start(dev); + netif_wake_queue(dev); + break; + + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static const struct net_device_ops at91_netdev_ops = { + .ndo_open = at91_open, + .ndo_stop = at91_close, + .ndo_start_xmit = at91_start_xmit, +}; + +static int __init at91_can_probe(struct platform_device *pdev) +{ + struct net_device *dev; + struct at91_priv *priv; + struct resource *res; + struct clk *clk; + void __iomem *addr; + int err, irq; + + clk = clk_get(&pdev->dev, "can_clk"); + if (IS_ERR(clk)) { + dev_err(&pdev->dev, "no clock defined\n"); + err = -ENODEV; + goto exit; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + irq = platform_get_irq(pdev, 0); + if (!res || !irq) { + err = -ENODEV; + goto exit_put; + } + + if (!request_mem_region(res->start, + resource_size(res), + pdev->name)) { + err = -EBUSY; + goto exit_put; + } + + addr = ioremap_nocache(res->start, resource_size(res)); + if (!addr) { + err = -ENOMEM; + goto exit_release; + } + + dev = alloc_candev(sizeof(struct at91_priv)); + if (!dev) { + err = -ENOMEM; + goto exit_iounmap; + } + + dev->netdev_ops = &at91_netdev_ops; + dev->irq = irq; + dev->flags |= IFF_ECHO; + + priv = netdev_priv(dev); + priv->can.clock.freq = clk_get_rate(clk); + priv->can.bittiming_const = &at91_bittiming_const; + priv->can.do_set_bittiming = at91_set_bittiming; + priv->can.do_set_mode = at91_set_mode; + priv->reg_base = addr; + priv->dev = dev; + priv->clk = clk; + priv->pdata = pdev->dev.platform_data; + + netif_napi_add(dev, &priv->napi, at91_poll, AT91_NAPI_WEIGHT); + + dev_set_drvdata(&pdev->dev, dev); + SET_NETDEV_DEV(dev, &pdev->dev); + + err = register_candev(dev); + if (err) { + 
dev_err(&pdev->dev, "registering netdev failed\n"); + goto exit_free; + } + + dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%d)\n", + priv->reg_base, dev->irq); + + return 0; + + exit_free: + free_netdev(dev); + exit_iounmap: + iounmap(addr); + exit_release: + release_mem_region(res->start, resource_size(res)); + exit_put: + clk_put(clk); + exit: + return err; +} + +static int __devexit at91_can_remove(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + struct at91_priv *priv = netdev_priv(dev); + struct resource *res; + + unregister_netdev(dev); + + platform_set_drvdata(pdev, NULL); + + free_netdev(dev); + + iounmap(priv->reg_base); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(res->start, resource_size(res)); + + clk_put(priv->clk); + + return 0; +} + +static struct platform_driver at91_can_driver = { + .probe = at91_can_probe, + .remove = __devexit_p(at91_can_remove), + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + }, +}; + +static int __init at91_can_module_init(void) +{ + printk(KERN_INFO "%s netdevice driver\n", DRV_NAME); + return platform_driver_register(&at91_can_driver); +} + +static void __exit at91_can_module_exit(void) +{ + platform_driver_unregister(&at91_can_driver); + printk(KERN_INFO "%s: driver removed\n", DRV_NAME); +} + +module_init(at91_can_module_init); +module_exit(at91_can_module_exit); + +MODULE_AUTHOR("Marc Kleine-Budde "); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION(DRV_NAME " CAN netdevice driver"); -- cgit v1.2.3