From b442723fcec445fb0ae1104888dd22cd285e0a91 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 5 Jan 2015 12:49:10 -0500 Subject: can: kvaser_usb: Don't free packets when tight on URBs Flooding the Kvaser CAN to USB dongle with multiple reads and writes in high frequency caused seemingly-random panics in the kernel. On further inspection, it seems the driver erroneously freed the to-be-transmitted packet upon getting tight on URBs and returning NETDEV_TX_BUSY, leading to invalid memory writes and double frees at a later point in time. Note: Finding no more URBs/transmit-contexts and returning NETDEV_TX_BUSY is a driver bug in and out of itself: it means that our start/stop queue flow control is broken. This patch only fixes the (buggy) error handling code; the root cause shall be fixed in a later commit. Acked-by: Olivier Sobrie Signed-off-by: Ahmed S. Darwish Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers/net/can/usb') diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 541fb7a05625..2e7d513a7c11 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1294,12 +1294,14 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, if (!urb) { netdev_err(netdev, "No memory left for URBs\n"); stats->tx_dropped++; - goto nourbmem; + dev_kfree_skb(skb); + return NETDEV_TX_OK; } buf = kmalloc(sizeof(struct kvaser_msg), GFP_ATOMIC); if (!buf) { stats->tx_dropped++; + dev_kfree_skb(skb); goto nobufmem; } @@ -1334,6 +1336,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, } } + /* This should never happen; it implies a flow control bug */ if (!context) { netdev_warn(netdev, "cannot find free context\n"); ret = NETDEV_TX_BUSY; @@ -1364,9 +1367,6 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, if (unlikely(err)) { can_free_echo_skb(netdev, context->echo_index); - skb = NULL; /* set to NULL to avoid double free in - * dev_kfree_skb(skb) */ - atomic_dec(&priv->active_tx_urbs); usb_unanchor_urb(urb); @@ -1388,8 +1388,6 @@ releasebuf: kfree(buf); nobufmem: usb_free_urb(urb); -nourbmem: - dev_kfree_skb(skb); return ret; } -- cgit v1.2.3 From 889b77f7fd2bcc922493d73a4c51d8a851505815 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 5 Jan 2015 12:52:06 -0500 Subject: can: kvaser_usb: Reset all URB tx contexts upon channel close Flooding the Kvaser CAN to USB dongle with multiple reads and writes in very high frequency (*), closing the CAN channel while all the transmissions are on (#), opening the device again (@), then sending a small number of packets would make the driver enter an almost infinite loop of: [....] [15959.853988] kvaser_usb 4-3:1.0 can0: cannot find free context [15959.853990] kvaser_usb 4-3:1.0 can0: cannot find free context [15959.853991] kvaser_usb 4-3:1.0 can0: cannot find free context [15959.853993] kvaser_usb 4-3:1.0 can0: cannot find free context [15959.853994] kvaser_usb 4-3:1.0 can0: cannot find free context [15959.853995] kvaser_usb 4-3:1.0 can0: cannot find free context [....] _dragging the whole system down_ in the process due to the excessive logging output. Initially, this has caused random panics in the kernel due to a buggy error recovery path. That got fixed in an earlier commit.(%) This patch aims at solving the root cause. --> 16 tx URBs and contexts are allocated per CAN channel per USB device. Such URBs are protected by: a) A simple atomic counter, up to a value of MAX_TX_URBS (16) b) A flag in each URB context, stating if it's free c) The fact that ndo_start_xmit calls are themselves protected by the networking layers higher above After grabbing one of the tx URBs, if the driver noticed that all of them are now taken, it stops the netif transmission queue. Such queue is worken up again only if an acknowedgment was received from the firmware on one of our earlier-sent frames. Meanwhile, upon channel close (#), the driver sends a CMD_STOP_CHIP to the firmware, effectively closing all further communication. In the high traffic case, the atomic counter remains at MAX_TX_URBS, and all the URB contexts remain marked as active. While opening the channel again (@), it cannot send any further frames since no more free tx URB contexts are available. Reset all tx URB contexts upon CAN channel close. (*) 50 parallel instances of `cangen0 -g 0 -ix` (#) `ifconfig can0 down` (@) `ifconfig can0 up` (%) "can: kvaser_usb: Don't free packets when tight on URBs" Signed-off-by: Ahmed S. Darwish Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/can/usb') diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 2e7d513a7c11..9accc8272c27 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1246,6 +1246,9 @@ static int kvaser_usb_close(struct net_device *netdev) if (err) netdev_warn(netdev, "Cannot stop device, error %d\n", err); + /* reset tx contexts */ + kvaser_usb_unlink_tx_urbs(priv); + priv->can.state = CAN_STATE_STOPPED; close_candev(priv->netdev); -- cgit v1.2.3 From 5e7e6e0c9b47a45576c38b4a72d67927a5e049f7 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 5 Jan 2015 12:57:13 -0500 Subject: can: kvaser_usb: Don't send a RESET_CHIP for non-existing channels Recent Leaf firmware versions (>= 3.1.557) do not allow to send commands for non-existing channels. If a command is sent for a non-existing channel, the firmware crashes. Reported-by: Christopher Storah Signed-off-by: Olivier Sobrie Signed-off-by: Ahmed S. Darwish Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/net/can/usb') diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 9accc8272c27..cc7bfc0c0a71 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1503,6 +1503,10 @@ static int kvaser_usb_init_one(struct usb_interface *intf, struct kvaser_usb_net_priv *priv; int i, err; + err = kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, channel); + if (err) + return err; + netdev = alloc_candev(sizeof(*priv), MAX_TX_URBS); if (!netdev) { dev_err(&intf->dev, "Cannot alloc candev\n"); @@ -1607,9 +1611,6 @@ static int kvaser_usb_probe(struct usb_interface *intf, usb_set_intfdata(intf, dev); - for (i = 0; i < MAX_NET_DEVICES; i++) - kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, i); - err = kvaser_usb_get_software_info(dev); if (err) { dev_err(&intf->dev, -- cgit v1.2.3 From a58518ccf39f86f898a65201518dd8e799b3abeb Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Sun, 11 Jan 2015 15:49:52 -0500 Subject: can: kvaser_usb: Don't dereference skb after a netif_rx() We should not touch the packet after a netif_rx: it might get freed behind our back. Suggested-by: Marc Kleine-Budde Signed-off-by: Ahmed S. Darwish Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'drivers/net/can/usb') diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index cc7bfc0c0a71..c32cd61073bc 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -520,10 +520,10 @@ static void kvaser_usb_tx_acknowledge(const struct kvaser_usb *dev, skb = alloc_can_err_skb(priv->netdev, &cf); if (skb) { cf->can_id |= CAN_ERR_RESTARTED; - netif_rx(skb); stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); } else { netdev_err(priv->netdev, "No memory left for err_skb\n"); @@ -770,10 +770,9 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev, priv->can.state = new_state; - netif_rx(skb); - stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); } static void kvaser_usb_rx_can_err(const struct kvaser_usb_net_priv *priv, @@ -805,10 +804,9 @@ static void kvaser_usb_rx_can_err(const struct kvaser_usb_net_priv *priv, stats->rx_over_errors++; stats->rx_errors++; - netif_rx(skb); - stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); } } @@ -887,10 +885,9 @@ static void kvaser_usb_rx_can_msg(const struct kvaser_usb *dev, cf->can_dlc); } - netif_rx(skb); - stats->rx_packets++; stats->rx_bytes += cf->can_dlc; + netif_rx(skb); } static void kvaser_usb_start_chip_reply(const struct kvaser_usb *dev, -- cgit v1.2.3 From ded5006667318c06df875609535176bd33f243a1 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 26 Jan 2015 07:20:39 +0200 Subject: can: kvaser_usb: Do not sleep in atomic context Upon receiving a hardware event with the BUS_RESET flag set, the driver kills all of its anchored URBs and resets all of its transmit URB contexts. Unfortunately it does so under the context of URB completion handler `kvaser_usb_read_bulk_callback()', which is often called in an atomic context. While the device is flooded with many received error packets, usb_kill_urb() typically sleeps/reschedules till the transfer request of each killed URB in question completes, leading to the sleep in atomic bug. [3] In v2 submission of the original driver patch [1], it was stated that the URBs kill and tx contexts reset was needed since we don't receive any tx acknowledgments later and thus such resources will be locked down forever. Fortunately this is no longer needed since an earlier bugfix in this patch series is now applied: all tx URB contexts are reset upon CAN channel close. [2] Moreover, a BUS_RESET is now treated _exactly_ like a BUS_OFF event, which is the recommended handling method advised by the device manufacturer. [1] http://article.gmane.org/gmane.linux.network/239442 http://www.webcitation.org/6Vr2yagAQ [2] can: kvaser_usb: Reset all URB tx contexts upon channel close 889b77f7fd2bcc922493d73a4c51d8a851505815 [3] Stacktrace: [] dump_stack+0x45/0x57 [] __schedule_bug+0x41/0x4f [] __schedule+0x5f1/0x700 [] ? _raw_spin_unlock_irqrestore+0xa/0x10 [] schedule+0x24/0x70 [] usb_kill_urb+0x65/0xa0 [] ? prepare_to_wait_event+0x110/0x110 [] usb_kill_anchored_urbs+0x48/0x80 [] kvaser_usb_unlink_tx_urbs+0x18/0x50 [kvaser_usb] [] kvaser_usb_rx_error+0xc0/0x400 [kvaser_usb] [] ? vprintk_default+0x1a/0x20 [] kvaser_usb_read_bulk_callback+0x4c1/0x5f0 [kvaser_usb] [] __usb_hcd_giveback_urb+0x5e/0xc0 [] usb_hcd_giveback_urb+0x41/0x110 [] finish_urb+0x98/0x180 [ohci_hcd] [] ? acct_account_cputime+0x17/0x20 [] ? local_clock+0x15/0x30 [] ohci_work+0x1fb/0x5a0 [ohci_hcd] [] ? process_backlog+0xb1/0x130 [] ohci_irq+0xeb/0x270 [ohci_hcd] [] usb_hcd_irq+0x21/0x30 [] handle_irq_event_percpu+0x43/0x120 [] handle_irq_event+0x3d/0x60 [] handle_fasteoi_irq+0x74/0x110 [] handle_irq+0x1d/0x30 [] do_IRQ+0x57/0x100 [] common_interrupt+0x6a/0x6a Signed-off-by: Ahmed S. Darwish Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/net/can/usb') diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index c32cd61073bc..978a25e9cd3c 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -662,11 +662,6 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev, priv = dev->nets[channel]; stats = &priv->netdev->stats; - if (status & M16C_STATE_BUS_RESET) { - kvaser_usb_unlink_tx_urbs(priv); - return; - } - skb = alloc_can_err_skb(priv->netdev, &cf); if (!skb) { stats->rx_dropped++; @@ -677,7 +672,7 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev, netdev_dbg(priv->netdev, "Error status: 0x%02x\n", status); - if (status & M16C_STATE_BUS_OFF) { + if (status & (M16C_STATE_BUS_OFF | M16C_STATE_BUS_RESET)) { cf->can_id |= CAN_ERR_BUSOFF; priv->can.can_stats.bus_off++; -- cgit v1.2.3 From 3803fa6977f1de15fda4e8646c8fec97c8045cae Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 26 Jan 2015 07:22:54 +0200 Subject: can: kvaser_usb: Send correct context to URB completion Send expected argument to the URB completion hander: a CAN netdevice instead of the network interface private context `kvaser_usb_net_priv'. This was discovered by having some garbage in the kernel log in place of the netdevice names: can0 and can1. Signed-off-by: Ahmed S. Darwish Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/can/usb') diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 978a25e9cd3c..f0c62075df0a 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -587,7 +587,7 @@ static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv, usb_sndbulkpipe(dev->udev, dev->bulk_out->bEndpointAddress), buf, msg->len, - kvaser_usb_simple_msg_callback, priv); + kvaser_usb_simple_msg_callback, netdev); usb_anchor_urb(urb, &priv->tx_submitted); err = usb_submit_urb(urb, GFP_ATOMIC); -- cgit v1.2.3 From 14c10c2a1dd8eb8e00b750b521753260befa2789 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 26 Jan 2015 07:24:06 +0200 Subject: can: kvaser_usb: Retry the first bulk transfer on -ETIMEDOUT On some x86 laptops, plugging a Kvaser device again after an unplug makes the firmware always ignore the very first command. For such a case, provide some room for retries instead of completely exiting the driver init code. Signed-off-by: Ahmed S. Darwish Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/net/can/usb') diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index f0c62075df0a..55407b9663a6 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1585,7 +1585,7 @@ static int kvaser_usb_probe(struct usb_interface *intf, { struct kvaser_usb *dev; int err = -ENOMEM; - int i; + int i, retry = 3; dev = devm_kzalloc(&intf->dev, sizeof(*dev), GFP_KERNEL); if (!dev) @@ -1603,7 +1603,15 @@ static int kvaser_usb_probe(struct usb_interface *intf, usb_set_intfdata(intf, dev); - err = kvaser_usb_get_software_info(dev); + /* On some x86 laptops, plugging a Kvaser device again after + * an unplug makes the firmware always ignore the very first + * command. For such a case, provide some room for retries + * instead of completely exiting the driver. + */ + do { + err = kvaser_usb_get_software_info(dev); + } while (--retry && err == -ETIMEDOUT); + if (err) { dev_err(&intf->dev, "Cannot get software infos, error %d\n", err); -- cgit v1.2.3 From e638642b08c170d2021b706f0b1c4f4ae93d8cbd Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 26 Jan 2015 07:25:43 +0200 Subject: can: kvaser_usb: Fix state handling upon BUS_ERROR events While being in an ERROR_WARNING state, and receiving further bus error events with error counters still in the ERROR_WARNING range of 97-127 inclusive, the state handling code erroneously reverts back to ERROR_ACTIVE. Per the CAN standard, only revert to ERROR_ACTIVE when the error counters are less than 96. Moreover, in certain Kvaser models, the BUS_ERROR flag is always set along with undefined bits in the M16C status register. Thus use bitwise operators instead of full equality for checking that register against bus errors. Signed-off-by: Ahmed S. Darwish Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/net/can/usb') diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 55407b9663a6..7af379ca861b 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -698,9 +698,7 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev, } new_state = CAN_STATE_ERROR_PASSIVE; - } - - if (status == M16C_STATE_BUS_ERROR) { + } else if (status & M16C_STATE_BUS_ERROR) { if ((priv->can.state < CAN_STATE_ERROR_WARNING) && ((txerr >= 96) || (rxerr >= 96))) { cf->can_id |= CAN_ERR_CRTL; @@ -710,7 +708,8 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev, priv->can.can_stats.error_warning++; new_state = CAN_STATE_ERROR_WARNING; - } else if (priv->can.state > CAN_STATE_ERROR_ACTIVE) { + } else if ((priv->can.state > CAN_STATE_ERROR_ACTIVE) && + ((txerr < 96) && (rxerr < 96))) { cf->can_id |= CAN_ERR_PROT; cf->data[2] = CAN_ERR_PROT_ACTIVE; -- cgit v1.2.3