author      David S. Miller <davem@davemloft.net>    2014-03-07 16:24:54 -0500
committer   David S. Miller <davem@davemloft.net>    2014-03-07 16:24:54 -0500
commit      a5d5ff572c33acc629779e383f02c048a98859e0 (patch)
tree        fe0712d62e1b8022612c12fa7777540d1df598cf
parent      91bd66e4c8e0042fdea945c5e455363f1d44959a (diff)
parent      6128d1bb30748d0ff56a63898d14f312126e404c (diff)
Merge branch 'r8152'
Hayes Wang says:
====================
r8152: tx/rx improvement
- Select the suitable spin lock variant for each function.
- Add an additional check to avoid taking the spin lock unnecessarily.
- Raise the priority of tx handling so that it is not interrupted by rx.
- Support rx checksum, large send (TSO), and IPv6 hw checksum. A
  stand-alone sketch of the rx checksum decision follows the log message.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
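The rx checksum support called out above maps two new rx descriptor words (opts2/opts3) to a verdict the network stack understands. The stand-alone C sketch below mirrors the decision the diff adds as r8152_rx_csum(); the bit values are copied from the patch, while the function and enum names are illustrative only and not driver code. (The sketch tests the RD_IPV6_CS bit explicitly, whereas the hunk as merged tests the bare constant.)

/*
 * Stand-alone sketch of the rx checksum decision (mirrors the new
 * r8152_rx_csum() in the diff below).  Bit values are copied from the
 * patch; everything else here is illustrative, not driver code.
 */
#include <stdint.h>
#include <stdio.h>

#define RD_UDP_CS	(1 << 23)
#define RD_TCP_CS	(1 << 22)
#define RD_IPV6_CS	(1 << 20)
#define RD_IPV4_CS	(1 << 19)
#define IPF		(1 << 23)	/* IP checksum fail */
#define UDPF		(1 << 22)	/* UDP checksum fail */
#define TCPF		(1 << 21)	/* TCP checksum fail */

enum csum_verdict { CSUM_NONE, CSUM_UNNECESSARY };

static enum csum_verdict rx_csum(uint32_t opts2, uint32_t opts3)
{
	if (opts2 & RD_IPV4_CS) {
		/* IPv4: trust the frame only if neither the IP header
		 * checksum nor the offloaded L4 checksum failed. */
		if (opts3 & IPF)
			return CSUM_NONE;
		if ((opts2 & RD_UDP_CS) && (opts3 & UDPF))
			return CSUM_NONE;
		if ((opts2 & RD_TCP_CS) && (opts3 & TCPF))
			return CSUM_NONE;
		return CSUM_UNNECESSARY;
	}
	if (opts2 & RD_IPV6_CS) {
		/* IPv6 has no header checksum; only the L4 result matters. */
		if ((opts2 & RD_UDP_CS) && !(opts3 & UDPF))
			return CSUM_UNNECESSARY;
		if ((opts2 & RD_TCP_CS) && !(opts3 & TCPF))
			return CSUM_UNNECESSARY;
	}
	return CSUM_NONE;
}

int main(void)
{
	/* TCP over IPv4 with no failure bits set -> 1 (CSUM_UNNECESSARY) */
	printf("%d\n", rx_csum(RD_IPV4_CS | RD_TCP_CS, 0));
	return 0;
}

In the driver the verdict is written to skb->ip_summed before the frame is handed to the stack, so anything the hardware could not validate simply falls back to software checksumming.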
-rw-r--r--   drivers/net/usb/r8152.c | 323
1 file changed, 263 insertions(+), 60 deletions(-)
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index b8eee365e15d..c7ef30dee1b9 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -21,9 +21,10 @@
 #include <linux/list.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <net/ip6_checksum.h>

 /* Version Information */
-#define DRIVER_VERSION "v1.05.0 (2014/02/18)"
+#define DRIVER_VERSION "v1.06.0 (2014/03/03)"
 #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
 #define DRIVER_DESC "Realtek RTL8152/RTL8153 Based USB Ethernet Adapters"
 #define MODULENAME "r8152"
@@ -447,6 +448,7 @@ enum rtl8152_flags {
 	RTL8152_LINK_CHG,
 	SELECTIVE_SUSPEND,
 	PHY_RESET,
+	SCHEDULE_TASKLET,
 };

 /* Define these values to match your device */
@@ -466,8 +468,18 @@ enum rtl8152_flags {
 struct rx_desc {
 	__le32 opts1;
 #define RX_LEN_MASK			0x7fff
+
 	__le32 opts2;
+#define RD_UDP_CS			(1 << 23)
+#define RD_TCP_CS			(1 << 22)
+#define RD_IPV6_CS			(1 << 20)
+#define RD_IPV4_CS			(1 << 19)
+
 	__le32 opts3;
+#define IPF				(1 << 23) /* IP checksum fail */
+#define UDPF				(1 << 22) /* UDP checksum fail */
+#define TCPF				(1 << 21) /* TCP checksum fail */
+
 	__le32 opts4;
 	__le32 opts5;
 	__le32 opts6;
@@ -477,13 +489,21 @@ struct tx_desc {
 	__le32 opts1;
 #define TX_FS			(1 << 31) /* First segment of a packet */
 #define TX_LS			(1 << 30) /* Final segment of a packet */
-#define TX_LEN_MASK		0x3ffff
+#define GTSENDV4		(1 << 28)
+#define GTSENDV6		(1 << 27)
+#define GTTCPHO_SHIFT		18
+#define GTTCPHO_MAX		0x7fU
+#define TX_LEN_MAX		0x3ffffU

 	__le32 opts2;
 #define UDP_CS			(1 << 31) /* Calculate UDP/IP checksum */
 #define TCP_CS			(1 << 30) /* Calculate TCP/IP checksum */
 #define IPV4_CS			(1 << 29) /* Calculate IPv4 checksum */
 #define IPV6_CS			(1 << 28) /* Calculate IPv6 checksum */
+#define MSS_SHIFT		17
+#define MSS_MAX			0x7ffU
+#define TCPHO_SHIFT		17
+#define TCPHO_MAX		0x7ffU
 };

 struct r8152;
@@ -550,12 +570,21 @@ enum rtl_version {
 	RTL_VER_MAX
 };

+enum tx_csum_stat {
+	TX_CSUM_SUCCESS = 0,
+	TX_CSUM_TSO,
+	TX_CSUM_NONE
+};
+
 /* Maximum number of multicast addresses to filter (vs. Rx-all-multicast).
  * The RTL chips use a 64 element hash table based on the Ethernet CRC.
  */
 static const int multicast_filter_limit = 32;
 static unsigned int rx_buf_sz = 16384;

+#define RTL_LIMITED_TSO_SIZE	(rx_buf_sz - sizeof(struct tx_desc) - \
+				 VLAN_ETH_HLEN - VLAN_HLEN)
+
 static int get_registers(struct r8152 *tp, u16 value, u16 index, u16 size,
 			 void *data)
 {
@@ -963,7 +992,6 @@ static int rtl8152_set_mac_address(struct net_device *netdev, void *p)
 static void read_bulk_callback(struct urb *urb)
 {
 	struct net_device *netdev;
-	unsigned long flags;
 	int status = urb->status;
 	struct rx_agg *agg;
 	struct r8152 *tp;
@@ -997,9 +1025,9 @@ static void read_bulk_callback(struct urb *urb)
 		if (urb->actual_length < ETH_ZLEN)
 			break;

-		spin_lock_irqsave(&tp->rx_lock, flags);
+		spin_lock(&tp->rx_lock);
 		list_add_tail(&agg->list, &tp->rx_done);
-		spin_unlock_irqrestore(&tp->rx_lock, flags);
+		spin_unlock(&tp->rx_lock);
 		tasklet_schedule(&tp->tl);
 		return;
 	case -ESHUTDOWN:
@@ -1022,9 +1050,9 @@
 	if (result == -ENODEV) {
 		netif_device_detach(tp->netdev);
 	} else if (result) {
-		spin_lock_irqsave(&tp->rx_lock, flags);
+		spin_lock(&tp->rx_lock);
 		list_add_tail(&agg->list, &tp->rx_done);
-		spin_unlock_irqrestore(&tp->rx_lock, flags);
+		spin_unlock(&tp->rx_lock);
 		tasklet_schedule(&tp->tl);
 	}
 }
@@ -1033,7 +1061,6 @@ static void write_bulk_callback(struct urb *urb)
 {
 	struct net_device_stats *stats;
 	struct net_device *netdev;
-	unsigned long flags;
 	struct tx_agg *agg;
 	struct r8152 *tp;
 	int status = urb->status;
@@ -1057,9 +1084,9 @@ static void write_bulk_callback(struct urb *urb)
 		stats->tx_bytes += agg->skb_len;
 	}

-	spin_lock_irqsave(&tp->tx_lock, flags);
+	spin_lock(&tp->tx_lock);
 	list_add_tail(&agg->list, &tp->tx_free);
-	spin_unlock_irqrestore(&tp->tx_lock, flags);
+	spin_unlock(&tp->tx_lock);

 	usb_autopm_put_interface_async(tp->intf);

@@ -1073,7 +1100,7 @@ static void write_bulk_callback(struct urb *urb)
 		return;

 	if (!skb_queue_empty(&tp->tx_queue))
-		schedule_delayed_work(&tp->schedule, 0);
+		tasklet_schedule(&tp->tl);
 }

 static void intr_callback(struct urb *urb)
@@ -1268,6 +1295,9 @@ static struct tx_agg *r8152_get_tx_agg(struct r8152 *tp)
 	struct tx_agg *agg = NULL;
 	unsigned long flags;

+	if (list_empty(&tp->tx_free))
+		return NULL;
+
 	spin_lock_irqsave(&tp->tx_lock, flags);
 	if (!list_empty(&tp->tx_free)) {
 		struct list_head *cursor;
@@ -1281,24 +1311,130 @@ static struct tx_agg *r8152_get_tx_agg(struct r8152 *tp)
 	return agg;
 }

-static void
-r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, struct sk_buff *skb)
+static inline __be16 get_protocol(struct sk_buff *skb)
 {
-	memset(desc, 0, sizeof(*desc));
+	__be16 protocol;

-	desc->opts1 = cpu_to_le32((skb->len & TX_LEN_MASK) | TX_FS | TX_LS);
+	if (skb->protocol == htons(ETH_P_8021Q))
+		protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
+	else
+		protocol = skb->protocol;
+
+	return protocol;
+}
+
+/*
+ * r8152_csum_workaround()
+ * The hw limites the value the transport offset. When the offset is out of the
+ * range, calculate the checksum by sw.
+ */
+static void r8152_csum_workaround(struct r8152 *tp, struct sk_buff *skb,
+				  struct sk_buff_head *list)
+{
+	if (skb_shinfo(skb)->gso_size) {
+		netdev_features_t features = tp->netdev->features;
+		struct sk_buff_head seg_list;
+		struct sk_buff *segs, *nskb;
+
+		features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO);
+		segs = skb_gso_segment(skb, features);
+		if (IS_ERR(segs) || !segs)
+			goto drop;
+
+		__skb_queue_head_init(&seg_list);
+
+		do {
+			nskb = segs;
+			segs = segs->next;
+			nskb->next = NULL;
+			__skb_queue_tail(&seg_list, nskb);
+		} while (segs);
+
+		skb_queue_splice(&seg_list, list);
+		dev_kfree_skb(skb);
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		if (skb_checksum_help(skb) < 0)
+			goto drop;
+
+		__skb_queue_head(list, skb);
+	} else {
+		struct net_device_stats *stats;
+
+drop:
+		stats = &tp->netdev->stats;
+		stats->tx_dropped++;
+		dev_kfree_skb(skb);
+	}
+}
+
+/*
+ * msdn_giant_send_check()
+ * According to the document of microsoft, the TCP Pseudo Header excludes the
+ * packet length for IPv6 TCP large packets.
+ */
+static int msdn_giant_send_check(struct sk_buff *skb)
+{
+	const struct ipv6hdr *ipv6h;
+	struct tcphdr *th;
+
+	ipv6h = ipv6_hdr(skb);
+	th = tcp_hdr(skb);
+
+	th->check = 0;
+	th->check = ~tcp_v6_check(0, &ipv6h->saddr, &ipv6h->daddr, 0);
+
+	return 0;
+}
+
+static int r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc,
+			 struct sk_buff *skb, u32 len, u32 transport_offset)
+{
+	u32 mss = skb_shinfo(skb)->gso_size;
+	u32 opts1, opts2 = 0;
+	int ret = TX_CSUM_SUCCESS;
+
+	WARN_ON_ONCE(len > TX_LEN_MAX);
+
+	opts1 = len | TX_FS | TX_LS;
+
+	if (mss) {
+		if (transport_offset > GTTCPHO_MAX) {
+			netif_warn(tp, tx_err, tp->netdev,
+				   "Invalid transport offset 0x%x for TSO\n",
+				   transport_offset);
+			ret = TX_CSUM_TSO;
+			goto unavailable;
+		}
+
+		switch (get_protocol(skb)) {
+		case htons(ETH_P_IP):
+			opts1 |= GTSENDV4;
+			break;
+
+		case htons(ETH_P_IPV6):
+			opts1 |= GTSENDV6;
+			msdn_giant_send_check(skb);
+			break;

-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		__be16 protocol;
+		default:
+			WARN_ON_ONCE(1);
+			break;
+		}
+
+		opts1 |= transport_offset << GTTCPHO_SHIFT;
+		opts2 |= min(mss, MSS_MAX) << MSS_SHIFT;
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u8 ip_protocol;
-		u32 opts2 = 0;

-		if (skb->protocol == htons(ETH_P_8021Q))
-			protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
-		else
-			protocol = skb->protocol;
+		if (transport_offset > TCPHO_MAX) {
+			netif_warn(tp, tx_err, tp->netdev,
+				   "Invalid transport offset 0x%x\n",
+				   transport_offset);
+			ret = TX_CSUM_NONE;
+			goto unavailable;
+		}

-		switch (protocol) {
+		switch (get_protocol(skb)) {
 		case htons(ETH_P_IP):
 			opts2 |= IPV4_CS;
 			ip_protocol = ip_hdr(skb)->protocol;
@@ -1314,30 +1450,33 @@ r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, struct sk_buff *skb)
 			break;
 		}

-		if (ip_protocol == IPPROTO_TCP) {
+		if (ip_protocol == IPPROTO_TCP)
 			opts2 |= TCP_CS;
-			opts2 |= (skb_transport_offset(skb) & 0x7fff) << 17;
-		} else if (ip_protocol == IPPROTO_UDP) {
+		else if (ip_protocol == IPPROTO_UDP)
 			opts2 |= UDP_CS;
-		} else {
+		else
 			WARN_ON_ONCE(1);
-		}

-		desc->opts2 = cpu_to_le32(opts2);
+		opts2 |= transport_offset << TCPHO_SHIFT;
 	}
+
+	desc->opts2 = cpu_to_le32(opts2);
+	desc->opts1 = cpu_to_le32(opts1);
+
+unavailable:
+	return ret;
 }

 static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
 {
 	struct sk_buff_head skb_head, *tx_queue = &tp->tx_queue;
-	unsigned long flags;
 	int remain, ret;
 	u8 *tx_data;

 	__skb_queue_head_init(&skb_head);
-	spin_lock_irqsave(&tx_queue->lock, flags);
+	spin_lock(&tx_queue->lock);
 	skb_queue_splice_init(tx_queue, &skb_head);
-	spin_unlock_irqrestore(&tx_queue->lock, flags);
+	spin_unlock(&tx_queue->lock);

 	tx_data = agg->head;
 	agg->skb_num = agg->skb_len = 0;
@@ -1347,47 +1486,65 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
 		struct tx_desc *tx_desc;
 		struct sk_buff *skb;
 		unsigned int len;
+		u32 offset;

 		skb = __skb_dequeue(&skb_head);
 		if (!skb)
 			break;

-		remain -= sizeof(*tx_desc);
-		len = skb->len;
-		if (remain < len) {
+		len = skb->len + sizeof(*tx_desc);
+
+		if (len > remain) {
 			__skb_queue_head(&skb_head, skb);
 			break;
 		}

 		tx_data = tx_agg_align(tx_data);
 		tx_desc = (struct tx_desc *)tx_data;
+
+		offset = (u32)skb_transport_offset(skb);
+
+		if (r8152_tx_csum(tp, tx_desc, skb, skb->len, offset)) {
+			r8152_csum_workaround(tp, skb, &skb_head);
+			continue;
+		}
+
 		tx_data += sizeof(*tx_desc);

-		r8152_tx_csum(tp, tx_desc, skb);
-		memcpy(tx_data, skb->data, len);
-		agg->skb_num++;
+		len = skb->len;
+		if (skb_copy_bits(skb, 0, tx_data, len) < 0) {
+			struct net_device_stats *stats = &tp->netdev->stats;
+
+			stats->tx_dropped++;
+			dev_kfree_skb_any(skb);
+			tx_data -= sizeof(*tx_desc);
+			continue;
+		}
+
+		tx_data += len;
 		agg->skb_len += len;
+		agg->skb_num++;
+
 		dev_kfree_skb_any(skb);

-		tx_data += len;
 		remain = rx_buf_sz - (int)(tx_agg_align(tx_data) - agg->head);
 	}

 	if (!skb_queue_empty(&skb_head)) {
-		spin_lock_irqsave(&tx_queue->lock, flags);
+		spin_lock(&tx_queue->lock);
 		skb_queue_splice(&skb_head, tx_queue);
-		spin_unlock_irqrestore(&tx_queue->lock, flags);
+		spin_unlock(&tx_queue->lock);
 	}

-	netif_tx_lock_bh(tp->netdev);
+	netif_tx_lock(tp->netdev);

 	if (netif_queue_stopped(tp->netdev) &&
 	    skb_queue_len(&tp->tx_queue) < tp->tx_qlen)
 		netif_wake_queue(tp->netdev);

-	netif_tx_unlock_bh(tp->netdev);
+	netif_tx_unlock(tp->netdev);

-	ret = usb_autopm_get_interface(tp->intf);
+	ret = usb_autopm_get_interface_async(tp->intf);
 	if (ret < 0)
 		goto out_tx_fill;
@@ -1395,14 +1552,45 @@
 			  agg->head, (int)(tx_data - (u8 *)agg->head),
 			  (usb_complete_t)write_bulk_callback, agg);

-	ret = usb_submit_urb(agg->urb, GFP_KERNEL);
+	ret = usb_submit_urb(agg->urb, GFP_ATOMIC);
 	if (ret < 0)
-		usb_autopm_put_interface(tp->intf);
+		usb_autopm_put_interface_async(tp->intf);

 out_tx_fill:
 	return ret;
 }

+static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc)
+{
+	u8 checksum = CHECKSUM_NONE;
+	u32 opts2, opts3;
+
+	if (tp->version == RTL_VER_01)
+		goto return_result;
+
+	opts2 = le32_to_cpu(rx_desc->opts2);
+	opts3 = le32_to_cpu(rx_desc->opts3);
+
+	if (opts2 & RD_IPV4_CS) {
+		if (opts3 & IPF)
+			checksum = CHECKSUM_NONE;
+		else if ((opts2 & RD_UDP_CS) && (opts3 & UDPF))
+			checksum = CHECKSUM_NONE;
+		else if ((opts2 & RD_TCP_CS) && (opts3 & TCPF))
+			checksum = CHECKSUM_NONE;
+		else
+			checksum = CHECKSUM_UNNECESSARY;
+	} else if (RD_IPV6_CS) {
+		if ((opts2 & RD_UDP_CS) && !(opts3 & UDPF))
+			checksum = CHECKSUM_UNNECESSARY;
+		else if ((opts2 & RD_TCP_CS) && !(opts3 & TCPF))
+			checksum = CHECKSUM_UNNECESSARY;
+	}
+
+return_result:
+	return checksum;
+}
+
 static void rx_bottom(struct r8152 *tp)
 {
 	unsigned long flags;
@@ -1455,8 +1643,10 @@ static void rx_bottom(struct r8152 *tp)
 			skb = netdev_alloc_skb_ip_align(netdev, pkt_len);
 			if (!skb) {
 				stats->rx_dropped++;
-				break;
+				goto find_next_rx;
 			}
+
+			skb->ip_summed = r8152_rx_csum(tp, rx_desc);
 			memcpy(skb->data, rx_data, pkt_len);
 			skb_put(skb, pkt_len);
 			skb->protocol = eth_type_trans(skb, netdev);
@@ -1464,6 +1654,7 @@ static void rx_bottom(struct r8152 *tp)
 			stats->rx_packets++;
 			stats->rx_bytes += pkt_len;

+find_next_rx:
 			rx_data = rx_agg_align(rx_data + pkt_len + CRC_SIZE);
 			rx_desc = (struct rx_desc *)rx_data;
 			len_used = (int)(rx_data - (u8 *)agg->head);
@@ -1535,6 +1726,7 @@ static void bottom_half(unsigned long data)
 		return;

 	rx_bottom(tp);
+	tx_bottom(tp);
 }

 static
@@ -1551,16 +1743,15 @@ static void rtl_drop_queued_tx(struct r8152 *tp)
 {
 	struct net_device_stats *stats = &tp->netdev->stats;
 	struct sk_buff_head skb_head, *tx_queue = &tp->tx_queue;
-	unsigned long flags;
 	struct sk_buff *skb;

 	if (skb_queue_empty(tx_queue))
 		return;

 	__skb_queue_head_init(&skb_head);
-	spin_lock_irqsave(&tx_queue->lock, flags);
+	spin_lock_bh(&tx_queue->lock);
 	skb_queue_splice_init(tx_queue, &skb_head);
-	spin_unlock_irqrestore(&tx_queue->lock, flags);
+	spin_unlock_bh(&tx_queue->lock);

 	while ((skb = __skb_dequeue(&skb_head))) {
 		dev_kfree_skb(skb);
@@ -1631,7 +1822,7 @@ static void _rtl8152_set_rx_mode(struct net_device *netdev)
 }

 static netdev_tx_t rtl8152_start_xmit(struct sk_buff *skb,
-				struct net_device *netdev)
+					struct net_device *netdev)
 {
 	struct r8152 *tp = netdev_priv(netdev);

@@ -1639,13 +1830,17 @@ static netdev_tx_t rtl8152_start_xmit(struct sk_buff *skb,
 	skb_queue_tail(&tp->tx_queue, skb);

-	if (list_empty(&tp->tx_free) &&
-	    skb_queue_len(&tp->tx_queue) > tp->tx_qlen)
+	if (!list_empty(&tp->tx_free)) {
+		if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
+			set_bit(SCHEDULE_TASKLET, &tp->flags);
+			schedule_delayed_work(&tp->schedule, 0);
+		} else {
+			usb_mark_last_busy(tp->udev);
+			tasklet_schedule(&tp->tl);
+		}
+	} else if (skb_queue_len(&tp->tx_queue) > tp->tx_qlen)
 		netif_stop_queue(netdev);

-	if (!list_empty(&tp->tx_free))
-		schedule_delayed_work(&tp->schedule, 0);
-
 	return NETDEV_TX_OK;
 }
@@ -2524,8 +2719,11 @@ static void rtl_work_func_t(struct work_struct *work)
 	if (test_bit(RTL8152_SET_RX_MODE, &tp->flags))
 		_rtl8152_set_rx_mode(tp->netdev);

-	if (tp->speed & LINK_STATUS)
-		tx_bottom(tp);
+	if (test_bit(SCHEDULE_TASKLET, &tp->flags) &&
+	    (tp->speed & LINK_STATUS)) {
+		clear_bit(SCHEDULE_TASKLET, &tp->flags);
+		tasklet_schedule(&tp->tl);
+	}

 	if (test_bit(PHY_RESET, &tp->flags))
 		rtl_phy_reset(tp);
@@ -3094,10 +3292,15 @@ static int rtl8152_probe(struct usb_interface *intf,
 	netdev->netdev_ops = &rtl8152_netdev_ops;
 	netdev->watchdog_timeo = RTL8152_TX_TIMEOUT;

-	netdev->features |= NETIF_F_IP_CSUM;
-	netdev->hw_features = NETIF_F_IP_CSUM;
+	netdev->features |= NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
+			    NETIF_F_TSO | NETIF_F_FRAGLIST | NETIF_F_IPV6_CSUM |
+			    NETIF_F_TSO6;
+	netdev->hw_features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
+			      NETIF_F_TSO | NETIF_F_FRAGLIST |
+			      NETIF_F_IPV6_CSUM | NETIF_F_TSO6;

 	SET_ETHTOOL_OPS(netdev, &ops);
+	netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE);

 	tp->mii.dev = netdev;
 	tp->mii.mdio_read = read_mii_word;
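A non-obvious detail in the TSO path above is msdn_giant_send_check(): for IPv6 large sends the TCP checksum field is pre-seeded with a pseudo-header checksum that deliberately leaves the payload length out, because the hardware recomputes it per segment. The user-space sketch below illustrates that seed value under the usual pseudo-header definition (source and destination addresses plus Next Header, with the length forced to zero); the function name and byte-order handling are simplified assumptions, not kernel API.

/*
 * Rough illustration of the checksum seed written by the patch's
 * msdn_giant_send_check(): the IPv6/TCP pseudo-header sum with the
 * length word deliberately left at zero.  Names and byte-order
 * handling are simplified for the example; this is not kernel code.
 */
#include <stdint.h>
#include <stdio.h>
#include <netinet/in.h>
#include <arpa/inet.h>

static uint16_t fold32(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/* Pseudo-header sum over saddr, daddr and Next Header only; the length
 * is omitted, matching the ~tcp_v6_check(0, saddr, daddr, 0) expression
 * used in the patch. */
static uint16_t tso6_csum_seed(const struct in6_addr *saddr,
			       const struct in6_addr *daddr)
{
	uint32_t sum = 0;
	const uint8_t *p;
	int i;

	for (p = saddr->s6_addr, i = 0; i < 16; i += 2)
		sum += (uint32_t)(p[i] << 8 | p[i + 1]);
	for (p = daddr->s6_addr, i = 0; i < 16; i += 2)
		sum += (uint32_t)(p[i] << 8 | p[i + 1]);
	sum += IPPROTO_TCP;	/* Next Header */
	/* TCP length intentionally not added */

	return fold32(sum);
}

int main(void)
{
	struct in6_addr s, d;

	inet_pton(AF_INET6, "2001:db8::1", &s);
	inet_pton(AF_INET6, "2001:db8::2", &d);
	printf("checksum seed: 0x%04x\n", tso6_csum_seed(&s, &d));
	return 0;
}

Relatedly, RTL_LIMITED_TSO_SIZE caps gso_max_size so that a TSO frame plus its tx descriptor and VLAN headers still fits in one aggregation buffer of rx_buf_sz bytes.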