ath10k: implement NAPI support

Add NAPI support for rx and tx completion. NAPI poll is scheduled from interrupt handler. The design is as below - on interrupt - schedule napi and mask interrupts - on poll - process all pipes (no actual Tx/Rx) - process Rx within budget - if quota exceeds budget reschedule napi poll by returning budget - process Tx completions and update budget if necessary - process Tx fetch indications (pull-push) - push any other pending Tx (if possible) - before resched or napi completion replenish htt rx ring buffer - if work done < budget, complete napi poll and unmask interrupts This change also get rid of two tasklets (intr_tq and txrx_compl_task). Measured peak throughput with NAPI on IPQ4019 platform in controlled environment. No noticeable reduction in throughput is seen and also observed improvements in CPU usage. Approx. 15% CPU usage got reduced in UDP uplink case. DL: AP DUT Tx UL: AP DUT Rx IPQ4019 (avg. cpu usage %) ======== TOT +NAPI =========== ============= TCP DL 644 Mbps (42%) 645 Mbps (36%) TCP UL 673 Mbps (30%) 675 Mbps (26%) UDP DL 682 Mbps (49%) 680 Mbps (49%) UDP UL 720 Mbps (28%) 717 Mbps (11%) Signed-off-by: Rajkumar Manoharan <rmanohar@qti.qualcomm.com> Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
author: Rajkumar Manoharan <rmanohar@qti.qualcomm.com> 2016-09-02 19:46:09 +0300
committer: Kalle Valo <kvalo@qca.qualcomm.com> 2016-09-09 14:49:47 +0300
commit: 3c97f5de1f282492335a6aec1f94b77f7f899b8c (patch)
tree: 82ebf604a8ce1975f98b43ddb64e02c6a6d94ab2 /drivers/net/wireless/ath/ath10k/htt_rx.c
parent: c39265f72ae6dbcb0367be808837e2f182095d15 (diff)
1 files changed, 94 insertions, 60 deletions
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index ae6931b99da9..740b0faae927 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -34,7 +34,6 @@
 #define HTT_RX_RING_REFILL_RESCHED_MS 5
 
 static int ath10k_htt_rx_get_csum_state(struct sk_buff *skb);
-static void ath10k_htt_txrx_compl_task(unsigned long ptr);
 
 static struct sk_buff *
 ath10k_htt_rx_find_skb_paddr(struct ath10k *ar, u32 paddr)
@@ -226,7 +225,6 @@ int ath10k_htt_rx_ring_refill(struct ath10k *ar)
 void ath10k_htt_rx_free(struct ath10k_htt *htt)
 {
 	del_timer_sync(&htt->rx_ring.refill_retry_timer);
-	tasklet_kill(&htt->txrx_compl_task);
 
 	skb_queue_purge(&htt->rx_compl_q);
 	skb_queue_purge(&htt->rx_in_ord_compl_q);
@@ -520,9 +518,6 @@ int ath10k_htt_rx_alloc(struct ath10k_htt *htt)
 	skb_queue_head_init(&htt->tx_fetch_ind_q);
 	atomic_set(&htt->num_mpdus_ready, 0);
 
-	tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task,
-		     (unsigned long)htt);
-
 	ath10k_dbg(ar, ATH10K_DBG_BOOT, "htt rx ring size %d fill_level %d\n",
 		   htt->rx_ring.size, htt->rx_ring.fill_level);
 	return 0;
@@ -958,7 +953,7 @@ static void ath10k_process_rx(struct ath10k *ar,
 	trace_ath10k_rx_hdr(ar, skb->data, skb->len);
 	trace_ath10k_rx_payload(ar, skb->data, skb->len);
 
-	ieee80211_rx(ar->hw, skb);
+	ieee80211_rx_napi(ar->hw, NULL, skb, &ar->napi);
 }
 
 static int ath10k_htt_rx_nwifi_hdrlen(struct ath10k *ar,
@@ -1527,7 +1522,7 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt)
 	struct ath10k *ar = htt->ar;
 	static struct ieee80211_rx_status rx_status;
 	struct sk_buff_head amsdu;
-	int ret;
+	int ret, num_msdus;
 
 	__skb_queue_head_init(&amsdu);
 
@@ -1549,13 +1544,14 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt)
 		return ret;
 	}
 
+	num_msdus = skb_queue_len(&amsdu);
 	ath10k_htt_rx_h_ppdu(ar, &amsdu, &rx_status, 0xffff);
 	ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0);
 	ath10k_htt_rx_h_filter(ar, &amsdu, &rx_status);
 	ath10k_htt_rx_h_mpdu(ar, &amsdu, &rx_status);
 	ath10k_htt_rx_h_deliver(ar, &amsdu, &rx_status);
 
-	return 0;
+	return num_msdus;
 }
 
 static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt,
@@ -1579,15 +1575,6 @@ static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt,
 		mpdu_count += mpdu_ranges[i].mpdu_count;
 
 	atomic_add(mpdu_count, &htt->num_mpdus_ready);
-
-	tasklet_schedule(&htt->txrx_compl_task);
-}
-
-static void ath10k_htt_rx_frag_handler(struct ath10k_htt *htt)
-{
-	atomic_inc(&htt->num_mpdus_ready);
-
-	tasklet_schedule(&htt->txrx_compl_task);
 }
 
 static void ath10k_htt_rx_tx_compl_ind(struct ath10k *ar,
@@ -1772,14 +1759,15 @@ static void ath10k_htt_rx_h_rx_offload_prot(struct ieee80211_rx_status *status,
 			RX_FLAG_MMIC_STRIPPED;
 }
 
-static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar,
-				       struct sk_buff_head *list)
+static int ath10k_htt_rx_h_rx_offload(struct ath10k *ar,
+				      struct sk_buff_head *list)
 {
 	struct ath10k_htt *htt = &ar->htt;
 	struct ieee80211_rx_status *status = &htt->rx_status;
 	struct htt_rx_offload_msdu *rx;
 	struct sk_buff *msdu;
 	size_t offset;
+	int num_msdu = 0;
 
 	while ((msdu = __skb_dequeue(list))) {
 		/* Offloaded frames don't have Rx descriptor. Instead they have
@@ -1819,10 +1807,12 @@ static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar,
 		ath10k_htt_rx_h_rx_offload_prot(status, msdu);
 		ath10k_htt_rx_h_channel(ar, status, NULL, rx->vdev_id);
 		ath10k_process_rx(ar, status, msdu);
+		num_msdu++;
 	}
+	return num_msdu;
 }
 
-static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
+static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
 {
 	struct ath10k_htt *htt = &ar->htt;
 	struct htt_resp *resp = (void *)skb->data;
@@ -1835,12 +1825,12 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
 	u8 tid;
 	bool offload;
 	bool frag;
-	int ret;
+	int ret, num_msdus = 0;
 
 	lockdep_assert_held(&htt->rx_ring.lock);
 
 	if (htt->rx_confused)
-		return;
+		return -EIO;
 
 	skb_pull(skb, sizeof(resp->hdr));
 	skb_pull(skb, sizeof(resp->rx_in_ord_ind));
@@ -1859,7 +1849,7 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
 
 	if (skb->len < msdu_count * sizeof(*resp->rx_in_ord_ind.msdu_descs)) {
 		ath10k_warn(ar, "dropping invalid in order rx indication\n");
-		return;
+		return -EINVAL;
 	}
 
 	/* The event can deliver more than 1 A-MSDU. Each A-MSDU is later
@@ -1870,14 +1860,14 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
 	if (ret < 0) {
 		ath10k_warn(ar, "failed to pop paddr list: %d\n", ret);
 		htt->rx_confused = true;
-		return;
+		return -EIO;
 	}
 
 	/* Offloaded frames are very different and need to be handled
 	 * separately.
 	 */
 	if (offload)
-		ath10k_htt_rx_h_rx_offload(ar, &list);
+		num_msdus = ath10k_htt_rx_h_rx_offload(ar, &list);
 
 	while (!skb_queue_empty(&list)) {
 		__skb_queue_head_init(&amsdu);
@@ -1890,6 +1880,7 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
 			 * better to report something than nothing though. This
 			 * should still give an idea about rx rate to the user.
 			 */
+			num_msdus += skb_queue_len(&amsdu);
 			ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id);
 			ath10k_htt_rx_h_filter(ar, &amsdu, status);
 			ath10k_htt_rx_h_mpdu(ar, &amsdu, status);
@@ -1902,9 +1893,10 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
 			ath10k_warn(ar, "failed to extract amsdu: %d\n", ret);
 			htt->rx_confused = true;
 			__skb_queue_purge(&list);
-			return;
+			return -EIO;
 		}
 	}
+	return num_msdus;
 }
 
 static void ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar,
@@ -2267,7 +2259,6 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
 	}
 	case HTT_T2H_MSG_TYPE_TX_COMPL_IND:
 		ath10k_htt_rx_tx_compl_ind(htt->ar, skb);
-		tasklet_schedule(&htt->txrx_compl_task);
 		break;
 	case HTT_T2H_MSG_TYPE_SEC_IND: {
 		struct ath10k *ar = htt->ar;
@@ -2284,7 +2275,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
 	case HTT_T2H_MSG_TYPE_RX_FRAG_IND: {
 		ath10k_dbg_dump(ar, ATH10K_DBG_HTT_DUMP, NULL, "htt event: ",
 				skb->data, skb->len);
-		ath10k_htt_rx_frag_handler(htt);
+		atomic_inc(&htt->num_mpdus_ready);
 		break;
 	}
 	case HTT_T2H_MSG_TYPE_TEST:
@@ -2320,8 +2311,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
 		break;
 	}
 	case HTT_T2H_MSG_TYPE_RX_IN_ORD_PADDR_IND: {
-		skb_queue_tail(&htt->rx_in_ord_compl_q, skb);
-		tasklet_schedule(&htt->txrx_compl_task);
+		__skb_queue_tail(&htt->rx_in_ord_compl_q, skb);
 		return false;
 	}
 	case HTT_T2H_MSG_TYPE_TX_CREDIT_UPDATE_IND:
@@ -2347,7 +2337,6 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
 			break;
 		}
 		skb_queue_tail(&htt->tx_fetch_ind_q, tx_fetch_ind);
-		tasklet_schedule(&htt->txrx_compl_task);
 		break;
 	}
 	case HTT_T2H_MSG_TYPE_TX_FETCH_CONFIRM:
@@ -2376,27 +2365,77 @@ void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar,
 }
 EXPORT_SYMBOL(ath10k_htt_rx_pktlog_completion_handler);
 
-static void ath10k_htt_txrx_compl_task(unsigned long ptr)
+int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget)
 {
-	struct ath10k_htt *htt = (struct ath10k_htt *)ptr;
-	struct ath10k *ar = htt->ar;
+	struct ath10k_htt *htt = &ar->htt;
 	struct htt_tx_done tx_done = {};
-	struct sk_buff_head rx_ind_q;
 	struct sk_buff_head tx_ind_q;
 	struct sk_buff *skb;
 	unsigned long flags;
-	int num_mpdus;
+	int quota = 0, done, num_rx_msdus;
+	bool resched_napi = false;
 
-	__skb_queue_head_init(&rx_ind_q);
 	__skb_queue_head_init(&tx_ind_q);
 
-	spin_lock_irqsave(&htt->rx_in_ord_compl_q.lock, flags);
-	skb_queue_splice_init(&htt->rx_in_ord_compl_q, &rx_ind_q);
-	spin_unlock_irqrestore(&htt->rx_in_ord_compl_q.lock, flags);
+	/* Since in-ord-ind can deliver more than 1 A-MSDU in single event,
+	 * process it first to utilize full available quota.
+	 */
+	while (quota < budget) {
+		if (skb_queue_empty(&htt->rx_in_ord_compl_q))
+			break;
 
-	spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags);
-	skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q);
-	spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags);
+		skb = __skb_dequeue(&htt->rx_in_ord_compl_q);
+		if (!skb) {
+			resched_napi = true;
+			goto exit;
+		}
+
+		spin_lock_bh(&htt->rx_ring.lock);
+		num_rx_msdus = ath10k_htt_rx_in_ord_ind(ar, skb);
+		spin_unlock_bh(&htt->rx_ring.lock);
+		if (num_rx_msdus < 0) {
+			resched_napi = true;
+			goto exit;
+		}
+
+		dev_kfree_skb_any(skb);
+		if (num_rx_msdus > 0)
+			quota += num_rx_msdus;
+
+		if ((quota > ATH10K_NAPI_QUOTA_LIMIT) &&
+		    !skb_queue_empty(&htt->rx_in_ord_compl_q)) {
+			resched_napi = true;
+			goto exit;
+		}
+	}
+
+	while (quota < budget) {
+		/* no more data to receive */
+		if (!atomic_read(&htt->num_mpdus_ready))
+			break;
+
+		num_rx_msdus = ath10k_htt_rx_handle_amsdu(htt);
+		if (num_rx_msdus < 0) {
+			resched_napi = true;
+			goto exit;
+		}
+
+		quota += num_rx_msdus;
+		atomic_dec(&htt->num_mpdus_ready);
+		if ((quota > ATH10K_NAPI_QUOTA_LIMIT) &&
+		    atomic_read(&htt->num_mpdus_ready)) {
+			resched_napi = true;
+			goto exit;
+		}
+	}
+
+	/* From NAPI documentation:
+	 *  The napi poll() function may also process TX completions, in which
+	 *  case if it processes the entire TX ring then it should count that
+	 *  work as the rest of the budget.
+	 */
+	if ((quota < budget) && !kfifo_is_empty(&htt->txdone_fifo))
+		quota = budget;
 
 	/* kfifo_get: called only within txrx_tasklet so it's neatly serialized.
 	 * From kfifo_get() documentation:
@@ -2406,27 +2445,22 @@ static void ath10k_htt_txrx_compl_task(unsigned long ptr)
 	while (kfifo_get(&htt->txdone_fifo, &tx_done))
 		ath10k_txrx_tx_unref(htt, &tx_done);
 
+	spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags);
+	skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q);
+	spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags);
+
 	while ((skb = __skb_dequeue(&tx_ind_q))) {
 		ath10k_htt_rx_tx_fetch_ind(ar, skb);
 		dev_kfree_skb_any(skb);
 	}
 
-	num_mpdus = atomic_read(&htt->num_mpdus_ready);
-
-	while (num_mpdus) {
-		if (ath10k_htt_rx_handle_amsdu(htt))
-			break;
-
-		num_mpdus--;
-		atomic_dec(&htt->num_mpdus_ready);
-	}
-
-	while ((skb = __skb_dequeue(&rx_ind_q))) {
-		spin_lock_bh(&htt->rx_ring.lock);
-		ath10k_htt_rx_in_ord_ind(ar, skb);
-		spin_unlock_bh(&htt->rx_ring.lock);
-		dev_kfree_skb_any(skb);
-	}
-
+exit:
 	ath10k_htt_rx_msdu_buff_replenish(htt);
+	/* In case of rx failure or more data to read, report budget
+	 * to reschedule NAPI poll
+	 */
+	done = resched_napi ? budget : quota;
+
+	return done;
 }
+EXPORT_SYMBOL(ath10k_htt_txrx_compl_task);
author	Rajkumar Manoharan <rmanohar@qti.qualcomm.com>	2016-09-02 19:46:09 +0300
committer	Kalle Valo <kvalo@qca.qualcomm.com>	2016-09-09 14:49:47 +0300
commit	3c97f5de1f282492335a6aec1f94b77f7f899b8c (patch)
tree	82ebf604a8ce1975f98b43ddb64e02c6a6d94ab2 /drivers/net/wireless/ath/ath10k/htt_rx.c
parent	c39265f72ae6dbcb0367be808837e2f182095d15 (diff)