From: nbd <nbd@3c298f89-4303-0410-b956-a3cf2f4a3e73>
Date: Sat, 10 Dec 2011 21:17:07 +0000 (+0000)
Subject: ath9k: improve handling of blockackreq (should improve aggregation behavior under... 
X-Git-Url: https://git.rohieb.name/openwrt.git/commitdiff_plain/60c148cc14e1cd983fb72022530bae59c93b2602

ath9k: improve handling of BlockAckReq frames (should improve aggregation behavior under tough Wi-Fi conditions with many retransmissions)

git-svn-id: svn://svn.openwrt.org/openwrt/trunk@29494 3c298f89-4303-0410-b956-a3cf2f4a3e73
---

diff --git a/package/mac80211/patches/560-ath9k_rework_send_bar.patch b/package/mac80211/patches/560-ath9k_rework_send_bar.patch
new file mode 100644
index 000000000..9176713a5
--- /dev/null
+++ b/package/mac80211/patches/560-ath9k_rework_send_bar.patch
@@ -0,0 +1,240 @@
+--- a/drivers/net/wireless/ath/ath9k/ath9k.h
++++ b/drivers/net/wireless/ath/ath9k/ath9k.h
+@@ -159,6 +159,9 @@ void ath_descdma_cleanup(struct ath_soft
+ /* return block-ack bitmap index given sequence and starting sequence */
+ #define ATH_BA_INDEX(_st, _seq) (((_seq) - (_st)) & (IEEE80211_SEQ_MAX - 1))
+ 
++/* return the seqno for _seq + _offset */
++#define ATH_BA_INDEX2SEQ(_seq, _offset) (((_seq) + (_offset)) & (IEEE80211_SEQ_MAX - 1))
++
+ /* returns delimiter padding required given the packet length */
+ #define ATH_AGGR_GET_NDELIM(_len)					\
+        (((_len) >= ATH_AGGR_MINPLEN) ? 0 :                             \
+@@ -253,9 +256,9 @@ struct ath_atx_tid {
+ struct ath_node {
+ #ifdef CONFIG_ATH9K_DEBUGFS
+ 	struct list_head list; /* for sc->nodes */
++#endif
+ 	struct ieee80211_sta *sta; /* station struct we're part of */
+ 	struct ieee80211_vif *vif; /* interface with which we're associated */
+-#endif
+ 	struct ath_atx_tid tid[WME_NUM_TID];
+ 	struct ath_atx_ac ac[WME_NUM_AC];
+ 	int ps_key;
+@@ -277,7 +280,6 @@ struct ath_tx_control {
+ };
+ 
+ #define ATH_TX_ERROR        0x01
+-#define ATH_TX_BAR          0x02
+ 
+ /**
+  * @txq_map:  Index is mac80211 queue number.  This is
+--- a/drivers/net/wireless/ath/ath9k/main.c
++++ b/drivers/net/wireless/ath/ath9k/main.c
+@@ -644,9 +644,9 @@ static void ath_node_attach(struct ath_s
+ 	spin_lock(&sc->nodes_lock);
+ 	list_add(&an->list, &sc->nodes);
+ 	spin_unlock(&sc->nodes_lock);
++#endif
+ 	an->sta = sta;
+ 	an->vif = vif;
+-#endif
+ 	if (sc->sc_flags & SC_OP_TXAGGR) {
+ 		ath_tx_node_init(sc, an);
+ 		an->maxampdu = 1 << (IEEE80211_HT_MAX_AMPDU_FACTOR +
+--- a/drivers/net/wireless/ath/ath9k/xmit.c
++++ b/drivers/net/wireless/ath/ath9k/xmit.c
+@@ -53,7 +53,7 @@ static void ath_tx_complete(struct ath_s
+ 			    int tx_flags, struct ath_txq *txq);
+ static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf,
+ 				struct ath_txq *txq, struct list_head *bf_q,
+-				struct ath_tx_status *ts, int txok, int sendbar);
++				struct ath_tx_status *ts, int txok);
+ static void ath_tx_txqaddbuf(struct ath_softc *sc, struct ath_txq *txq,
+ 			     struct list_head *head, bool internal);
+ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf,
+@@ -150,6 +150,12 @@ static struct ath_frame_info *get_frame_
+ 	return (struct ath_frame_info *) &tx_info->rate_driver_data[0];
+ }
+ 
++static void ath_send_bar(struct ath_atx_tid *tid, u16 seqno)
++{
++	ieee80211_send_bar(tid->an->vif, tid->an->sta->addr, tid->tidno,
++			   seqno << IEEE80211_SEQ_SEQ_SHIFT);
++}
++
+ static void ath_tx_flush_tid(struct ath_softc *sc, struct ath_atx_tid *tid)
+ {
+ 	struct ath_txq *txq = tid->ac->txq;
+@@ -158,6 +164,7 @@ static void ath_tx_flush_tid(struct ath_
+ 	struct list_head bf_head;
+ 	struct ath_tx_status ts;
+ 	struct ath_frame_info *fi;
++	bool sendbar = false;
+ 
+ 	INIT_LIST_HEAD(&bf_head);
+ 
+@@ -172,7 +179,8 @@ static void ath_tx_flush_tid(struct ath_
+ 		if (bf && fi->retries) {
+ 			list_add_tail(&bf->list, &bf_head);
+ 			ath_tx_update_baw(sc, tid, bf->bf_state.seqno);
+-			ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0, 1);
++			ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0);
++			sendbar = true;
+ 		} else {
+ 			ath_tx_send_normal(sc, txq, NULL, skb);
+ 		}
+@@ -185,6 +193,9 @@ static void ath_tx_flush_tid(struct ath_
+ 	}
+ 
+ 	spin_unlock_bh(&txq->axq_lock);
++
++	if (sendbar)
++		ath_send_bar(tid, tid->seq_start);
+ }
+ 
+ static void ath_tx_update_baw(struct ath_softc *sc, struct ath_atx_tid *tid,
+@@ -255,7 +266,7 @@ static void ath_tid_drain(struct ath_sof
+ 			ath_tx_update_baw(sc, tid, bf->bf_state.seqno);
+ 
+ 		spin_unlock(&txq->axq_lock);
+-		ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0, 0);
++		ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0);
+ 		spin_lock(&txq->axq_lock);
+ 	}
+ 
+@@ -381,7 +392,7 @@ static void ath_tx_complete_aggr(struct 
+ 	struct ath_buf *bf_next, *bf_last = bf->bf_lastbf;
+ 	struct list_head bf_head;
+ 	struct sk_buff_head bf_pending;
+-	u16 seq_st = 0, acked_cnt = 0, txfail_cnt = 0;
++	u16 seq_st = 0, acked_cnt = 0, txfail_cnt = 0, seq_first;
+ 	u32 ba[WME_BA_BMP_SIZE >> 5];
+ 	int isaggr, txfail, txpending, sendbar = 0, needreset = 0, nbad = 0;
+ 	bool rc_update = true;
+@@ -391,6 +402,7 @@ static void ath_tx_complete_aggr(struct 
+ 	u8 tidno;
+ 	bool flush = !!(ts->ts_status & ATH9K_TX_FLUSH);
+ 	int i, retries;
++	int bar_index = -1;
+ 
+ 	skb = bf->bf_mpdu;
+ 	hdr = (struct ieee80211_hdr *)skb->data;
+@@ -416,8 +428,7 @@ static void ath_tx_complete_aggr(struct 
+ 			if (!bf->bf_stale || bf_next != NULL)
+ 				list_move_tail(&bf->list, &bf_head);
+ 
+-			ath_tx_complete_buf(sc, bf, txq, &bf_head, ts,
+-				0, 0);
++			ath_tx_complete_buf(sc, bf, txq, &bf_head, ts, 0);
+ 
+ 			bf = bf_next;
+ 		}
+@@ -427,6 +438,7 @@ static void ath_tx_complete_aggr(struct 
+ 	an = (struct ath_node *)sta->drv_priv;
+ 	tidno = ieee80211_get_qos_ctl(hdr)[0] & IEEE80211_QOS_CTL_TID_MASK;
+ 	tid = ATH_AN_2_TID(an, tidno);
++	seq_first = tid->seq_start;
+ 
+ 	/*
+ 	 * The hardware occasionally sends a tx status for the wrong TID.
+@@ -495,8 +507,9 @@ static void ath_tx_complete_aggr(struct 
+ 				txpending = 1;
+ 			} else {
+ 				txfail = 1;
+-				sendbar = 1;
+ 				txfail_cnt++;
++				bar_index = max_t(int, bar_index,
++					ATH_BA_INDEX(seq_first, seqno));
+ 			}
+ 		}
+ 
+@@ -525,7 +538,7 @@ static void ath_tx_complete_aggr(struct 
+ 			}
+ 
+ 			ath_tx_complete_buf(sc, bf, txq, &bf_head, ts,
+-				!txfail, sendbar);
++				!txfail);
+ 		} else {
+ 			/* retry the un-acked ones */
+ 			if (!(sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA)) {
+@@ -545,8 +558,10 @@ static void ath_tx_complete_aggr(struct 
+ 
+ 						ath_tx_complete_buf(sc, bf, txq,
+ 								    &bf_head,
+-								    ts, 0,
+-								    !flush);
++								    ts, 0);
++						bar_index = max_t(int, bar_index,
++							ATH_BA_INDEX(seq_first,
++								seqno));
+ 						break;
+ 					}
+ 
+@@ -564,6 +579,9 @@ static void ath_tx_complete_aggr(struct 
+ 		bf = bf_next;
+ 	}
+ 
++	if (bar_index >= 0)
++		ath_send_bar(tid, ATH_BA_INDEX2SEQ(seq_first, bar_index + 1));
++
+ 	/* prepend un-acked frames to the beginning of the pending frame queue */
+ 	if (!skb_queue_empty(&bf_pending)) {
+ 		if (an->sleeping)
+@@ -1452,7 +1470,7 @@ static void ath_drain_txq_list(struct at
+ 			ath_tx_complete_aggr(sc, txq, bf, &bf_head, &ts, 0,
+ 					     retry_tx);
+ 		else
+-			ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0, 0);
++			ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0);
+ 		spin_lock_bh(&txq->axq_lock);
+ 	}
+ }
+@@ -1967,9 +1985,6 @@ static void ath_tx_complete(struct ath_s
+ 
+ 	ath_dbg(common, ATH_DBG_XMIT, "TX complete: skb: %p\n", skb);
+ 
+-	if (tx_flags & ATH_TX_BAR)
+-		tx_info->flags |= IEEE80211_TX_STAT_AMPDU_NO_BACK;
+-
+ 	if (!(tx_flags & ATH_TX_ERROR))
+ 		/* Frame was ACKed */
+ 		tx_info->flags |= IEEE80211_TX_STAT_ACK;
+@@ -2013,16 +2028,13 @@ static void ath_tx_complete(struct ath_s
+ 
+ static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf,
+ 				struct ath_txq *txq, struct list_head *bf_q,
+-				struct ath_tx_status *ts, int txok, int sendbar)
++				struct ath_tx_status *ts, int txok)
+ {
+ 	struct sk_buff *skb = bf->bf_mpdu;
+ 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
+ 	unsigned long flags;
+ 	int tx_flags = 0;
+ 
+-	if (sendbar)
+-		tx_flags = ATH_TX_BAR;
+-
+ 	if (!txok)
+ 		tx_flags |= ATH_TX_ERROR;
+ 
+@@ -2129,7 +2141,7 @@ static void ath_tx_process_buffer(struct
+ 
+ 	if (!bf_isampdu(bf)) {
+ 		ath_tx_rc_status(sc, bf, ts, 1, txok ? 0 : 1, txok);
+-		ath_tx_complete_buf(sc, bf, txq, bf_head, ts, txok, 0);
++		ath_tx_complete_buf(sc, bf, txq, bf_head, ts, txok);
+ 	} else
+ 		ath_tx_complete_aggr(sc, txq, bf, bf_head, ts, txok, true);
+ 
+--- a/drivers/net/wireless/ath/ath9k/debug.c
++++ b/drivers/net/wireless/ath/ath9k/debug.c
+@@ -856,7 +856,7 @@ void ath_debug_stat_tx(struct ath_softc 
+ 	sc->debug.stats.txstats[qnum].tx_bytes_all += bf->bf_mpdu->len;
+ 
+ 	if (bf_isampdu(bf)) {
+-		if (flags & ATH_TX_BAR)
++		if (flags & ATH_TX_ERROR)
+ 			TX_STAT_INC(qnum, a_xretries);
+ 		else
+ 			TX_STAT_INC(qnum, a_completed);
diff --git a/package/mac80211/patches/561-ath9k_reduce_indentation.patch b/package/mac80211/patches/561-ath9k_reduce_indentation.patch
new file mode 100644
index 000000000..08ff157c6
--- /dev/null
+++ b/package/mac80211/patches/561-ath9k_reduce_indentation.patch
@@ -0,0 +1,159 @@
+--- a/drivers/net/wireless/ath/ath9k/xmit.c
++++ b/drivers/net/wireless/ath/ath9k/xmit.c
+@@ -490,27 +490,25 @@ static void ath_tx_complete_aggr(struct 
+ 		} else if (!isaggr && txok) {
+ 			/* transmit completion */
+ 			acked_cnt++;
++		} else if ((tid->state & AGGR_CLEANUP) || !retry) {
++			/*
++			 * cleanup in progress, just fail
++			 * the un-acked sub-frames
++			 */
++			txfail = 1;
++		} else if (flush) {
++			txpending = 1;
++		} else if (fi->retries < ATH_MAX_SW_RETRIES) {
++			if (txok || !an->sleeping)
++				ath_tx_set_retry(sc, txq, bf->bf_mpdu,
++						 retries);
++
++			txpending = 1;
+ 		} else {
+-			if ((tid->state & AGGR_CLEANUP) || !retry) {
+-				/*
+-				 * cleanup in progress, just fail
+-				 * the un-acked sub-frames
+-				 */
+-				txfail = 1;
+-			} else if (flush) {
+-				txpending = 1;
+-			} else if (fi->retries < ATH_MAX_SW_RETRIES) {
+-				if (txok || !an->sleeping)
+-					ath_tx_set_retry(sc, txq, bf->bf_mpdu,
+-							 retries);
+-
+-				txpending = 1;
+-			} else {
+-				txfail = 1;
+-				txfail_cnt++;
+-				bar_index = max_t(int, bar_index,
+-					ATH_BA_INDEX(seq_first, seqno));
+-			}
++			txfail = 1;
++			txfail_cnt++;
++			bar_index = max_t(int, bar_index,
++				ATH_BA_INDEX(seq_first, seqno));
+ 		}
+ 
+ 		/*
+@@ -541,32 +539,29 @@ static void ath_tx_complete_aggr(struct 
+ 				!txfail);
+ 		} else {
+ 			/* retry the un-acked ones */
+-			if (!(sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA)) {
+-				if (bf->bf_next == NULL && bf_last->bf_stale) {
+-					struct ath_buf *tbf;
+-
+-					tbf = ath_clone_txbuf(sc, bf_last);
+-					/*
+-					 * Update tx baw and complete the
+-					 * frame with failed status if we
+-					 * run out of tx buf.
+-					 */
+-					if (!tbf) {
+-						spin_lock_bh(&txq->axq_lock);
+-						ath_tx_update_baw(sc, tid, seqno);
+-						spin_unlock_bh(&txq->axq_lock);
+-
+-						ath_tx_complete_buf(sc, bf, txq,
+-								    &bf_head,
+-								    ts, 0);
+-						bar_index = max_t(int, bar_index,
+-							ATH_BA_INDEX(seq_first,
+-								seqno));
+-						break;
+-					}
++			if (!(sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA) &&
++			    bf->bf_next == NULL && bf_last->bf_stale) {
++				struct ath_buf *tbf;
+ 
+-					fi->bf = tbf;
++				tbf = ath_clone_txbuf(sc, bf_last);
++				/*
++				 * Update tx baw and complete the
++				 * frame with failed status if we
++				 * run out of tx buf.
++				 */
++				if (!tbf) {
++					spin_lock_bh(&txq->axq_lock);
++					ath_tx_update_baw(sc, tid, seqno);
++					spin_unlock_bh(&txq->axq_lock);
++
++					ath_tx_complete_buf(sc, bf, txq,
++							    &bf_head, ts, 0);
++					bar_index = max_t(int, bar_index,
++						ATH_BA_INDEX(seq_first, seqno));
++					break;
+ 				}
++
++				fi->bf = tbf;
+ 			}
+ 
+ 			/*
+@@ -654,24 +649,26 @@ static u32 ath_lookup_rate(struct ath_so
+ 	max_4ms_framelen = ATH_AMPDU_LIMIT_MAX;
+ 
+ 	for (i = 0; i < 4; i++) {
+-		if (rates[i].count) {
+-			int modeidx;
+-			if (!(rates[i].flags & IEEE80211_TX_RC_MCS)) {
+-				legacy = 1;
+-				break;
+-			}
+-
+-			if (rates[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
+-				modeidx = MCS_HT40;
+-			else
+-				modeidx = MCS_HT20;
++		int modeidx;
+ 
+-			if (rates[i].flags & IEEE80211_TX_RC_SHORT_GI)
+-				modeidx++;
++		if (!rates[i].count)
++			continue;
+ 
+-			frmlen = ath_max_4ms_framelen[modeidx][rates[i].idx];
+-			max_4ms_framelen = min(max_4ms_framelen, frmlen);
++		if (!(rates[i].flags & IEEE80211_TX_RC_MCS)) {
++			legacy = 1;
++			break;
+ 		}
++
++		if (rates[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
++			modeidx = MCS_HT40;
++		else
++			modeidx = MCS_HT20;
++
++		if (rates[i].flags & IEEE80211_TX_RC_SHORT_GI)
++			modeidx++;
++
++		frmlen = ath_max_4ms_framelen[modeidx][rates[i].idx];
++		max_4ms_framelen = min(max_4ms_framelen, frmlen);
+ 	}
+ 
+ 	/*
+@@ -1598,11 +1595,9 @@ void ath_txq_schedule(struct ath_softc *
+ 				break;
+ 		}
+ 
+-		if (!list_empty(&ac->tid_q)) {
+-			if (!ac->sched) {
+-				ac->sched = true;
+-				list_add_tail(&ac->list, &txq->axq_acq);
+-			}
++		if (!list_empty(&ac->tid_q) && !ac->sched) {
++			ac->sched = true;
++			list_add_tail(&ac->list, &txq->axq_acq);
+ 		}
+ 
+ 		if (ac == last_ac ||
diff --git a/package/mac80211/patches/562-ath9k_remove_seq_incr.patch b/package/mac80211/patches/562-ath9k_remove_seq_incr.patch
new file mode 100644
index 000000000..a89d0a50f
--- /dev/null
+++ b/package/mac80211/patches/562-ath9k_remove_seq_incr.patch
@@ -0,0 +1,13 @@
+--- a/drivers/net/wireless/ath/ath9k/xmit.c
++++ b/drivers/net/wireless/ath/ath9k/xmit.c
+@@ -1742,10 +1742,6 @@ static void ath_tx_send_normal(struct at
+ 	list_add_tail(&bf->list, &bf_head);
+ 	bf->bf_state.bf_type = 0;
+ 
+-	/* update starting sequence number for subsequent ADDBA request */
+-	if (tid)
+-		INCR(tid->seq_start, IEEE80211_SEQ_MAX);
+-
+ 	bf->bf_lastbf = bf;
+ 	ath_tx_fill_desc(sc, bf, txq, fi->framelen);
+ 	ath_tx_txqaddbuf(sc, txq, &bf_head, false);
diff --git a/package/mac80211/patches/563-ath9k_simplify_tx_locking.patch b/package/mac80211/patches/563-ath9k_simplify_tx_locking.patch
new file mode 100644
index 000000000..c6b3ad231
--- /dev/null
+++ b/package/mac80211/patches/563-ath9k_simplify_tx_locking.patch
@@ -0,0 +1,247 @@
+--- a/drivers/net/wireless/ath/ath9k/xmit.c
++++ b/drivers/net/wireless/ath/ath9k/xmit.c
+@@ -169,13 +169,11 @@ static void ath_tx_flush_tid(struct ath_
+ 	INIT_LIST_HEAD(&bf_head);
+ 
+ 	memset(&ts, 0, sizeof(ts));
+-	spin_lock_bh(&txq->axq_lock);
+ 
+ 	while ((skb = __skb_dequeue(&tid->buf_q))) {
+ 		fi = get_frame_info(skb);
+ 		bf = fi->bf;
+ 
+-		spin_unlock_bh(&txq->axq_lock);
+ 		if (bf && fi->retries) {
+ 			list_add_tail(&bf->list, &bf_head);
+ 			ath_tx_update_baw(sc, tid, bf->bf_state.seqno);
+@@ -184,7 +182,6 @@ static void ath_tx_flush_tid(struct ath_
+ 		} else {
+ 			ath_tx_send_normal(sc, txq, NULL, skb);
+ 		}
+-		spin_lock_bh(&txq->axq_lock);
+ 	}
+ 
+ 	if (tid->baw_head == tid->baw_tail) {
+@@ -192,8 +189,6 @@ static void ath_tx_flush_tid(struct ath_
+ 		tid->state &= ~AGGR_CLEANUP;
+ 	}
+ 
+-	spin_unlock_bh(&txq->axq_lock);
+-
+ 	if (sendbar)
+ 		ath_send_bar(tid, tid->seq_start);
+ }
+@@ -254,9 +249,7 @@ static void ath_tid_drain(struct ath_sof
+ 		bf = fi->bf;
+ 
+ 		if (!bf) {
+-			spin_unlock(&txq->axq_lock);
+ 			ath_tx_complete(sc, skb, ATH_TX_ERROR, txq);
+-			spin_lock(&txq->axq_lock);
+ 			continue;
+ 		}
+ 
+@@ -265,9 +258,7 @@ static void ath_tid_drain(struct ath_sof
+ 		if (fi->retries)
+ 			ath_tx_update_baw(sc, tid, bf->bf_state.seqno);
+ 
+-		spin_unlock(&txq->axq_lock);
+ 		ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0);
+-		spin_lock(&txq->axq_lock);
+ 	}
+ 
+ 	tid->seq_next = tid->seq_start;
+@@ -525,9 +516,7 @@ static void ath_tx_complete_aggr(struct 
+ 			 * complete the acked-ones/xretried ones; update
+ 			 * block-ack window
+ 			 */
+-			spin_lock_bh(&txq->axq_lock);
+ 			ath_tx_update_baw(sc, tid, seqno);
+-			spin_unlock_bh(&txq->axq_lock);
+ 
+ 			if (rc_update && (acked_cnt == 1 || txfail_cnt == 1)) {
+ 				memcpy(tx_info->control.rates, rates, sizeof(rates));
+@@ -550,9 +539,7 @@ static void ath_tx_complete_aggr(struct 
+ 				 * run out of tx buf.
+ 				 */
+ 				if (!tbf) {
+-					spin_lock_bh(&txq->axq_lock);
+ 					ath_tx_update_baw(sc, tid, seqno);
+-					spin_unlock_bh(&txq->axq_lock);
+ 
+ 					ath_tx_complete_buf(sc, bf, txq,
+ 							    &bf_head, ts, 0);
+@@ -582,7 +569,6 @@ static void ath_tx_complete_aggr(struct 
+ 		if (an->sleeping)
+ 			ieee80211_sta_set_buffered(sta, tid->tidno, true);
+ 
+-		spin_lock_bh(&txq->axq_lock);
+ 		skb_queue_splice(&bf_pending, &tid->buf_q);
+ 		if (!an->sleeping) {
+ 			ath_tx_queue_tid(txq, tid);
+@@ -590,7 +576,6 @@ static void ath_tx_complete_aggr(struct 
+ 			if (ts->ts_status & ATH9K_TXERR_FILT)
+ 				tid->ac->clear_ps_filter = true;
+ 		}
+-		spin_unlock_bh(&txq->axq_lock);
+ 	}
+ 
+ 	if (tid->state & AGGR_CLEANUP)
+@@ -1190,9 +1175,9 @@ void ath_tx_aggr_stop(struct ath_softc *
+ 		txtid->state |= AGGR_CLEANUP;
+ 	else
+ 		txtid->state &= ~AGGR_ADDBA_COMPLETE;
+-	spin_unlock_bh(&txq->axq_lock);
+ 
+ 	ath_tx_flush_tid(sc, txtid);
++	spin_unlock_bh(&txq->axq_lock);
+ }
+ 
+ void ath_tx_aggr_sleep(struct ieee80211_sta *sta, struct ath_softc *sc,
+@@ -1434,8 +1419,6 @@ static bool bf_is_ampdu_not_probing(stru
+ 
+ static void ath_drain_txq_list(struct ath_softc *sc, struct ath_txq *txq,
+ 			       struct list_head *list, bool retry_tx)
+-	__releases(txq->axq_lock)
+-	__acquires(txq->axq_lock)
+ {
+ 	struct ath_buf *bf, *lastbf;
+ 	struct list_head bf_head;
+@@ -1462,13 +1445,11 @@ static void ath_drain_txq_list(struct at
+ 		if (bf_is_ampdu_not_probing(bf))
+ 			txq->axq_ampdu_depth--;
+ 
+-		spin_unlock_bh(&txq->axq_lock);
+ 		if (bf_isampdu(bf))
+ 			ath_tx_complete_aggr(sc, txq, bf, &bf_head, &ts, 0,
+ 					     retry_tx);
+ 		else
+ 			ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0);
+-		spin_lock_bh(&txq->axq_lock);
+ 	}
+ }
+ 
+@@ -1847,8 +1828,6 @@ static void ath_tx_start_dma(struct ath_
+ 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
+ 	struct ath_buf *bf;
+ 
+-	spin_lock_bh(&txctl->txq->axq_lock);
+-
+ 	if ((tx_info->flags & IEEE80211_TX_CTL_AMPDU) && tid) {
+ 		/*
+ 		 * Try aggregation if it's a unicast data frame
+@@ -1858,7 +1837,7 @@ static void ath_tx_start_dma(struct ath_
+ 	} else {
+ 		bf = ath_tx_setup_buffer(sc, txctl->txq, tid, skb);
+ 		if (!bf)
+-			goto out;
++			return;
+ 
+ 		bf->bf_state.bfs_paprd = txctl->paprd;
+ 
+@@ -1867,9 +1846,6 @@ static void ath_tx_start_dma(struct ath_
+ 
+ 		ath_tx_send_normal(sc, txctl->txq, tid, skb);
+ 	}
+-
+-out:
+-	spin_unlock_bh(&txctl->txq->axq_lock);
+ }
+ 
+ /* Upon failure caller should free skb */
+@@ -1949,15 +1925,19 @@ int ath_tx_start(struct ieee80211_hw *hw
+ 	 */
+ 
+ 	q = skb_get_queue_mapping(skb);
++
+ 	spin_lock_bh(&txq->axq_lock);
++
+ 	if (txq == sc->tx.txq_map[q] &&
+ 	    ++txq->pending_frames > ATH_MAX_QDEPTH && !txq->stopped) {
+ 		ieee80211_stop_queue(sc->hw, q);
+ 		txq->stopped = 1;
+ 	}
+-	spin_unlock_bh(&txq->axq_lock);
+ 
+ 	ath_tx_start_dma(sc, skb, txctl, tid);
++
++	spin_unlock_bh(&txq->axq_lock);
++
+ 	return 0;
+ }
+ 
+@@ -2003,7 +1983,6 @@ static void ath_tx_complete(struct ath_s
+ 
+ 	q = skb_get_queue_mapping(skb);
+ 	if (txq == sc->tx.txq_map[q]) {
+-		spin_lock_bh(&txq->axq_lock);
+ 		if (WARN_ON(--txq->pending_frames < 0))
+ 			txq->pending_frames = 0;
+ 
+@@ -2011,7 +1990,6 @@ static void ath_tx_complete(struct ath_s
+ 			ieee80211_wake_queue(sc->hw, q);
+ 			txq->stopped = 0;
+ 		}
+-		spin_unlock_bh(&txq->axq_lock);
+ 	}
+ 
+ 	ieee80211_tx_status(hw, skb);
+@@ -2117,8 +2095,6 @@ static void ath_tx_rc_status(struct ath_
+ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq,
+ 				  struct ath_tx_status *ts, struct ath_buf *bf,
+ 				  struct list_head *bf_head)
+-	__releases(txq->axq_lock)
+-	__acquires(txq->axq_lock)
+ {
+ 	int txok;
+ 
+@@ -2128,16 +2104,12 @@ static void ath_tx_process_buffer(struct
+ 	if (bf_is_ampdu_not_probing(bf))
+ 		txq->axq_ampdu_depth--;
+ 
+-	spin_unlock_bh(&txq->axq_lock);
+-
+ 	if (!bf_isampdu(bf)) {
+ 		ath_tx_rc_status(sc, bf, ts, 1, txok ? 0 : 1, txok);
+ 		ath_tx_complete_buf(sc, bf, txq, bf_head, ts, txok);
+ 	} else
+ 		ath_tx_complete_aggr(sc, txq, bf, bf_head, ts, txok, true);
+ 
+-	spin_lock_bh(&txq->axq_lock);
+-
+ 	if (sc->sc_flags & SC_OP_TXAGGR)
+ 		ath_txq_schedule(sc, txq);
+ }
+@@ -2281,6 +2253,7 @@ void ath_tx_edma_tasklet(struct ath_soft
+ 	struct list_head bf_head;
+ 	int status;
+ 
++	spin_lock_bh(&txq->axq_lock);
+ 	for (;;) {
+ 		if (work_pending(&sc->hw_reset_work))
+ 			break;
+@@ -2300,12 +2273,8 @@ void ath_tx_edma_tasklet(struct ath_soft
+ 
+ 		txq = &sc->tx.txq[ts.qid];
+ 
+-		spin_lock_bh(&txq->axq_lock);
+-
+-		if (list_empty(&txq->txq_fifo[txq->txq_tailidx])) {
+-			spin_unlock_bh(&txq->axq_lock);
+-			return;
+-		}
++		if (list_empty(&txq->txq_fifo[txq->txq_tailidx]))
++			break;
+ 
+ 		bf = list_first_entry(&txq->txq_fifo[txq->txq_tailidx],
+ 				      struct ath_buf, list);
+@@ -2329,8 +2298,8 @@ void ath_tx_edma_tasklet(struct ath_soft
+ 		}
+ 
+ 		ath_tx_process_buffer(sc, txq, &ts, bf, &bf_head);
+-		spin_unlock_bh(&txq->axq_lock);
+ 	}
++	spin_unlock_bh(&txq->axq_lock);
+ }
+ 
+ /*****************/
diff --git a/package/mac80211/patches/564-ath9k_track_last_bar.patch b/package/mac80211/patches/564-ath9k_track_last_bar.patch
new file mode 100644
index 000000000..34a4dd1dd
--- /dev/null
+++ b/package/mac80211/patches/564-ath9k_track_last_bar.patch
@@ -0,0 +1,82 @@
+--- a/drivers/net/wireless/ath/ath9k/ath9k.h
++++ b/drivers/net/wireless/ath/ath9k/ath9k.h
+@@ -242,6 +242,7 @@ struct ath_atx_tid {
+ 	struct ath_atx_ac *ac;
+ 	unsigned long tx_buf[BITS_TO_LONGS(ATH_TID_MAX_BUFS)];
+ 	int buf_pending;
++	int bar_index;
+ 	u16 seq_start;
+ 	u16 seq_next;
+ 	u16 baw_size;
+--- a/drivers/net/wireless/ath/ath9k/xmit.c
++++ b/drivers/net/wireless/ath/ath9k/xmit.c
+@@ -206,6 +206,8 @@ static void ath_tx_update_baw(struct ath
+ 	while (tid->baw_head != tid->baw_tail && !test_bit(tid->baw_head, tid->tx_buf)) {
+ 		INCR(tid->seq_start, IEEE80211_SEQ_MAX);
+ 		INCR(tid->baw_head, ATH_TID_MAX_BUFS);
++		if (tid->bar_index >= 0)
++			tid->bar_index--;
+ 	}
+ }
+ 
+@@ -263,6 +265,7 @@ static void ath_tid_drain(struct ath_sof
+ 
+ 	tid->seq_next = tid->seq_start;
+ 	tid->baw_tail = tid->baw_head;
++	tid->bar_index = -1;
+ }
+ 
+ static void ath_tx_set_retry(struct ath_softc *sc, struct ath_txq *txq,
+@@ -561,8 +564,12 @@ static void ath_tx_complete_aggr(struct 
+ 		bf = bf_next;
+ 	}
+ 
+-	if (bar_index >= 0)
++	if (bar_index >= 0) {
++		u16 bar_seq = ATH_BA_INDEX2SEQ(seq_first, bar_index);
+ 		ath_send_bar(tid, ATH_BA_INDEX2SEQ(seq_first, bar_index + 1));
++		if (BAW_WITHIN(tid->seq_start, tid->baw_size, bar_seq))
++			tid->bar_index = ATH_BA_INDEX(tid->seq_start, bar_seq);
++	}
+ 
+ 	/* prepend un-acked frames to the beginning of the pending frame queue */
+ 	if (!skb_queue_empty(&bf_pending)) {
+@@ -789,8 +796,6 @@ static enum ATH_AGGR_STATUS ath_tx_form_
+ 
+ 		bf->bf_state.bf_type = BUF_AMPDU | BUF_AGGR;
+ 		seqno = bf->bf_state.seqno;
+-		if (!bf_first)
+-			bf_first = bf;
+ 
+ 		/* do not step over block-ack window */
+ 		if (!BAW_WITHIN(tid->seq_start, tid->baw_size, seqno)) {
+@@ -798,6 +803,21 @@ static enum ATH_AGGR_STATUS ath_tx_form_
+ 			break;
+ 		}
+ 
++		if (tid->bar_index > ATH_BA_INDEX(tid->seq_start, seqno)) {
++			struct ath_tx_status ts = {};
++			struct list_head bf_head;
++
++			INIT_LIST_HEAD(&bf_head);
++			list_add(&bf->list, &bf_head);
++			__skb_unlink(skb, &tid->buf_q);
++			ath_tx_update_baw(sc, tid, seqno);
++			ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0);
++			continue;
++		}
++
++		if (!bf_first)
++			bf_first = bf;
++
+ 		if (!rl) {
+ 			aggr_limit = ath_lookup_rate(sc, bf, tid);
+ 			rl = 1;
+@@ -1141,6 +1161,7 @@ int ath_tx_aggr_start(struct ath_softc *
+ 	txtid->state |= AGGR_ADDBA_PROGRESS;
+ 	txtid->paused = true;
+ 	*ssn = txtid->seq_start = txtid->seq_next;
++	txtid->bar_index = -1;
+ 
+ 	memset(txtid->tx_buf, 0, sizeof(txtid->tx_buf));
+ 	txtid->baw_head = txtid->baw_tail = 0;