From 0d563d039030bfa997a388c8ec2538789ed8bebd Mon Sep 17 00:00:00 2001
From: nbd <nbd@3c298f89-4303-0410-b956-a3cf2f4a3e73>
Date: Fri, 9 Jan 2009 03:06:37 +0000
Subject: [PATCH] b44: fix misalignment and space-saving in rx packet handling
 (significant throughput and cpu load improvement)

git-svn-id: svn://svn.openwrt.org/openwrt/trunk@13946 3c298f89-4303-0410-b956-a3cf2f4a3e73
---
 .../brcm-2.4/patches/110-b44_alignment.patch  | 103 ++++++++++++++++++
 .../212-b44_alignment_fix.patch               |  48 ++++++++
 .../212-b44_alignment_fix.patch               |  48 ++++++++
 3 files changed, 199 insertions(+)
 create mode 100644 target/linux/brcm-2.4/patches/110-b44_alignment.patch
 create mode 100644 target/linux/brcm47xx/patches-2.6.25/212-b44_alignment_fix.patch
 create mode 100644 target/linux/brcm47xx/patches-2.6.28/212-b44_alignment_fix.patch

diff --git a/target/linux/brcm-2.4/patches/110-b44_alignment.patch b/target/linux/brcm-2.4/patches/110-b44_alignment.patch
new file mode 100644
index 000000000..c1f078234
--- /dev/null
+++ b/target/linux/brcm-2.4/patches/110-b44_alignment.patch
@@ -0,0 +1,103 @@
+--- a/drivers/net/b44.c
++++ b/drivers/net/b44.c
+@@ -101,7 +101,8 @@ static int instance = 0;
+ 	  (BP)->tx_cons - (BP)->tx_prod - TX_RING_GAP(BP))
+ #define NEXT_TX(N)		(((N) + 1) & (B44_TX_RING_SIZE - 1))
+ 
+-#define RX_PKT_BUF_SZ		(1536 + bp->rx_offset + 64)
++#define RX_HEADER_OFS		(RX_HEADER_LEN + 2)
++#define RX_PKT_BUF_SZ		(1536 + RX_HEADER_OFS)
+ 
+ /* minimum number of free TX descriptors required to wake up TX process */
+ #define B44_TX_WAKEUP_THRESH		(B44_TX_RING_SIZE / 4)
+@@ -734,10 +735,9 @@ static int b44_alloc_rx_skb(struct b44 *
+ 	mapping = pci_map_single(bp->pdev, skb->data,
+ 				 RX_PKT_BUF_SZ,
+ 				 PCI_DMA_FROMDEVICE);
+-	skb_reserve(skb, bp->rx_offset);
+ 
+ 	rh = (struct rx_header *)
+-		(skb->data - bp->rx_offset);
++		(skb->data - RX_HEADER_OFS);
+ 	rh->len = 0;
+ 	rh->flags = 0;
+ 
+@@ -747,13 +747,13 @@ static int b44_alloc_rx_skb(struct b44 *
+ 	if (src_map != NULL)
+ 		src_map->skb = NULL;
+ 
+-	ctrl  = (DESC_CTRL_LEN & (RX_PKT_BUF_SZ - bp->rx_offset));
++	ctrl  = (DESC_CTRL_LEN & RX_PKT_BUF_SZ);
+ 	if (dest_idx == (B44_RX_RING_SIZE - 1))
+ 		ctrl |= DESC_CTRL_EOT;
+ 
+ 	dp = &bp->rx_ring[dest_idx];
+ 	dp->ctrl = cpu_to_le32(ctrl);
+-	dp->addr = cpu_to_le32((u32) mapping + bp->rx_offset + bp->dma_offset);
++	dp->addr = cpu_to_le32((u32) mapping + bp->dma_offset);
+ 
+ 	return RX_PKT_BUF_SZ;
+ }
+@@ -812,7 +812,7 @@ static int b44_rx(struct b44 *bp, int bu
+ 				    PCI_DMA_FROMDEVICE);
+ 		rh = (struct rx_header *) skb->data;
+ 		len = cpu_to_le16(rh->len);
+-		if ((len > (RX_PKT_BUF_SZ - bp->rx_offset)) ||
++		if ((len > (RX_PKT_BUF_SZ - RX_HEADER_OFS)) ||
+ 		    (rh->flags & cpu_to_le16(RX_FLAG_ERRORS))) {
+ 		drop_it:
+ 			b44_recycle_rx(bp, cons, bp->rx_prod);
+@@ -844,8 +844,8 @@ static int b44_rx(struct b44 *bp, int bu
+ 			pci_unmap_single(bp->pdev, map,
+ 					 skb_size, PCI_DMA_FROMDEVICE);
+ 			/* Leave out rx_header */
+-                	skb_put(skb, len+bp->rx_offset);
+-            	        skb_pull(skb,bp->rx_offset);
++			skb_put(skb, len+RX_HEADER_OFS);
++			skb_pull(skb,RX_HEADER_OFS);
+ 		} else {
+ 			struct sk_buff *copy_skb;
+ 
+@@ -858,7 +858,7 @@ static int b44_rx(struct b44 *bp, int bu
+ 			skb_reserve(copy_skb, 2);
+ 			skb_put(copy_skb, len);
+ 			/* DMA sync done above, copy just the actual packet */
+-			memcpy(copy_skb->data, skb->data+bp->rx_offset, len);
++			memcpy(copy_skb->data, skb->data+RX_HEADER_OFS, len);
+ 
+ 			skb = copy_skb;
+ 		}
+@@ -1344,7 +1344,7 @@ static void b44_init_hw(struct b44 *bp)
+ 	bw32(B44_DMATX_CTRL, DMATX_CTRL_ENABLE);
+ 	bw32(B44_DMATX_ADDR, bp->tx_ring_dma + bp->dma_offset);
+ 	bw32(B44_DMARX_CTRL, (DMARX_CTRL_ENABLE |
+-			      (bp->rx_offset << DMARX_CTRL_ROSHIFT)));
++			      (RX_HEADER_OFS << DMARX_CTRL_ROSHIFT)));
+ 	bw32(B44_DMARX_ADDR, bp->rx_ring_dma + bp->dma_offset);
+ 
+ 	bw32(B44_DMARX_PTR, bp->rx_pending);
+@@ -1873,13 +1873,7 @@ static int __devinit b44_get_invariants(
+ 		bp->mdc_port = (eeprom[90] >> 14) & 0x1;
+ 	}
+ 
+-	/* With this, plus the rx_header prepended to the data by the
+-	 * hardware, we'll land the ethernet header on a 2-byte boundary.
+-	 */
+-	bp->rx_offset = 30;
+-
+ 	bp->imask = IMASK_DEF;
+-
+ 	bp->core_unit = ssb_core_unit(bp);
+ 
+ 	/* XXX - really required? 
+--- a/drivers/net/b44.h
++++ b/drivers/net/b44.h
+@@ -518,8 +518,6 @@ struct b44 {
+ #define B44_FLAG_ADV_100FULL	0x08000000
+ #define B44_FLAG_INTERNAL_PHY	0x10000000
+ 
+-	u32			rx_offset;
+-
+ 	u32			msg_enable;
+ 
+ 	struct timer_list	timer;
diff --git a/target/linux/brcm47xx/patches-2.6.25/212-b44_alignment_fix.patch b/target/linux/brcm47xx/patches-2.6.25/212-b44_alignment_fix.patch
new file mode 100644
index 000000000..d65c37e44
--- /dev/null
+++ b/target/linux/brcm47xx/patches-2.6.25/212-b44_alignment_fix.patch
@@ -0,0 +1,48 @@
+--- a/drivers/net/b44.c
++++ b/drivers/net/b44.c
+@@ -73,8 +73,8 @@
+ 	  (BP)->tx_cons - (BP)->tx_prod - TX_RING_GAP(BP))
+ #define NEXT_TX(N)		(((N) + 1) & (B44_TX_RING_SIZE - 1))
+ 
+-#define RX_PKT_OFFSET		30
+-#define RX_PKT_BUF_SZ		(1536 + RX_PKT_OFFSET + 64)
++#define RX_PKT_OFFSET		(RX_HEADER_LEN + 2)
++#define RX_PKT_BUF_SZ		(1536 + RX_PKT_OFFSET)
+ 
+ /* minimum number of free TX descriptors required to wake up TX process */
+ #define B44_TX_WAKEUP_THRESH		(B44_TX_RING_SIZE / 4)
+@@ -682,7 +682,6 @@ static int b44_alloc_rx_skb(struct b44 *
+ 	}
+ 
+ 	rh = (struct rx_header *) skb->data;
+-	skb_reserve(skb, RX_PKT_OFFSET);
+ 
+ 	rh->len = 0;
+ 	rh->flags = 0;
+@@ -693,13 +692,13 @@ static int b44_alloc_rx_skb(struct b44 *
+ 	if (src_map != NULL)
+ 		src_map->skb = NULL;
+ 
+-	ctrl  = (DESC_CTRL_LEN & (RX_PKT_BUF_SZ - RX_PKT_OFFSET));
++	ctrl = (DESC_CTRL_LEN & RX_PKT_BUF_SZ);
+ 	if (dest_idx == (B44_RX_RING_SIZE - 1))
+ 		ctrl |= DESC_CTRL_EOT;
+ 
+ 	dp = &bp->rx_ring[dest_idx];
+ 	dp->ctrl = cpu_to_le32(ctrl);
+-	dp->addr = cpu_to_le32((u32) mapping + RX_PKT_OFFSET + bp->dma_offset);
++	dp->addr = cpu_to_le32((u32) mapping + bp->dma_offset);
+ 
+ 	if (bp->flags & B44_FLAG_RX_RING_HACK)
+ 		b44_sync_dma_desc_for_device(bp->sdev, bp->rx_ring_dma,
+@@ -809,8 +808,8 @@ static int b44_rx(struct b44 *bp, int bu
+ 			dma_unmap_single(bp->sdev->dma_dev, map,
+ 					 skb_size, DMA_FROM_DEVICE);
+ 			/* Leave out rx_header */
+-                	skb_put(skb, len + RX_PKT_OFFSET);
+-            	        skb_pull(skb, RX_PKT_OFFSET);
++			skb_put(skb, len + RX_PKT_OFFSET);
++			skb_pull(skb, RX_PKT_OFFSET);
+ 		} else {
+ 			struct sk_buff *copy_skb;
+ 
diff --git a/target/linux/brcm47xx/patches-2.6.28/212-b44_alignment_fix.patch b/target/linux/brcm47xx/patches-2.6.28/212-b44_alignment_fix.patch
new file mode 100644
index 000000000..b5840f296
--- /dev/null
+++ b/target/linux/brcm47xx/patches-2.6.28/212-b44_alignment_fix.patch
@@ -0,0 +1,48 @@
+--- a/drivers/net/b44.c
++++ b/drivers/net/b44.c
+@@ -73,8 +73,8 @@
+ 	  (BP)->tx_cons - (BP)->tx_prod - TX_RING_GAP(BP))
+ #define NEXT_TX(N)		(((N) + 1) & (B44_TX_RING_SIZE - 1))
+ 
+-#define RX_PKT_OFFSET		30
+-#define RX_PKT_BUF_SZ		(1536 + RX_PKT_OFFSET + 64)
++#define RX_PKT_OFFSET		(RX_HEADER_LEN + 2)
++#define RX_PKT_BUF_SZ		(1536 + RX_PKT_OFFSET)
+ 
+ /* minimum number of free TX descriptors required to wake up TX process */
+ #define B44_TX_WAKEUP_THRESH		(B44_TX_RING_SIZE / 4)
+@@ -682,7 +682,6 @@ static int b44_alloc_rx_skb(struct b44 *
+ 	}
+ 
+ 	rh = (struct rx_header *) skb->data;
+-	skb_reserve(skb, RX_PKT_OFFSET);
+ 
+ 	rh->len = 0;
+ 	rh->flags = 0;
+@@ -693,13 +692,13 @@ static int b44_alloc_rx_skb(struct b44 *
+ 	if (src_map != NULL)
+ 		src_map->skb = NULL;
+ 
+-	ctrl  = (DESC_CTRL_LEN & (RX_PKT_BUF_SZ - RX_PKT_OFFSET));
++	ctrl = (DESC_CTRL_LEN & RX_PKT_BUF_SZ);
+ 	if (dest_idx == (B44_RX_RING_SIZE - 1))
+ 		ctrl |= DESC_CTRL_EOT;
+ 
+ 	dp = &bp->rx_ring[dest_idx];
+ 	dp->ctrl = cpu_to_le32(ctrl);
+-	dp->addr = cpu_to_le32((u32) mapping + RX_PKT_OFFSET + bp->dma_offset);
++	dp->addr = cpu_to_le32((u32) mapping + bp->dma_offset);
+ 
+ 	if (bp->flags & B44_FLAG_RX_RING_HACK)
+ 		b44_sync_dma_desc_for_device(bp->sdev, bp->rx_ring_dma,
+@@ -809,8 +808,8 @@ static int b44_rx(struct b44 *bp, int bu
+ 			ssb_dma_unmap_single(bp->sdev, map,
+ 					     skb_size, DMA_FROM_DEVICE);
+ 			/* Leave out rx_header */
+-                	skb_put(skb, len + RX_PKT_OFFSET);
+-            	        skb_pull(skb, RX_PKT_OFFSET);
++			skb_put(skb, len + RX_PKT_OFFSET);
++			skb_pull(skb, RX_PKT_OFFSET);
+ 		} else {
+ 			struct sk_buff *copy_skb;
+ 
-- 
2.20.1