1 diff -urN linux.old/drivers/net/via-rhine.c linux.dev/drivers/net/via-rhine.c
2 --- linux.old/drivers/net/via-rhine.c 2006-06-08 20:21:20.000000000 +0200
3 +++ linux.dev/drivers/net/via-rhine.c 2006-06-08 20:19:40.000000000 +0200
5 - Fix Tx engine race for good
6 - Craig Brind: Zero padded aligned buffers for short packets.
8 + OpenWrt Version (Felix Fietkau <nbd@openwrt.org>)
9 + - Performance improvements
14 #define DRV_NAME "via-rhine"
16 These may be modified when a driver module is loaded. */
18 static int debug = 1; /* 1 normal messages, 0 quiet .. 7 verbose. */
19 -static int max_interrupt_work = 20;
21 /* Set the copy breakpoint for the copy-only-tiny-frames scheme.
22 Setting to > 1518 effectively disables this feature. */
24 Making the Tx ring too large decreases the effectiveness of channel
25 bonding and packet priority.
26 There are no ill effects from too-large receive rings. */
27 -#define TX_RING_SIZE 16
28 -#define TX_QUEUE_LEN 10 /* Limit ring entries actually used. */
29 -#define RX_RING_SIZE 16
30 +#define TX_RING_SIZE 128
31 +#define TX_QUEUE_LEN 120 /* Limit ring entries actually used. */
32 +#define RX_RING_SIZE 128
35 /* Operational parameters that usually are not changed. */
39 #include <asm/uaccess.h>
40 +#include <asm/unaligned.h>
42 /* These identify the driver base version and may not be removed. */
43 static char version[] __devinitdata =
45 MODULE_DESCRIPTION("VIA Rhine PCI Fast Ethernet driver");
46 MODULE_LICENSE("GPL");
48 -module_param(max_interrupt_work, int, 0);
49 module_param(debug, int, 0);
50 module_param(rx_copybreak, int, 0);
51 -MODULE_PARM_DESC(max_interrupt_work, "VIA Rhine maximum events handled per interrupt");
52 MODULE_PARM_DESC(debug, "VIA Rhine debug level (0-7)");
53 MODULE_PARM_DESC(rx_copybreak, "VIA Rhine copy breakpoint for copy-only-tiny-frames");
56 struct tx_desc *tx_ring;
57 dma_addr_t rx_ring_dma;
58 dma_addr_t tx_ring_dma;
62 /* The addresses of receive-in-place skbuffs. */
63 struct sk_buff *rx_skbuff[RX_RING_SIZE];
65 static void rhine_check_media_task(struct net_device *dev);
66 static int rhine_start_tx(struct sk_buff *skb, struct net_device *dev);
67 static irqreturn_t rhine_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
68 -static void rhine_tx(struct net_device *dev);
69 -static void rhine_rx(struct net_device *dev);
70 -static void rhine_error(struct net_device *dev, int intr_status);
71 +static int rhine_poll(struct net_device *dev, int *budget);
72 +static int rhine_tx(struct net_device *dev);
73 +static int rhine_rx(struct net_device *dev);
74 +static void rhine_error(struct net_device *dev);
75 static void rhine_set_rx_mode(struct net_device *dev);
76 static struct net_device_stats *rhine_get_stats(struct net_device *dev);
77 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
79 struct rhine_private *rp = netdev_priv(dev);
80 void __iomem *ioaddr = rp->base;
82 + pci_enable_device(rp->pdev);
84 iowrite8(Cmd1Reset, ioaddr + ChipCmd1);
88 "failed" : "succeeded");
91 +static inline void rhine_intr_enable(struct net_device *dev)
93 + struct rhine_private *rp = netdev_priv(dev);
94 + void __iomem *ioaddr = rp->base;
96 + iowrite16(rp->imask = (IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow |
97 + IntrRxDropped | IntrRxNoBuf | IntrTxAborted |
98 + IntrTxDone | IntrTxError | IntrTxUnderrun |
99 + IntrPCIErr | IntrStatsMax | IntrLinkChange),
100 + ioaddr + IntrEnable);
103 +static inline void rhine_intr_disable(struct net_device *dev)
105 + struct rhine_private *rp = netdev_priv(dev);
106 + void __iomem *ioaddr = rp->base;
108 + iowrite16(rp->imask = (IntrRxOverflow | IntrRxNoBuf | IntrTxAborted |
109 + IntrTxError | IntrTxUnderrun | IntrPCIErr | IntrStatsMax | IntrLinkChange),
110 + ioaddr + IntrEnable);
114 static void enable_mmio(long pioaddr, u32 quirks)
116 @@ -664,14 +693,26 @@
120 -#ifdef CONFIG_NET_POLL_CONTROLLER
121 -static void rhine_poll(struct net_device *dev)
122 +static int rhine_poll(struct net_device *dev, int *budget)
124 - disable_irq(dev->irq);
125 - rhine_interrupt(dev->irq, (void *)dev, NULL);
126 - enable_irq(dev->irq);
127 + unsigned int work_done, work_to_do = min(*budget, dev->quota);
128 + struct rhine_private *rp = netdev_priv(dev);
130 + work_done = rhine_rx(dev);
132 + if (rp->istat & (IntrTxErrSummary | IntrTxDone))
135 + *budget -= work_done;
136 + dev->quota -= work_done;
138 + if (work_done < work_to_do) {
139 + netif_rx_complete(dev);
140 + rhine_intr_enable(dev);
143 + return (work_done >= work_to_do);
147 static void rhine_hw_init(struct net_device *dev, long pioaddr)
149 @@ -850,11 +891,10 @@
150 dev->ethtool_ops = &netdev_ethtool_ops;
151 dev->tx_timeout = rhine_tx_timeout;
152 dev->watchdog_timeo = TX_TIMEOUT;
153 -#ifdef CONFIG_NET_POLL_CONTROLLER
154 - dev->poll_controller = rhine_poll;
156 - if (rp->quirks & rqRhineI)
157 - dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM;
158 + dev->poll = rhine_poll;
161 + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
163 INIT_WORK(&rp->tx_timeout_task,
164 (void (*)(void *))rhine_tx_timeout_task, dev);
168 rp->mii_if.phy_id = phy_id;
170 + // Put the chip into a low-power state (PCI power state 3) until the interface is actually needed
171 + iowrite8(0x80, ioaddr + 0xa1);
172 + pci_set_power_state(rp->pdev, 3);
178 /* Fill in the Rx buffers. Handle allocation failure gracefully. */
179 for (i = 0; i < RX_RING_SIZE; i++) {
180 - struct sk_buff *skb = dev_alloc_skb(rp->rx_buf_sz);
181 + struct sk_buff *skb = dev_alloc_skb(rp->rx_buf_sz + 4);
182 rp->rx_skbuff[i] = skb;
185 @@ -1115,11 +1159,7 @@
186 rhine_set_rx_mode(dev);
188 /* Enable interrupts by setting the interrupt mask. */
189 - iowrite16(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow |
190 - IntrRxDropped | IntrRxNoBuf | IntrTxAborted |
191 - IntrTxDone | IntrTxError | IntrTxUnderrun |
192 - IntrPCIErr | IntrStatsMax | IntrLinkChange,
193 - ioaddr + IntrEnable);
194 + rhine_intr_enable(dev);
196 iowrite16(CmdStart | CmdTxOn | CmdRxOn | (Cmd1NoTxPoll << 8),
198 @@ -1230,6 +1270,7 @@
199 mdio_read(dev, rp->mii_if.phy_id, MII_BMSR));
201 netif_start_queue(dev);
202 + netif_poll_enable(dev);
206 @@ -1268,8 +1309,8 @@
207 /* Reinitialize the hardware. */
208 rhine_chip_reset(dev);
211 spin_unlock(&rp->lock);
213 enable_irq(rp->pdev->irq);
215 dev->trans_start = jiffies;
216 @@ -1363,69 +1404,56 @@
217 struct net_device *dev = dev_instance;
218 struct rhine_private *rp = netdev_priv(dev);
219 void __iomem *ioaddr = rp->base;
221 - int boguscnt = max_interrupt_work;
224 - while ((intr_status = get_intr_status(dev))) {
225 + if ((rp->istat = (get_intr_status(dev) & rp->imask))) {
228 /* Acknowledge all of the current interrupt sources ASAP. */
229 - if (intr_status & IntrTxDescRace)
230 + if (rp->istat & IntrTxDescRace)
231 iowrite8(0x08, ioaddr + IntrStatus2);
232 - iowrite16(intr_status & 0xffff, ioaddr + IntrStatus);
233 + iowrite16(rp->istat & 0xffff, ioaddr + IntrStatus);
237 - printk(KERN_DEBUG "%s: Interrupt, status %8.8x.\n",
238 - dev->name, intr_status);
239 + if (likely(rp->istat & ((IntrRxDone | IntrRxErr | IntrRxDropped |
240 + IntrRxWakeUp | IntrRxEmpty | IntrRxNoBuf |
241 + IntrTxErrSummary | IntrTxDone)))) {
243 + rhine_intr_disable(dev);
245 - if (intr_status & (IntrRxDone | IntrRxErr | IntrRxDropped |
246 - IntrRxWakeUp | IntrRxEmpty | IntrRxNoBuf))
249 - if (intr_status & (IntrTxErrSummary | IntrTxDone)) {
250 - if (intr_status & IntrTxErrSummary) {
251 - /* Avoid scavenging before Tx engine turned off */
252 - RHINE_WAIT_FOR(!(ioread8(ioaddr+ChipCmd) & CmdTxOn));
254 - ioread8(ioaddr+ChipCmd) & CmdTxOn)
255 - printk(KERN_WARNING "%s: "
256 - "rhine_interrupt() Tx engine"
257 - "still on.\n", dev->name);
260 + if (likely(netif_rx_schedule_prep(dev)))
261 + __netif_rx_schedule(dev);
264 /* Abnormal error summary/uncommon events handlers. */
265 - if (intr_status & (IntrPCIErr | IntrLinkChange |
266 + if (unlikely(rp->istat & (IntrPCIErr | IntrLinkChange |
267 IntrStatsMax | IntrTxError | IntrTxAborted |
268 - IntrTxUnderrun | IntrTxDescRace))
269 - rhine_error(dev, intr_status);
271 - if (--boguscnt < 0) {
272 - printk(KERN_WARNING "%s: Too much work at interrupt, "
273 - "status=%#8.8x.\n",
274 - dev->name, intr_status);
277 + IntrTxUnderrun | IntrTxDescRace)))
282 - printk(KERN_DEBUG "%s: exiting interrupt, status=%8.8x.\n",
283 - dev->name, ioread16(ioaddr + IntrStatus));
284 return IRQ_RETVAL(handled);
287 /* This routine is logically part of the interrupt handler, but isolated
289 -static void rhine_tx(struct net_device *dev)
290 +static int rhine_tx(struct net_device *dev)
292 struct rhine_private *rp = netdev_priv(dev);
293 int txstatus = 0, entry = rp->dirty_tx % TX_RING_SIZE;
294 + void __iomem *ioaddr = rp->base;
297 - spin_lock(&rp->lock);
298 + /* Avoid scavenging before Tx engine turned off */
299 + RHINE_WAIT_FOR(!(ioread8(ioaddr+ChipCmd) & CmdTxOn));
301 + ioread8(ioaddr+ChipCmd) & CmdTxOn)
302 + printk(KERN_WARNING "%s: "
303 + "rhine_interrupt() Tx engine"
304 + "still on.\n", dev->name);
307 + spin_lock_irq(&rp->lock);
308 /* find and cleanup dirty tx descriptors */
309 while (rp->dirty_tx != rp->cur_tx) {
310 txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status);
311 @@ -1462,6 +1490,7 @@
313 rp->stats.tx_bytes += rp->tx_skbuff[entry]->len;
314 rp->stats.tx_packets++;
317 /* Free the original skb. */
318 if (rp->tx_skbuff_dma[entry]) {
319 @@ -1470,23 +1499,25 @@
320 rp->tx_skbuff[entry]->len,
323 - dev_kfree_skb_irq(rp->tx_skbuff[entry]);
324 + dev_kfree_skb_any(rp->tx_skbuff[entry]);
325 rp->tx_skbuff[entry] = NULL;
326 entry = (++rp->dirty_tx) % TX_RING_SIZE;
328 + spin_unlock_irq(&rp->lock);
330 if ((rp->cur_tx - rp->dirty_tx) < TX_QUEUE_LEN - 4)
331 netif_wake_queue(dev);
333 - spin_unlock(&rp->lock);
337 /* This routine is logically part of the interrupt handler, but isolated
338 for clarity and better register allocation. */
339 -static void rhine_rx(struct net_device *dev)
340 +static int rhine_rx(struct net_device *dev)
342 struct rhine_private *rp = netdev_priv(dev);
343 int entry = rp->cur_rx % RX_RING_SIZE;
344 - int boguscnt = rp->dirty_rx + RX_RING_SIZE - rp->cur_rx;
348 printk(KERN_DEBUG "%s: rhine_rx(), entry %d status %8.8x.\n",
349 @@ -1503,8 +1534,6 @@
351 printk(KERN_DEBUG "rhine_rx() status is %8.8x.\n",
353 - if (--boguscnt < 0)
355 if ((desc_status & (RxWholePkt | RxErr)) != RxWholePkt) {
356 if ((desc_status & RxWholePkt) != RxWholePkt) {
357 printk(KERN_WARNING "%s: Oversized Ethernet "
358 @@ -1528,9 +1557,7 @@
359 if (desc_status & 0x0004) rp->stats.rx_frame_errors++;
360 if (desc_status & 0x0002) {
361 /* this can also be updated outside the interrupt handler */
362 - spin_lock(&rp->lock);
363 rp->stats.rx_crc_errors++;
364 - spin_unlock(&rp->lock);
368 @@ -1558,6 +1585,7 @@
373 skb = rp->rx_skbuff[entry];
375 printk(KERN_ERR "%s: Inconsistent Rx "
376 @@ -1566,6 +1594,14 @@
379 rp->rx_skbuff[entry] = NULL;
381 + /* Shift the received data up by 2 bytes so that the IP header is aligned - should be faster than using rx_copybreak */
382 + for (i = pkt_len - (pkt_len % 4); i >= 0; i -= 4) {
383 + put_unaligned(*((u32 *) (skb->data + i)), (u32 *) (skb->data + i + 2));
388 skb_put(skb, pkt_len);
389 pci_unmap_single(rp->pdev,
390 rp->rx_skbuff_dma[entry],
391 @@ -1573,10 +1609,11 @@
394 skb->protocol = eth_type_trans(skb, dev);
396 + netif_receive_skb(skb);
397 dev->last_rx = jiffies;
398 rp->stats.rx_bytes += pkt_len;
399 rp->stats.rx_packets++;
402 entry = (++rp->cur_rx) % RX_RING_SIZE;
403 rp->rx_head_desc = &rp->rx_ring[entry];
404 @@ -1587,7 +1624,7 @@
406 entry = rp->dirty_rx % RX_RING_SIZE;
407 if (rp->rx_skbuff[entry] == NULL) {
408 - skb = dev_alloc_skb(rp->rx_buf_sz);
409 + skb = dev_alloc_skb(rp->rx_buf_sz + 4);
410 rp->rx_skbuff[entry] = skb;
412 break; /* Better luck next round. */
413 @@ -1600,6 +1637,8 @@
415 rp->rx_ring[entry].rx_status = cpu_to_le32(DescOwn);
422 @@ -1649,11 +1688,11 @@
426 -static void rhine_error(struct net_device *dev, int intr_status)
427 +static void rhine_error(struct net_device *dev)
429 struct rhine_private *rp = netdev_priv(dev);
430 void __iomem *ioaddr = rp->base;
432 + u32 intr_status = rp->istat;
433 spin_lock(&rp->lock);
435 if (intr_status & IntrLinkChange)
436 @@ -1898,6 +1937,7 @@
438 /* Disable interrupts by clearing the interrupt mask. */
439 iowrite16(0x0000, ioaddr + IntrEnable);
442 /* Stop the chip's Tx and Rx processes. */
443 iowrite16(CmdStop, ioaddr + ChipCmd);
444 @@ -1912,6 +1952,9 @@
448 + writeb(0x80, ioaddr + 0xa1);
449 + pci_set_power_state(rp->pdev, 3);
454 @@ -1941,6 +1984,7 @@
455 return; /* Nothing to do for non-WOL adapters */
457 rhine_power_init(dev);
458 + netif_poll_disable(dev);
460 /* Make sure we use pattern 0, 1 and not 4, 5 */
461 if (rp->quirks & rq6patterns)