From: Grant Grundler IRQs are racing with tulip_down(). DMA can be restarted by the interrupt handler _after_ we call tulip_stop_rxtx() and the DMA buffers are unmapped. The result is an MCA (hard crash on ia64) because of an IO TLB miss. The long-term fix is to make the interrupt handler shutdown aware. Signed-off-by: Grant Grundler Acked-by: Valerie Henson Cc: Jeff Garzik Signed-off-by: Andrew Morton --- drivers/net/tulip/tulip_core.c | 37 ++++++++++++++++++++----------- 1 files changed, 24 insertions(+), 13 deletions(-) diff -puN drivers/net/tulip/tulip_core.c~tulip-fix-shutdown-dma-irq-race drivers/net/tulip/tulip_core.c --- a/drivers/net/tulip/tulip_core.c~tulip-fix-shutdown-dma-irq-race +++ a/drivers/net/tulip/tulip_core.c @@ -17,11 +17,11 @@ #define DRV_NAME "tulip" #ifdef CONFIG_TULIP_NAPI -#define DRV_VERSION "1.1.13-NAPI" /* Keep at least for test */ +#define DRV_VERSION "1.1.14-NAPI" /* Keep at least for test */ #else -#define DRV_VERSION "1.1.13" +#define DRV_VERSION "1.1.14" #endif -#define DRV_RELDATE "May 11, 2002" +#define DRV_RELDATE "May 6, 2006" #include @@ -738,23 +738,36 @@ static void tulip_down (struct net_devic #endif spin_lock_irqsave (&tp->lock, flags); + /* + FIXME: We should really add a shutdown-in-progress flag and + check it in the interrupt handler to see whether we should + reenable DMA or not. The preferred ordering here would be: + + stop DMA engine + disable interrupts + remove DMA resources + free_irq() + + The below works but is non-obvious and doesn't match the + ordering of bring-up. -VAL + */ + /* Disable interrupts by clearing the interrupt mask. */ iowrite32 (0x00000000, ioaddr + CSR7); + ioread32 (ioaddr + CSR7); /* flush posted write */ - /* Stop the Tx and Rx processes. */ - tulip_stop_rxtx(tp); + spin_unlock_irqrestore (&tp->lock, flags); - /* prepare receive buffers */ - tulip_refill_rx(dev); + free_irq (dev->irq, dev); /* no more races after this */ + tulip_stop_rxtx(tp); /* Stop DMA */ - /* release any unconsumed transmit buffers */ - tulip_clean_tx_ring(tp); + /* Put driver back into the state we start with */ + tulip_refill_rx(dev); /* prepare RX buffers */ + tulip_clean_tx_ring(tp); /* clean up unsent TX buffers */ if (ioread32 (ioaddr + CSR6) != 0xffffffff) tp->stats.rx_missed_errors += ioread32 (ioaddr + CSR8) & 0xffff; - spin_unlock_irqrestore (&tp->lock, flags); - init_timer(&tp->timer); tp->timer.data = (unsigned long)dev; tp->timer.function = tulip_tbl[tp->chip_id].media_timer; @@ -780,7 +793,6 @@ static int tulip_close (struct net_devic printk (KERN_DEBUG "%s: Shutting down ethercard, status was %2.2x.\n", dev->name, ioread32 (ioaddr + CSR5)); - free_irq (dev->irq, dev); /* Free all the skbuffs in the Rx queue. */ for (i = 0; i < RX_RING_SIZE; i++) { @@ -1747,7 +1759,6 @@ static int tulip_suspend (struct pci_dev tulip_down(dev); netif_device_detach(dev); - free_irq(dev->irq, dev); pci_save_state(pdev); pci_disable_device(pdev); _