GIT d7a3d09b6a14a0b1352cf68d6556f230b7ac1cfd git://lost.foo-projects.org/~cleech/linux-2.6#master

commit d7a3d09b6a14a0b1352cf68d6556f230b7ac1cfd
Author: Chris Leech
Date:   Wed Oct 18 16:08:50 2006 -0700

    I/OAT: Only offload copies for TCP when there will be a context switch

    The performance wins come from having the DMA copy engine do the copies
    in parallel with the context switch.  If there is enough data ready on
    the socket at recv time, just use a regular copy.

    Signed-off-by: Chris Leech

commit 62ead6cac14ac91cf6d9a0a6021ff37662a08a71
Author: Chris Leech
Date:   Wed Oct 18 16:08:49 2006 -0700

    I/OAT: Add entries to MAINTAINERS for the DMA memcpy subsystem and ioatdma

    Signed-off-by: Chris Leech

commit a27412529e3cb9573241989d3752a1c861045c61
Author: Chris Leech
Date:   Wed Oct 18 16:08:49 2006 -0700

    I/OAT: Add documentation for the tcp_dma_copybreak sysctl

    Signed-off-by: Chris Leech

commit 51330208da8b0e306a8271f75efb9772e263269c
Author: Chris Leech
Date:   Wed Oct 18 16:08:49 2006 -0700

    I/OAT: Remove the use of writeq from the ioatdma driver

    There's only one writeq left now anyway, and it's not in a performance
    path, so make it behave the same on 32-bit and 64-bit CPUs.

    Signed-off-by: Chris Leech

commit 8b3a3fb94148e56a40a022bae7f7f62506481ff6
Author: Chris Leech
Date:   Wed Oct 18 16:08:49 2006 -0700

    I/OAT: Remove the wrappers around read(bwl)/write(bwl) in ioatdma

    Signed-off-by: Chris Leech

commit 57cbbbb1a54d0db8698f6c0703d73ede285b73e8
Author: Jeff Garzik
Date:   Wed Oct 18 16:08:42 2006 -0700

    drivers/dma: handle sysfs errors

    From: Jeff Garzik

    Signed-off-by: Jeff Garzik
    Signed-off-by: Chris Leech

commit f6afad75c39b644ae40113b4c530ce6752cf71cc
Author: Chris Leech
Date:   Wed Oct 18 16:07:58 2006 -0700

    I/OAT: Push pending transactions to hardware more frequently

    Issuing an append command only every 20 descriptors turns out to be too
    infrequent with newer/faster CPUs.  Pushing every 4 still cuts down on
    MMIO writes to an acceptable level without letting the DMA engine run
    out of work.

    Signed-off-by: Chris Leech

 Documentation/networking/ip-sysctl.txt |    6 ++
 MAINTAINERS                            |   12 +++
 drivers/dma/dmaengine.c                |   22 +++++-
 drivers/dma/ioatdma.c                  |   68 ++++++++----------
 drivers/dma/ioatdma_io.h               |  118 --------------------------------
 net/ipv4/tcp.c                         |   10 ++-
 6 files changed, 76 insertions(+), 160 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index fd3c0c0..e9ee102 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -375,6 +375,12 @@ tcp_slow_start_after_idle - BOOLEAN
 	be timed out after an idle period.
 	Default: 1
 
+tcp_dma_copybreak - INTEGER
+	Lower limit, in bytes, of the size of socket reads that will be
+	offloaded to a DMA copy engine, if one is present in the system
+	and CONFIG_NET_DMA is enabled.
+	Default: 4096
+
 CIPSOv4 Variables:
 
 cipso_cache_enable - BOOLEAN
 
diff --git a/MAINTAINERS b/MAINTAINERS
index d708702..46a25cc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -915,6 +915,12 @@ M:	tori@unhappy.mine.nu
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 
+DMA GENERIC MEMCPY SUBSYSTEM
+P:	Chris Leech
+M:	christopher.leech@intel.com
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+
 DOCBOOK FOR DOCUMENTATION
 P:	Martin Waitz
 M:	tali@admingilde.org
@@ -1516,6 +1522,12 @@ P:	Tigran Aivazian
 M:	tigran@veritas.com
 S:	Maintained
 
+INTEL I/OAT DMA DRIVER
+P:	Chris Leech
+M:	christopher.leech@intel.com
+L:	linux-kernel@vger.kernel.org
+S:	Supported
+
 INTEL IXP4XX RANDOM NUMBER GENERATOR SUPPORT
 P:	Deepak Saxena
 M:	dsaxena@plexity.net
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 1527804..dc65773 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -312,7 +312,7 @@ void dma_async_client_chan_request(struc
 int dma_async_device_register(struct dma_device *device)
 {
 	static int id;
-	int chancnt = 0;
+	int chancnt = 0, rc;
 	struct dma_chan* chan;
 
 	if (!device)
@@ -334,8 +334,15 @@ int dma_async_device_register(struct dma
 		snprintf(chan->class_dev.class_id, BUS_ID_SIZE, "dma%dchan%d",
 		         device->dev_id, chan->chan_id);
 
+		rc = class_device_register(&chan->class_dev);
+		if (rc) {
+			chancnt--;
+			free_percpu(chan->local);
+			chan->local = NULL;
+			goto err_out;
+		}
+
 		kref_get(&device->refcount);
-		class_device_register(&chan->class_dev);
 	}
 
 	mutex_lock(&dma_list_mutex);
@@ -345,6 +352,17 @@ int dma_async_device_register(struct dma
 	dma_chans_rebalance();
 
 	return 0;
+
+err_out:
+	list_for_each_entry(chan, &device->channels, device_node) {
+		if (chan->local == NULL)
+			continue;
+		kref_put(&device->refcount, dma_async_device_cleanup);
+		class_device_unregister(&chan->class_dev);
+		chancnt--;
+		free_percpu(chan->local);
+	}
+	return rc;
 }
 
 /**
diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c
index 0358419..2800c19 100644
--- a/drivers/dma/ioatdma.c
+++ b/drivers/dma/ioatdma.c
@@ -32,7 +32,6 @@
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include "ioatdma.h"
-#include "ioatdma_io.h"
 #include "ioatdma_registers.h"
 #include "ioatdma_hw.h"
 
@@ -51,8 +50,8 @@ static int enumerate_dma_channels(struct
 	int i;
 	struct ioat_dma_chan *ioat_chan;
 
-	device->common.chancnt = ioatdma_read8(device, IOAT_CHANCNT_OFFSET);
-	xfercap_scale = ioatdma_read8(device, IOAT_XFERCAP_OFFSET);
+	device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
+	xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
 	xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
 
 	for (i = 0; i < device->common.chancnt; i++) {
@@ -123,7 +122,7 @@ static int ioat_dma_alloc_chan_resources
 	 * In-use bit automatically set by reading chanctrl
 	 * If 0, we got it, if 1, someone else did
 	 */
-	chanctrl = ioatdma_chan_read16(ioat_chan, IOAT_CHANCTRL_OFFSET);
+	chanctrl = readw(ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
 	if (chanctrl & IOAT_CHANCTRL_CHANNEL_IN_USE)
 		return -EBUSY;
 
@@ -132,12 +131,12 @@ static int ioat_dma_alloc_chan_resources
 		IOAT_CHANCTRL_ERR_INT_EN |
 		IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
 		IOAT_CHANCTRL_ERR_COMPLETION_EN;
-	ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl);
+	writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
 
-	chanerr = ioatdma_chan_read32(ioat_chan, IOAT_CHANERR_OFFSET);
+	chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
 	if (chanerr) {
 		printk("IOAT: CHANERR = %x, clearing\n", chanerr);
-		ioatdma_chan_write32(ioat_chan, IOAT_CHANERR_OFFSET, chanerr);
+		writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
 	}
 
 	/* Allocate descriptors */
@@ -161,10 +160,10 @@ static int ioat_dma_alloc_chan_resources
 					 &ioat_chan->completion_addr);
 	memset(ioat_chan->completion_virt, 0,
 	       sizeof(*ioat_chan->completion_virt));
-	ioatdma_chan_write32(ioat_chan, IOAT_CHANCMP_OFFSET_LOW,
-	       ((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF);
-	ioatdma_chan_write32(ioat_chan, IOAT_CHANCMP_OFFSET_HIGH,
-	       ((u64) ioat_chan->completion_addr) >> 32);
+	writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
+	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+	writel(((u64) ioat_chan->completion_addr) >> 32,
+	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
 
 	ioat_start_null_desc(ioat_chan);
 	return i;
@@ -182,7 +181,7 @@ static void ioat_dma_free_chan_resources
 
 	ioat_dma_memcpy_cleanup(ioat_chan);
 
-	ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET);
+	writeb(IOAT_CHANCMD_RESET, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
 
 	spin_lock_bh(&ioat_chan->desc_lock);
 	list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) {
@@ -210,9 +209,9 @@ static void ioat_dma_free_chan_resources
 	ioat_chan->last_completion = ioat_chan->completion_addr = 0;
 
 	/* Tell hw the chan is free */
-	chanctrl = ioatdma_chan_read16(ioat_chan, IOAT_CHANCTRL_OFFSET);
+	chanctrl = readw(ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
 	chanctrl &= ~IOAT_CHANCTRL_CHANNEL_IN_USE;
-	ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl);
+	writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
 }
 
 /**
@@ -310,7 +309,7 @@ static dma_cookie_t do_ioat_dma_memcpy(s
 	list_splice_init(&new_chain, ioat_chan->used_desc.prev);
 
 	ioat_chan->pending += desc_count;
-	if (ioat_chan->pending >= 20) {
+	if (ioat_chan->pending >= 4) {
 		append = 1;
 		ioat_chan->pending = 0;
 	}
@@ -318,9 +317,8 @@
 	spin_unlock_bh(&ioat_chan->desc_lock);
 
 	if (append)
-		ioatdma_chan_write8(ioat_chan,
-			IOAT_CHANCMD_OFFSET,
-			IOAT_CHANCMD_APPEND);
+		writeb(IOAT_CHANCMD_APPEND,
+		       ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
 	return cookie;
 }
 
@@ -417,9 +415,8 @@ static void ioat_dma_memcpy_issue_pendin
 
 	if (ioat_chan->pending != 0) {
 		ioat_chan->pending = 0;
-		ioatdma_chan_write8(ioat_chan,
-			IOAT_CHANCMD_OFFSET,
-			IOAT_CHANCMD_APPEND);
+		writeb(IOAT_CHANCMD_APPEND,
+		       ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
 	}
 }
 
@@ -449,7 +446,7 @@ #endif
 	if ((chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
 			IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
 		printk("IOAT: Channel halted, chanerr = %x\n",
-			ioatdma_chan_read32(chan, IOAT_CHANERR_OFFSET));
+			readl(chan->reg_base + IOAT_CHANERR_OFFSET));
 
 		/* TODO do something to salvage the situation */
 	}
@@ -569,21 +566,21 @@ static irqreturn_t ioat_do_interrupt(int
 	unsigned long attnstatus;
 	u8 intrctrl;
 
-	intrctrl = ioatdma_read8(instance, IOAT_INTRCTRL_OFFSET);
+	intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
 
 	if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
 		return IRQ_NONE;
 
 	if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
-		ioatdma_write8(instance, IOAT_INTRCTRL_OFFSET, intrctrl);
+		writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
 		return IRQ_NONE;
 	}
 
-	attnstatus = ioatdma_read32(instance, IOAT_ATTNSTATUS_OFFSET);
+	attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
 
 	printk(KERN_ERR "ioatdma error: interrupt! status %lx\n", attnstatus);
 
-	ioatdma_write8(instance, IOAT_INTRCTRL_OFFSET, intrctrl);
+	writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
 
 	return IRQ_HANDLED;
 }
@@ -611,15 +608,12 @@ static void ioat_start_null_desc(struct
 	list_add_tail(&desc->node, &ioat_chan->used_desc);
 	spin_unlock_bh(&ioat_chan->desc_lock);
 
-#if (BITS_PER_LONG == 64)
-	ioatdma_chan_write64(ioat_chan, IOAT_CHAINADDR_OFFSET, desc->phys);
-#else
-	ioatdma_chan_write32(ioat_chan,
-			IOAT_CHAINADDR_OFFSET_LOW,
-			(u32) desc->phys);
-	ioatdma_chan_write32(ioat_chan, IOAT_CHAINADDR_OFFSET_HIGH, 0);
-#endif
-	ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_START);
+	writel(((u64) desc->phys) & 0x00000000FFFFFFFF,
+	       ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_LOW);
+	writel(((u64) desc->phys) >> 32,
+	       ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_HIGH);
+
+	writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
 }
 
 /*
@@ -748,7 +742,7 @@ #endif
 
 	device->reg_base = reg_base;
 
-	ioatdma_write8(device, IOAT_INTRCTRL_OFFSET, IOAT_INTRCTRL_MASTER_INT_EN);
+	writeb(IOAT_INTRCTRL_MASTER_INT_EN, device->reg_base + IOAT_INTRCTRL_OFFSET);
 	pci_set_master(pdev);
 
 	INIT_LIST_HEAD(&device->common.channels);
@@ -818,7 +812,7 @@ #endif
 }
 
 /* MODULE API */
-MODULE_VERSION("1.7");
+MODULE_VERSION("1.9");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Intel Corporation");
 
diff --git a/drivers/dma/ioatdma_io.h b/drivers/dma/ioatdma_io.h
deleted file mode 100644
index c0b4bf6..0000000
--- a/drivers/dma/ioatdma_io.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-#ifndef IOATDMA_IO_H
-#define IOATDMA_IO_H
-
-#include <asm/io.h>
-
-/*
- * device and per-channel MMIO register read and write functions
- * this is a lot of anoying inline functions, but it's typesafe
- */
-
-static inline u8 ioatdma_read8(struct ioat_device *device,
-                               unsigned int offset)
-{
-	return readb(device->reg_base + offset);
-}
-
-static inline u16 ioatdma_read16(struct ioat_device *device,
-                                 unsigned int offset)
-{
-	return readw(device->reg_base + offset);
-}
-
-static inline u32 ioatdma_read32(struct ioat_device *device,
-                                 unsigned int offset)
-{
-	return readl(device->reg_base + offset);
-}
-
-static inline void ioatdma_write8(struct ioat_device *device,
-                                  unsigned int offset, u8 value)
-{
-	writeb(value, device->reg_base + offset);
-}
-
-static inline void ioatdma_write16(struct ioat_device *device,
-                                   unsigned int offset, u16 value)
-{
-	writew(value, device->reg_base + offset);
-}
-
-static inline void ioatdma_write32(struct ioat_device *device,
-                                   unsigned int offset, u32 value)
-{
-	writel(value, device->reg_base + offset);
-}
-
-static inline u8 ioatdma_chan_read8(struct ioat_dma_chan *chan,
-                                    unsigned int offset)
-{
-	return readb(chan->reg_base + offset);
-}
-
-static inline u16 ioatdma_chan_read16(struct ioat_dma_chan *chan,
-                                      unsigned int offset)
-{
-	return readw(chan->reg_base + offset);
-}
-
-static inline u32 ioatdma_chan_read32(struct ioat_dma_chan *chan,
-                                      unsigned int offset)
-{
-	return readl(chan->reg_base + offset);
-}
-
-static inline void ioatdma_chan_write8(struct ioat_dma_chan *chan,
-                                       unsigned int offset, u8 value)
-{
-	writeb(value, chan->reg_base + offset);
-}
-
-static inline void ioatdma_chan_write16(struct ioat_dma_chan *chan,
-                                        unsigned int offset, u16 value)
-{
-	writew(value, chan->reg_base + offset);
-}
-
-static inline void ioatdma_chan_write32(struct ioat_dma_chan *chan,
-                                        unsigned int offset, u32 value)
-{
-	writel(value, chan->reg_base + offset);
-}
-
-#if (BITS_PER_LONG == 64)
-static inline u64 ioatdma_chan_read64(struct ioat_dma_chan *chan,
-                                      unsigned int offset)
-{
-	return readq(chan->reg_base + offset);
-}
-
-static inline void ioatdma_chan_write64(struct ioat_dma_chan *chan,
-                                        unsigned int offset, u64 value)
-{
-	writeq(value, chan->reg_base + offset);
-}
-#endif
-
-#endif /* IOATDMA_IO_H */
-
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 66e9a72..ef0a6cd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1108,6 +1108,8 @@ int tcp_recvmsg(struct kiocb *iocb, stru
 	long timeo;
 	struct task_struct *user_recv = NULL;
 	int copied_early = 0;
+	int available = 0;
+	struct sk_buff *skb;
 
 	lock_sock(sk);
 
@@ -1134,7 +1136,11 @@ int tcp_recvmsg(struct kiocb *iocb, stru
 #ifdef CONFIG_NET_DMA
 	tp->ucopy.dma_chan = NULL;
 	preempt_disable();
-	if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
+	skb = skb_peek_tail(&sk->sk_receive_queue);
+	if (skb)
+		available = TCP_SKB_CB(skb)->seq + skb->len - (*seq);
+	if ((available < target) &&
+	    (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
 	    !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) {
 		preempt_enable_no_resched();
 		tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len);
@@ -1143,7 +1149,6 @@ #ifdef CONFIG_NET_DMA
 #endif
 
 	do {
-		struct sk_buff *skb;
 		u32 offset;
 
 		/* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
@@ -1431,7 +1436,6 @@ skip_copy:
 
 #ifdef CONFIG_NET_DMA
 	if (tp->ucopy.dma_chan) {
-		struct sk_buff *skb;
 		dma_cookie_t done, used;
 
 		dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
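
For readers skimming the series, the recv-time heuristic that the first patch
adds to tcp_recvmsg() can be summarized as below.  This is only an
illustrative sketch: should_offload_copy(), available, target and copybreak
are simplified stand-ins, not kernel symbols; the authoritative logic is the
net/ipv4/tcp.c hunk above.

	/* Sketch of the offload decision; not the kernel code itself. */
	static int should_offload_copy(unsigned int available, /* bytes already queued */
	                               unsigned int target,    /* low-water mark for this read */
	                               size_t len,             /* user buffer size */
	                               unsigned int copybreak) /* sysctl tcp_dma_copybreak */
	{
		if (available >= target)
			return 0; /* enough data queued: recv won't block, plain copy wins */
		if (len <= copybreak)
			return 0; /* small read: not worth the DMA setup cost */
		return 1;         /* offload: the copy overlaps the coming context switch */
	}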