GIT 80cd47427b1ff1a88215c82820ce5ecfc4c814d6 git://lost.foo-projects.org/~cleech/linux-2.6#master commit Author: Chris Leech Date: Fri Nov 17 11:37:31 2006 -0800 I/OAT: Only offload copies for TCP when there will be a context switch The performance wins come with having the DMA copy engine doing the copies in parallel with the context switch. If there is enough data ready on the socket at recv time just use a regular copy. Signed-off-by: Chris Leech commit 59ca04088a138cfe606810cc0b8e87317130e7c6 Author: Chris Leech Date: Fri Nov 17 11:37:31 2006 -0800 I/OAT: Add entries to MAINTAINERS for the DMA memcpy subsystem and ioatdma Signed-off-by: Chris Leech commit d8238afa7eedc047b57da7ec98e98fb051fc4e85 Author: Chris Leech Date: Fri Nov 17 11:37:29 2006 -0800 I/OAT: Add documentation for the tcp_dma_copybreak sysctl Signed-off-by: Chris Leech commit b68e20f799334c2ad49ee7bb872e2615cd8ef06e Author: Chris Leech Date: Fri Nov 17 11:37:28 2006 -0800 I/OAT: Remove the use of writeq from the ioatdma driver There's only one now anyway, and it's not in a performance path, so make it behave the same on 32-bit and 64-bit CPUs. Signed-off-by: Chris Leech commit 5d3c9bd8ded4d243b6bc8b92199c4bb07ddd35e1 Author: Chris Leech Date: Fri Nov 17 11:37:28 2006 -0800 I/OAT: Remove the wrappers around read(bwl)/write(bwl) in ioatdma Signed-off-by: Chris Leech commit 4cd11e2c2f30ec0d74002a741b280c69f61ec097 Author: Jeff Garzik Date: Fri Nov 17 11:37:27 2006 -0800 drivers/dma: handle sysfs errors From: Jeff Garzik Signed-off-by: Jeff Garzik Signed-off-by: Chris Leech commit 7d34c3f00e65ac83c5159e824ae094e5ceb2a47e Author: Chris Leech Date: Fri Nov 17 11:37:27 2006 -0800 I/OAT: Push pending transactions to hardware more frequently Every 20 descriptors turns out to be to few append commands with newer/faster CPUs. Pushing every 4 still cuts down on MMIO writes to an acceptable level without letting the DMA engine run out of work. Signed-off-by: Chris Leech Documentation/networking/ip-sysctl.txt | 16 ++++ MAINTAINERS | 12 +++ drivers/dma/dmaengine.c | 22 +++++- drivers/dma/ioatdma.c | 68 ++++++++---------- drivers/dma/ioatdma_io.h | 118 -------------------------------- net/ipv4/tcp.c | 10 ++- 6 files changed, 86 insertions(+), 160 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index a0f6842..eb785c1 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -387,6 +387,22 @@ tcp_workaround_signed_windows - BOOLEAN not receive a window scaling option from them. Default: 0 +<<<<<<< HEAD/Documentation/networking/ip-sysctl.txt +======= +tcp_slow_start_after_idle - BOOLEAN + If set, provide RFC2861 behavior and time out the congestion + window after an idle period. An idle period is defined at + the current RTO. If unset, the congestion window will not + be timed out after an idle period. + Default: 1 + +tcp_dma_copybreak - INTEGER + Lower limit, in bytes, of the size of socket reads that will be + offloaded to a DMA copy engine, if one is present in the system + and CONFIG_NET_DMA is enabled. + Default: 4096 + +>>>>>>> /Documentation/networking/ip-sysctl.txt CIPSOv4 Variables: cipso_cache_enable - BOOLEAN diff --git a/MAINTAINERS b/MAINTAINERS index b202493..c30ba3d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -935,6 +935,12 @@ M: tori@unhappy.mine.nu L: linux-kernel@vger.kernel.org S: Maintained +DMA GENERIC MEMCPY SUBSYSTEM +P: Chris Leech +M: christopher.leech@intel.com +L: linux-kernel@vger.kernel.org +S: Maintained + DOCBOOK FOR DOCUMENTATION P: Martin Waitz M: tali@admingilde.org @@ -1537,6 +1543,12 @@ P: Tigran Aivazian M: tigran@aivazian.fsnet.co.uk S: Maintained +INTEL I/OAT DMA DRIVER +P: Chris Leech +M: christopher.leech@intel.com +L: linux-kernel@vger.kernel.org +S: Supported + INTEL IXP4XX RANDOM NUMBER GENERATOR SUPPORT P: Deepak Saxena M: dsaxena@plexity.net diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 1527804..dc65773 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -312,7 +312,7 @@ void dma_async_client_chan_request(struc int dma_async_device_register(struct dma_device *device) { static int id; - int chancnt = 0; + int chancnt = 0, rc; struct dma_chan* chan; if (!device) @@ -334,8 +334,15 @@ int dma_async_device_register(struct dma snprintf(chan->class_dev.class_id, BUS_ID_SIZE, "dma%dchan%d", device->dev_id, chan->chan_id); + rc = class_device_register(&chan->class_dev); + if (rc) { + chancnt--; + free_percpu(chan->local); + chan->local = NULL; + goto err_out; + } + kref_get(&device->refcount); - class_device_register(&chan->class_dev); } mutex_lock(&dma_list_mutex); @@ -345,6 +352,17 @@ int dma_async_device_register(struct dma dma_chans_rebalance(); return 0; + +err_out: + list_for_each_entry(chan, &device->channels, device_node) { + if (chan->local == NULL) + continue; + kref_put(&device->refcount, dma_async_device_cleanup); + class_device_unregister(&chan->class_dev); + chancnt--; + free_percpu(chan->local); + } + return rc; } /** diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c index 8e87261..cbf93ca 100644 --- a/drivers/dma/ioatdma.c +++ b/drivers/dma/ioatdma.c @@ -32,7 +32,6 @@ #include #include #include #include "ioatdma.h" -#include "ioatdma_io.h" #include "ioatdma_registers.h" #include "ioatdma_hw.h" @@ -51,8 +50,8 @@ static int enumerate_dma_channels(struct int i; struct ioat_dma_chan *ioat_chan; - device->common.chancnt = ioatdma_read8(device, IOAT_CHANCNT_OFFSET); - xfercap_scale = ioatdma_read8(device, IOAT_XFERCAP_OFFSET); + device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); + xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); for (i = 0; i < device->common.chancnt; i++) { @@ -123,7 +122,7 @@ static int ioat_dma_alloc_chan_resources * In-use bit automatically set by reading chanctrl * If 0, we got it, if 1, someone else did */ - chanctrl = ioatdma_chan_read16(ioat_chan, IOAT_CHANCTRL_OFFSET); + chanctrl = readw(ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); if (chanctrl & IOAT_CHANCTRL_CHANNEL_IN_USE) return -EBUSY; @@ -132,12 +131,12 @@ static int ioat_dma_alloc_chan_resources IOAT_CHANCTRL_ERR_INT_EN | IOAT_CHANCTRL_ANY_ERR_ABORT_EN | IOAT_CHANCTRL_ERR_COMPLETION_EN; - ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl); + writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); - chanerr = ioatdma_chan_read32(ioat_chan, IOAT_CHANERR_OFFSET); + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); if (chanerr) { printk("IOAT: CHANERR = %x, clearing\n", chanerr); - ioatdma_chan_write32(ioat_chan, IOAT_CHANERR_OFFSET, chanerr); + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); } /* Allocate descriptors */ @@ -161,10 +160,10 @@ static int ioat_dma_alloc_chan_resources &ioat_chan->completion_addr); memset(ioat_chan->completion_virt, 0, sizeof(*ioat_chan->completion_virt)); - ioatdma_chan_write32(ioat_chan, IOAT_CHANCMP_OFFSET_LOW, - ((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF); - ioatdma_chan_write32(ioat_chan, IOAT_CHANCMP_OFFSET_HIGH, - ((u64) ioat_chan->completion_addr) >> 32); + writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64) ioat_chan->completion_addr) >> 32, + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); ioat_start_null_desc(ioat_chan); return i; @@ -182,7 +181,7 @@ static void ioat_dma_free_chan_resources ioat_dma_memcpy_cleanup(ioat_chan); - ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET); + writeb(IOAT_CHANCMD_RESET, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET); spin_lock_bh(&ioat_chan->desc_lock); list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) { @@ -210,9 +209,9 @@ static void ioat_dma_free_chan_resources ioat_chan->last_completion = ioat_chan->completion_addr = 0; /* Tell hw the chan is free */ - chanctrl = ioatdma_chan_read16(ioat_chan, IOAT_CHANCTRL_OFFSET); + chanctrl = readw(ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); chanctrl &= ~IOAT_CHANCTRL_CHANNEL_IN_USE; - ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl); + writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); } /** @@ -310,7 +309,7 @@ static dma_cookie_t do_ioat_dma_memcpy(s list_splice_init(&new_chain, ioat_chan->used_desc.prev); ioat_chan->pending += desc_count; - if (ioat_chan->pending >= 20) { + if (ioat_chan->pending >= 4) { append = 1; ioat_chan->pending = 0; } @@ -318,9 +317,8 @@ static dma_cookie_t do_ioat_dma_memcpy(s spin_unlock_bh(&ioat_chan->desc_lock); if (append) - ioatdma_chan_write8(ioat_chan, - IOAT_CHANCMD_OFFSET, - IOAT_CHANCMD_APPEND); + writeb(IOAT_CHANCMD_APPEND, + ioat_chan->reg_base + IOAT_CHANCMD_OFFSET); return cookie; } @@ -417,9 +415,8 @@ static void ioat_dma_memcpy_issue_pendin if (ioat_chan->pending != 0) { ioat_chan->pending = 0; - ioatdma_chan_write8(ioat_chan, - IOAT_CHANCMD_OFFSET, - IOAT_CHANCMD_APPEND); + writeb(IOAT_CHANCMD_APPEND, + ioat_chan->reg_base + IOAT_CHANCMD_OFFSET); } } @@ -449,7 +446,7 @@ #endif if ((chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { printk("IOAT: Channel halted, chanerr = %x\n", - ioatdma_chan_read32(chan, IOAT_CHANERR_OFFSET)); + readl(chan->reg_base + IOAT_CHANERR_OFFSET)); /* TODO do something to salvage the situation */ } @@ -569,21 +566,21 @@ static irqreturn_t ioat_do_interrupt(int unsigned long attnstatus; u8 intrctrl; - intrctrl = ioatdma_read8(instance, IOAT_INTRCTRL_OFFSET); + intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET); if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN)) return IRQ_NONE; if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) { - ioatdma_write8(instance, IOAT_INTRCTRL_OFFSET, intrctrl); + writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); return IRQ_NONE; } - attnstatus = ioatdma_read32(instance, IOAT_ATTNSTATUS_OFFSET); + attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); printk(KERN_ERR "ioatdma error: interrupt! status %lx\n", attnstatus); - ioatdma_write8(instance, IOAT_INTRCTRL_OFFSET, intrctrl); + writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); return IRQ_HANDLED; } @@ -611,15 +608,12 @@ static void ioat_start_null_desc(struct list_add_tail(&desc->node, &ioat_chan->used_desc); spin_unlock_bh(&ioat_chan->desc_lock); -#if (BITS_PER_LONG == 64) - ioatdma_chan_write64(ioat_chan, IOAT_CHAINADDR_OFFSET, desc->phys); -#else - ioatdma_chan_write32(ioat_chan, - IOAT_CHAINADDR_OFFSET_LOW, - (u32) desc->phys); - ioatdma_chan_write32(ioat_chan, IOAT_CHAINADDR_OFFSET_HIGH, 0); -#endif - ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_START); + writel(((u64) desc->phys) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->phys) >> 32, + ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_HIGH); + + writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET); } /* @@ -748,7 +742,7 @@ #endif device->reg_base = reg_base; - ioatdma_write8(device, IOAT_INTRCTRL_OFFSET, IOAT_INTRCTRL_MASTER_INT_EN); + writeb(IOAT_INTRCTRL_MASTER_INT_EN, device->reg_base + IOAT_INTRCTRL_OFFSET); pci_set_master(pdev); INIT_LIST_HEAD(&device->common.channels); @@ -818,7 +812,7 @@ #endif } /* MODULE API */ -MODULE_VERSION("1.7"); +MODULE_VERSION("1.9"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Intel Corporation"); diff --git a/drivers/dma/ioatdma_io.h b/drivers/dma/ioatdma_io.h deleted file mode 100644 index c0b4bf6..0000000 --- a/drivers/dma/ioatdma_io.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * The full GNU General Public License is included in this distribution in the - * file called COPYING. - */ -#ifndef IOATDMA_IO_H -#define IOATDMA_IO_H - -#include - -/* - * device and per-channel MMIO register read and write functions - * this is a lot of anoying inline functions, but it's typesafe - */ - -static inline u8 ioatdma_read8(struct ioat_device *device, - unsigned int offset) -{ - return readb(device->reg_base + offset); -} - -static inline u16 ioatdma_read16(struct ioat_device *device, - unsigned int offset) -{ - return readw(device->reg_base + offset); -} - -static inline u32 ioatdma_read32(struct ioat_device *device, - unsigned int offset) -{ - return readl(device->reg_base + offset); -} - -static inline void ioatdma_write8(struct ioat_device *device, - unsigned int offset, u8 value) -{ - writeb(value, device->reg_base + offset); -} - -static inline void ioatdma_write16(struct ioat_device *device, - unsigned int offset, u16 value) -{ - writew(value, device->reg_base + offset); -} - -static inline void ioatdma_write32(struct ioat_device *device, - unsigned int offset, u32 value) -{ - writel(value, device->reg_base + offset); -} - -static inline u8 ioatdma_chan_read8(struct ioat_dma_chan *chan, - unsigned int offset) -{ - return readb(chan->reg_base + offset); -} - -static inline u16 ioatdma_chan_read16(struct ioat_dma_chan *chan, - unsigned int offset) -{ - return readw(chan->reg_base + offset); -} - -static inline u32 ioatdma_chan_read32(struct ioat_dma_chan *chan, - unsigned int offset) -{ - return readl(chan->reg_base + offset); -} - -static inline void ioatdma_chan_write8(struct ioat_dma_chan *chan, - unsigned int offset, u8 value) -{ - writeb(value, chan->reg_base + offset); -} - -static inline void ioatdma_chan_write16(struct ioat_dma_chan *chan, - unsigned int offset, u16 value) -{ - writew(value, chan->reg_base + offset); -} - -static inline void ioatdma_chan_write32(struct ioat_dma_chan *chan, - unsigned int offset, u32 value) -{ - writel(value, chan->reg_base + offset); -} - -#if (BITS_PER_LONG == 64) -static inline u64 ioatdma_chan_read64(struct ioat_dma_chan *chan, - unsigned int offset) -{ - return readq(chan->reg_base + offset); -} - -static inline void ioatdma_chan_write64(struct ioat_dma_chan *chan, - unsigned int offset, u64 value) -{ - writeq(value, chan->reg_base + offset); -} -#endif - -#endif /* IOATDMA_IO_H */ - diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 090c690..84e1625 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1110,6 +1110,8 @@ int tcp_recvmsg(struct kiocb *iocb, stru long timeo; struct task_struct *user_recv = NULL; int copied_early = 0; + int available = 0; + struct sk_buff *skb; lock_sock(sk); @@ -1136,7 +1138,11 @@ int tcp_recvmsg(struct kiocb *iocb, stru #ifdef CONFIG_NET_DMA tp->ucopy.dma_chan = NULL; preempt_disable(); - if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && + skb = skb_peek_tail(&sk->sk_receive_queue); + if (skb) + available = TCP_SKB_CB(skb)->seq + skb->len - (*seq); + if ((available < target) && + (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) { preempt_enable_no_resched(); tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); @@ -1145,7 +1151,6 @@ #ifdef CONFIG_NET_DMA #endif do { - struct sk_buff *skb; u32 offset; /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ @@ -1433,7 +1438,6 @@ skip_copy: #ifdef CONFIG_NET_DMA if (tp->ucopy.dma_chan) { - struct sk_buff *skb; dma_cookie_t done, used; dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);