GIT 0e057664d2d11d653cb223465bab18c02cd9552c git+ssh://master.kernel.org/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git#for-akpm

commit bb2f0e5fbcdcfa0e175f7fdf52a84ff026f3fbff
Author: Jens Axboe
Date: Tue Aug 7 09:37:10 2007 +0200

SPARC64: sg chaining support

This updates the sparc64 iommu/pci dma mappers to sg chaining.

Acked-by: David S. Miller

Later updated to newer kernel with unified sparc64 iommu sg handling.

Signed-off-by: Jens Axboe

commit 2f6ac9f61c6214998a0fd07df6729c5a2db8903c
Author: Jens Axboe
Date: Mon May 14 15:44:38 2007 +0200

SPARC: sg chaining support

This updates the sparc iommu/pci dma mappers to sg chaining.

Acked-by: David S. Miller
Signed-off-by: Jens Axboe

commit d914c767631126ef1049c23208b9056a92f7903b
Author: Jens Axboe
Date: Thu Aug 16 08:50:57 2007 +0200

PPC: sg chaining support

This updates the ppc iommu/pci dma mappers to sg chaining. Includes further fixes from FUJITA Tomonori.

Signed-off-by: Jens Axboe

commit 735dfe3639f738929010d6bcebc8377d4eff8e5a
Author: Jens Axboe
Date: Thu Jul 19 08:22:17 2007 +0200

PS3: sg chaining support

Acked-by: Geoff Levand
Signed-off-by: Jens Axboe

commit 17a231d33a4482b0535c81b53139793c83477f37
Author: Jens Axboe
Date: Mon May 14 13:01:35 2007 +0200

IA64: sg chaining support

This updates the ia64 iommu/pci dma mappers to sg chaining.

Signed-off-by: Jens Axboe

commit 575682010bb3f32daea54d97416e6ef4d8371581
Author: Jens Axboe
Date: Thu May 10 11:59:55 2007 +0200

x86-64: enable sg chaining

Signed-off-by: Jens Axboe

commit 5e38912833bf58fba4bfe7efc6f3faf9019c9285
Author: Jens Axboe
Date: Tue Jul 24 12:40:44 2007 +0200

x86-64: update pci-gart iommu to sg helpers

Signed-off-by: Jens Axboe

commit 8e2113a54b83dfc9a566c529a094f384cb144ad1
Author: Jens Axboe
Date: Tue Jul 24 12:39:27 2007 +0200

x86-64: update nommu to sg helpers

Acked-by: Muli Ben-Yehuda
Signed-off-by: Jens Axboe

commit 0fefc7d540cd15639a23dfacb4e9bea5a2db6fb2
Author: Jens Axboe
Date: Tue Jul 24 12:38:15 2007 +0200

x86-64: update calgary iommu to sg helpers

Acked-by: Muli Ben-Yehuda
Signed-off-by: Jens Axboe

commit 365a01fa028ed1c6d5fc9618c83ca175e4144190
Author: Jens Axboe
Date: Fri May 11 14:56:18 2007 +0200

swiotlb: sg chaining support

Signed-off-by: Jens Axboe

commit 26b930ece23afc921cc23ccea31a5deb122886a0
Author: Jens Axboe
Date: Fri May 11 12:57:30 2007 +0200

i386: enable sg chaining

We don't need to do more on x86, there's no iommu to be worried about.

Signed-off-by: Jens Axboe

commit 62cbb1cdd20b403819c1eab5b17ce6b2c238cf49
Author: Jens Axboe
Date: Wed May 9 09:15:27 2007 +0200

i386 dma_map_sg: convert to using sg helpers

The dma mapping helpers need to be converted to using sg helpers as well, so they will work with a chained sglist setup.

Signed-off-by: Jens Axboe
commit 97ac480e2c145d186dff85d8dc9aa63bd35ea5e8
Author: Lee Schermerhorn
Date: Fri Sep 21 08:33:55 2007 +0200

Panic in blk_rq_map_sg() from CCISS driver

New scatter/gather list chaining [sg_next()] treats the 'page' member of struct scatterlist with the low bit set [0x01] as a chain pointer to another struct scatterlist [array]. The CCISS driver request function passes an uninitialized, temporary, on-stack scatterlist array to blk_rq_map_sg(). sg_next() interprets random data on the stack as a chain pointer and eventually tries to de-reference an invalid pointer, resulting in:

[] blk_rq_map_sg+0x70/0x170
PGD 6090c3067 PUD 0
Oops: 0000 [1] SMP
last sysfs file: /block/cciss!c0d0/cciss!c0d0p1/dev
CPU 6
Modules linked in: ehci_hcd ohci_hcd uhci_hcd
Pid: 1, comm: init Not tainted 2.6.23-rc6-mm1 #3
RIP: 0010:[] [] blk_rq_map_sg+0x70/0x170
RSP: 0018:ffff81060901f768 EFLAGS: 00010206
RAX: 000000040b161000 RBX: ffff81060901f7d8 RCX: 000000040b162c00
RDX: 0000000000000000 RSI: ffff81060b13a260 RDI: ffff81060b139600
RBP: 0000000000001400 R08: 00000000fffffffe R09: 0000000000000400
R10: 0000000000000000 R11: 000000040b163000 R12: ffff810102fe0000
R13: 0000000000000001 R14: 0000000000000001 R15: 00001e0000000000
FS: 00000000026108f0(0063) GS:ffff810409000b80(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 000000010000001e CR3: 00000006090c6000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process init (pid: 1, threadinfo ffff81060901e000, task ffff810409020800)
last branch before last exception/interrupt
from [] blk_rq_map_sg+0x10a/0x170
to [] blk_rq_map_sg+0x70/0x170
Stack: 000000018068ea00 ffff810102fe0000 0000000000000000 ffff810011400000
0000000000000002 0000000000000000 ffff81040b172000 ffffffff803acd3d
0000000000003ec1 ffff8106090d5000 ffff8106090d5000 ffff810102fe0000
Call Trace:
[] do_cciss_request+0x15d/0x4c0
[] new_slab+0x1c8/0x270
[] __slab_alloc+0x22d/0x470
[] mempool_alloc+0x4b/0x130
[] cfq_set_request+0xee/0x380
[] mempool_alloc+0x4b/0x130
[] get_request+0x168/0x360
[] rb_insert_color+0x8d/0x110
[] elv_rb_add+0x58/0x60
[] cfq_add_rq_rb+0x69/0xa0
[] elv_merged_request+0x5b/0x60
[] __make_request+0x23d/0x650
[] __slab_alloc+0x22d/0x470
[] generic_write_checks+0x140/0x190
[] generic_make_request+0x1c2/0x3a0
Kernel panic - not syncing: Attempted to kill init!

This patch initializes the tmp_sg array to zeroes. Perhaps not the ultimate fix, but an effective work-around. I can now boot 23-rc6-mm1 on an HP Proliant x86_64 with CCISS boot disk.

Signed-off-by: Lee Schermerhorn

drivers/block/cciss.c | 1 +
1 file changed, 1 insertion(+)

Signed-off-by: Jens Axboe

commit 3d8c9c66bbc7d35f7eb7acb73399f51ce75aadb2
Author: FUJITA Tomonori
Date: Tue Sep 18 12:17:28 2007 +0200

remove sglist_len

Signed-off-by: FUJITA Tomonori
Signed-off-by: Jens Axboe

commit 1451f841f2e122499776a6643815d9f11a618f98
Author: FUJITA Tomonori
Date: Tue Sep 18 12:16:45 2007 +0200

remove blk_queue_max_phys_segments in libata

LIBATA_MAX_PRD is the maximum number of DMA scatter/gather elements permitted by the HBA's DMA engine. It's properly set to q->max_hw_segments via the sg_tablesize parameter. libata shouldn't call blk_queue_max_phys_segments.

Now LIBATA_MAX_PRD is equal to SCSI_MAX_PHYS_SEGMENTS by default (both are 128), so everything is fine. But if they are changed, some code (like the scsi mid layer, sg chaining, etc) might not work properly.

(Addition from Jens) The basic issue is that the physical segment setting is purely a driver issue. And since SCSI is managing the sglist, libata has no business changing the setting. All libata should care about is the hw segment setting.

Signed-off-by: FUJITA Tomonori
Signed-off-by: Jens Axboe
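The CCISS failure above follows directly from the chaining convention: any stale word on the stack whose low bit happens to be set will be taken for a chain pointer. The fragment below is a standalone user-space sketch, not kernel code; struct sg_entry and looks_like_chain_link() are made-up stand-ins for struct scatterlist and the check a chaining-aware mapper performs, and tmp_sg mirrors the on-stack array in cciss. It only illustrates why zeroing the table before handing it to the mapper, as the patch above does, removes the hazard.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct sg_entry {		/* stand-in for struct scatterlist */
	void *page;		/* low bit doubles as the chain marker */
	unsigned int length;
};

/* The test a chaining-aware mapper applies to every slot it touches. */
static int looks_like_chain_link(const struct sg_entry *sg)
{
	return ((uintptr_t)sg->page & 1UL) != 0;
}

int main(void)
{
	struct sg_entry tmp_sg[16];	/* on the stack, as in the old driver */

	/* Whatever happened to be on the stack may have its low bit set,
	 * which a mapper would then dereference as a pointer to another
	 * table.  Clearing the array removes that possibility. */
	memset(tmp_sg, 0, sizeof(tmp_sg));
	printf("slot 0 is %sa chain link\n",
	       looks_like_chain_link(&tmp_sg[0]) ? "" : "not ");
	return 0;
}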
commit f7885fac34b17b5a659dfd083684458d69f7a5ae
Author: FUJITA Tomonori
Date: Tue Sep 18 12:14:37 2007 +0200

revert sg segment size ifdefs

This reverts sg segment size ifdefs that the current code has in order to provide a way to reduce sgpool memory consumption.

Signed-off-by: FUJITA Tomonori
Signed-off-by: Jens Axboe

commit 712d99a80db62b5d6199d26b8a1532050bfdccb8
Author: Andrew Morton
Date: Mon Sep 17 15:29:44 2007 +0200

Fixup u14-34f ENABLE_SG_CHAINING

Signed-off-by: Andrew Morton
Signed-off-by: Jens Axboe

commit ca324199e6a102e386fe49556f7ceb0412b0a238
Author: FUJITA Tomonori
Date: Mon Sep 17 15:28:22 2007 +0200

qla1280: enable use_sg_chaining option

Signed-off-by: FUJITA Tomonori
Signed-off-by: Jens Axboe

commit ddce84fb6f83f462998099307b19256cd4ef48b5
Author: FUJITA Tomonori
Date: Fri Sep 14 10:23:39 2007 +0200

add use_sg_chaining option to scsi_host_template

This option is true if a low-level driver can support sg chaining. This will be removed eventually when all the drivers are converted to support sg chaining. q->max_phys_segments is set to SCSI_MAX_SG_SEGMENTS if false.

Signed-off-by: FUJITA Tomonori
Signed-off-by: Jens Axboe

commit 874c8a73e8a76e111224a4c9221d0b8dd5df632b
Author: FUJITA Tomonori
Date: Mon Sep 10 04:17:13 2007 +0100

qla1280: sg chaining fixes

Signed-off-by: FUJITA Tomonori
Signed-off-by: Jens Axboe

commit 89969221fda0fee0b61170fc84f071328d6eeda3
Author: FUJITA Tomonori
Date: Sat Aug 18 18:27:36 2007 +0900

libata sg chaining support fix

Signed-off-by: FUJITA Tomonori
Signed-off-by: Jens Axboe

commit 9d0c4d79f7cb0ce7afcc4384807ffd0c783aef92
Author: FUJITA Tomonori
Date: Mon Jul 30 23:01:32 2007 +0900

zfcp: sg chaining support

Signed-off-by: FUJITA Tomonori
Signed-off-by: Jens Axboe

commit 3eb5b5943754ccc6a1c03c094bab61917765b2e6
Author: FUJITA Tomonori
Date: Mon Jul 16 15:24:14 2007 +0200

ips: sg chaining support

ips properly uses scsi_for_each_sg for the normal I/O path; however, the breakup path doesn't.

Signed-off-by: FUJITA Tomonori
Signed-off-by: Jens Axboe

commit 3eaa69cf5a8740f691ff53e21732f51542a574a2
Author: Jens Axboe
Date: Wed Jul 25 08:13:56 2007 +0200

IDE: sg chaining support

Acked-by: Bartlomiej Zolnierkiewicz
Signed-off-by: Jens Axboe

commit 7fb2ddca16880bc11e7aa536b65bfa62469ddf96
Author: Jens Axboe
Date: Tue Jul 24 14:42:11 2007 +0200

i2o: sg chaining support

Acked-by: Alan Cox
Signed-off-by: Jens Axboe

commit 42bbbde956eb9d8800734e91c17cc2a6ffc851ac
Author: Jens Axboe
Date: Mon Jul 16 15:30:33 2007 +0200

Fusion: sg chaining support

Signed-off-by: Jens Axboe

commit 0d0f9bf2c76b3b9e5d35adbcbdace4c53774d527
Author: Jens Axboe
Date: Fri May 11 12:33:09 2007 +0200

USB storage: sg chaining support

[PATCH] USB storage: sg chaining support

Modify usb_stor_access_xfer_buf() to take a pointer to an sg entry pointer, so we can keep track of that instead of passing around an integer index (which we can't use when dealing with multiple scatterlist arrays).

Signed-off-by: Jens Axboe
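The usb_stor_access_xfer_buf() change is an instance of a more general pattern: once a transfer can span several chained scatterlist arrays, an integer index no longer names a position, so the callee advances a cursor that the caller owns. The sketch below is a hedged user-space illustration of that calling convention under assumed names (struct sg_entry, access_xfer_buf()); it is not the usb-storage code itself.

#include <stdio.h>
#include <string.h>

struct sg_entry {		/* stand-in for struct scatterlist */
	const char *buf;
	unsigned int length;
};

/* Copy up to len bytes out of the list, starting at *sgp/*offp and
 * leaving both advanced past the data consumed, so the next call resumes
 * where this one stopped.  With an integer index the caller could not
 * resume correctly once the entries no longer live in one flat array. */
static unsigned int access_xfer_buf(char *dst, unsigned int len,
				    struct sg_entry **sgp, unsigned int *offp)
{
	unsigned int done = 0;

	while (done < len && (*sgp)->length) {
		struct sg_entry *sg = *sgp;
		unsigned int n = sg->length - *offp;

		if (n > len - done)
			n = len - done;
		memcpy(dst + done, sg->buf + *offp, n);
		done += n;
		*offp += n;
		if (*offp == sg->length) {	/* entry exhausted: move on */
			(*sgp)++;		/* sg_next() in the kernel */
			*offp = 0;
		}
	}
	return done;
}

int main(void)
{
	struct sg_entry list[] = {
		{ "hello ", 6 }, { "sg ", 3 }, { "world", 5 }, { NULL, 0 },
	};
	struct sg_entry *sg = list;
	unsigned int off = 0;
	char out[16];

	/* Two partial reads; the second picks up exactly where the first
	 * left off because the cursor lives with the caller. */
	unsigned int n = access_xfer_buf(out, 8, &sg, &off);
	n += access_xfer_buf(out + n, 6, &sg, &off);
	out[n] = '\0';
	printf("%s\n", out);	/* prints "hello sg world" */
	return 0;
}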
commit c0886f3f65319cc36e1a78a58f8e6208c60388e9
Author: Jens Axboe
Date: Tue Jul 24 14:41:13 2007 +0200

infiniband: sg chaining support

Signed-off-by: Jens Axboe

commit 8f8cbea28d1cba1b5c483c5e414f7aed795d5219
Author: Jens Axboe
Date: Mon Jul 30 16:10:30 2007 +0200

advansys: convert to use the data buffer accessors

Signed-off-by: Jens Axboe

commit 025eecdc50f5233ba5fca4e4a4eb749215439f62
Author: Jens Axboe
Date: Mon Jul 16 10:00:31 2007 +0200

aha1542: convert to use the data buffer accessors

Signed-off-by: Jens Axboe

commit 134145790a17dd884ec442a91e5c0e1b0b302ecf
Author: Jens Axboe
Date: Wed May 9 14:42:23 2007 +0200

gdth: sg chaining support

Signed-off-by: Jens Axboe

commit 5edd9d1572d80a00e9b1a48c34f328c033bf53ca
Author: Jens Axboe
Date: Wed May 9 14:34:52 2007 +0200

ide-scsi: sg chaining support

Acked-by: Bartlomiej Zolnierkiewicz
Signed-off-by: Jens Axboe

commit e7da502aa4625563e4e2c2c641cec47fdd32dc0d
Author: Jens Axboe
Date: Wed May 9 13:43:12 2007 +0200

qlogicpti: sg chaining support

Signed-off-by: Jens Axboe

commit aaf3440b7949e46c81d2111e96bf1067315b361c
Author: Jens Axboe
Date: Wed May 9 13:18:54 2007 +0200

aic94xx: sg chaining support

Signed-off-by: Jens Axboe

commit c11e15c6a8a9ce8992bd6569ad81039bb0ca9f40
Author: Jens Axboe
Date: Wed May 9 13:02:43 2007 +0200

qla1280: sg chaining support

Interesting hardware setup...

Signed-off-by: Jens Axboe

commit 2d82d96b60b9dc719e1af2bd7288fd1946748e7e
Author: Jens Axboe
Date: Fri May 11 15:01:01 2007 +0200

scsi generic: sg chaining support

Signed-off-by: Jens Axboe

commit 57021761fbae351dea6977984df9502edbf4cf75
Author: Jens Axboe
Date: Mon Jul 16 10:19:24 2007 +0200

scsi_debug: support sg chaining

Signed-off-by: Douglas Gilbert
Signed-off-by: Jens Axboe

commit ba0ca2d6201b73cb2879367a19777a976cec341a
Author: Jens Axboe
Date: Wed May 9 09:08:18 2007 +0200

libata: convert to using sg helpers

This converts libata to using the sg helpers for looking up sg elements, instead of doing it manually.

Signed-off-by: Jens Axboe

commit f090146f0e8f3a2c39d2e205bb2d325cbf182a2a
Author: Jens Axboe
Date: Tue Aug 7 09:02:51 2007 +0200

SCSI: support for allocating large scatterlists

This is what enables large commands. If we need to allocate an sgtable that doesn't fit in a single page, allocate several SCSI_MAX_SG_SEGMENTS sized tables and chain them together.

SCSI defaults to large chained sg tables, if the arch supports it.

Signed-off-by: Jens Axboe

commit 4909aca28c3cc956863df95a145a0ec055c54c3a
Author: Jens Axboe
Date: Mon Jul 16 09:30:38 2007 +0200

scsi: simplify scsi_free_sgtable()

Just pass in the command, no point in passing in the scatterlist and scatterlist pool index separately.

Signed-off-by: Jens Axboe

commit 612645fafba5729e7da66ba1c4529ed86606a4b8
Author: saeed bishara
Date: Wed Aug 8 13:09:00 2007 +0200

use sg helper function in DMA mapping documentation

Signed-off-by: Saeed Bishara
Signed-off-by: Jens Axboe

commit a15d7d4b6d8f9089e58e349a29ba4dbb18d0a4d4
Author: Jens Axboe
Date: Thu May 10 13:29:51 2007 +0200

ll_rw_blk: temporarily enable max_segments tweaking

Expose this setting for now, so that users can play with enabling large commands without defaulting it to on globally.

This is a debug patch, it will be dropped for the final versions.

Signed-off-by: Jens Axboe
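The "SCSI: support for allocating large scatterlists" entry above describes the allocation side: build a big table out of fixed-size pieces and chain them. The following is a minimal user-space sketch of that idea under assumed names (SEGMENTS_PER_TABLE stands in for SCSI_MAX_SG_SEGMENTS, calloc() for the sgtable mempools); the real scsi_lib.c code pools the pieces and handles error cleanup, which is omitted here.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define SEGMENTS_PER_TABLE 8	/* stands in for SCSI_MAX_SG_SEGMENTS */

struct sg_entry {
	void *page;		/* low bit set => link to the next table */
	unsigned int length;
};

/* Turn a table's final slot into a link to the next table. */
static void chain_to(struct sg_entry *slot, struct sg_entry *next)
{
	slot->page = (void *)((uintptr_t)next | 1UL);
	slot->length = 0;
}

/* Build room for nents payload entries out of fixed-size pieces: every
 * piece but the last sacrifices its final slot as a link to the next. */
static struct sg_entry *alloc_chained_table(unsigned int nents)
{
	struct sg_entry *first = NULL, *prev = NULL;

	while (nents) {
		unsigned int chunk = nents;
		struct sg_entry *tbl;

		/* Leave one slot for the chain link unless this piece
		 * already holds everything that is left. */
		if (chunk > SEGMENTS_PER_TABLE)
			chunk = SEGMENTS_PER_TABLE - 1;
		tbl = calloc(chunk + (nents > chunk ? 1 : 0), sizeof(*tbl));
		if (!tbl)
			return NULL;	/* leak-free unwinding omitted */
		if (!first)
			first = tbl;
		if (prev)
			chain_to(prev, tbl);
		prev = tbl + chunk;	/* slot reserved for the next link */
		nents -= chunk;
	}
	return first;
}

int main(void)
{
	struct sg_entry *sg = alloc_chained_table(20);

	printf("%s\n", sg ? "built a chained table with room for 20 entries"
			  : "allocation failed");
	return 0;
}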
commit 70fc608ebde742af003b142dfa670e517af2b970
Author: Jens Axboe
Date: Mon Jul 16 21:17:16 2007 +0200

Add chained sg support to linux/scatterlist.h

The core of the patch - allow the last sg element in a scatterlist table to point to the start of a new table. We overload the LSB of the page pointer to indicate whether this is a valid sg entry, or merely a link to the next list.

Includes a fix from Bartlomiej Zolnierkiewicz correcting the ifdef ARCH_HAS_SG_CHAIN guarding sg_last().

Signed-off-by: Jens Axboe

commit 21d64022d7c55e7bd4ad11a176c283c4013e3e59
Author: Jens Axboe
Date: Tue Jul 24 12:31:42 2007 +0200

scsi: convert to using sg helpers

This converts the SCSI mid layer to using the sg helpers for looking up sg elements, instead of doing it manually.

Signed-off-by: Jens Axboe

commit dbdf7cd591f4d7d1325fbec13f33523b5e984cf5
Author: Jens Axboe
Date: Fri Sep 21 10:44:19 2007 +0200

block: convert to using sg helpers

Convert the main rq mapper (blk_rq_map_sg()) to the sg helper setup.

Signed-off-by: Jens Axboe

commit c76648414904010cd60f9e409a1d246017ec336c
Author: Jens Axboe
Date: Wed May 9 09:02:57 2007 +0200

Add sg helpers for iterating over a scatterlist table

First step to being able to change the scatterlist setup without having to modify drivers (a lot :-)

Signed-off-by: Jens Axboe

commit b7f7caf616dac25acb42e596ac8c2ff3876217aa
Author: Jens Axboe
Date: Wed May 9 08:58:32 2007 +0200

crypto: don't pollute the global namespace with sg_next()

It's a subsystem function, prefix it as such.

Acked-by: Herbert Xu
Signed-off-by: Jens Axboe

commit a6ddf1165c67cd6c48dbe739e515058523b9d9ee
Author: Laurent Riffard
Date: Fri Sep 21 08:32:28 2007 +0200

pktcdvd: don't rely on bio_init() preserving bio->bi_destructor

Signed-off-by: Laurent Riffard
Signed-off-by: Jens Axboe

commit 73ab6826f28df7940c3480b1c03f8575d44f2ec7
Author: Jens Axboe
Date: Fri Sep 14 13:06:53 2007 +0200

pktcdvd: don't rely on bio_init() preserving bio->bi_io_vec

Signed-off-by: Jens Axboe

commit 01666d533521e2e6e39b59cd059ec72bd49c126c
Author: Adrian Bunk
Date: Fri Sep 14 10:02:06 2007 +0200

remove ide_get_error_location()

Signed-off-by: Adrian Bunk
Signed-off-by: Jens Axboe

commit d76f0ddcdae6aaaa22ea7fdd4eac2d440fbaf8b8
Author: Jens Axboe
Date: Tue Sep 25 12:53:45 2007 +0200

block: convert blkdev_issue_flush() to use empty barriers

Then we can get rid of ->issue_flush_fn() and all the driver private implementations of that.

Signed-off-by: Jens Axboe

commit 1b43d03a35619b34c2e61465db2d3730bb8a97e2
Author: Jens Axboe
Date: Wed Jul 18 14:28:03 2007 +0200

block: Initial support for data-less (or empty) barrier support

This implements functionality to pass down or insert a barrier in a queue, without having data attached to it. The ->prepare_flush_fn() infrastructure from data barriers is reused to provide this functionality.

Signed-off-by: Jens Axboe

commit ea9c4b4aadf59344adc0472e776d2729510a4241
Author: Jens Axboe
Date: Wed Jul 18 13:27:58 2007 +0200

block: factor out bio_check_eod()

End of device check is done twice in __generic_make_request() and it's fully inlined each time. Factor out bio_check_eod().

Signed-off-by: Tejun Heo
Signed-off-by: Jens Axboe

commit 497ad23453f0c4b5d4fdffc075ab23f30337871a
Author: Jens Axboe
Date: Fri Sep 21 10:41:07 2007 +0200

block: add end_queued_request() and end_dequeued_request() helpers

We can use this helper in the elevator core for BLKPREP_KILL, and it'll also be useful for the empty barrier patch.

Signed-off-by: Jens Axboe
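The scatterlist.h change above is the mechanism the rest of the series builds on: the last slot of a table can carry, in the low bit of its page pointer, a link to the start of another table, and helpers in the sg_next()/for_each_sg() style follow the link so callers see one logical list. Below is a user-space mock-up of that traversal, with illustrative names (sg_entry_next(), for_each_entry()) rather than the kernel API.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct sg_entry {		/* stand-in for struct scatterlist */
	void *page;		/* low bit set => link to another table */
	unsigned int length;
};

/* sg_next()-style helper: step to the following entry, transparently
 * hopping to the next table when the slot is only a chain link. */
static struct sg_entry *sg_entry_next(struct sg_entry *sg)
{
	sg++;
	if ((uintptr_t)sg->page & 1UL)
		sg = (struct sg_entry *)((uintptr_t)sg->page & ~1UL);
	return sg;
}

/* for_each_sg()-style loop: callers iterate by count and never see the
 * table boundaries. */
#define for_each_entry(first, sg, nents, i) \
	for (i = 0, sg = (first); i < (nents); i++, sg = sg_entry_next(sg))

int main(void)
{
	/* b[] has one extra zeroed slot purely so the loop's final advance
	 * in this mock stays inside the array. */
	struct sg_entry a[3], b[3], *sg;
	int i;

	memset(a, 0, sizeof(a));
	memset(b, 0, sizeof(b));
	a[0].length = 10;
	a[1].length = 20;
	a[2].page = (void *)((uintptr_t)b | 1UL);	/* last slot = link */
	b[0].length = 30;
	b[1].length = 40;

	/* Four usable entries spread over two tables, walked as one list. */
	for_each_entry(a, sg, 4, i)
		printf("entry %d: %u bytes\n", i, sg->length);
	return 0;
}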
commit cf9fdd6ffb384885af914a1d42d513433b30781f
Author: Jens Axboe
Date: Wed Jul 18 13:18:08 2007 +0200

bio: make freeing of ->bi_io_vec conditional in bio_free()

The empty barrier patches do not carry data, so they have no iovec attached.

Signed-off-by: Jens Axboe

commit a9e3949c9d4171b3c50b5ad67165f1247ba647bb
Author: Jens Axboe
Date: Wed Jul 18 13:14:03 2007 +0200

bio: use memset() in bio_init()

Use memset() to clear the bio, instead of doing each field manually.

Signed-off-by: Jens Axboe

commit 1f4acc9e759c2b76f7dc5186bbac0974b2ad4592
Author: Jens Axboe
Date: Wed Jul 18 13:13:10 2007 +0200

block: ll_rw_blk.c: cosmetics

Fix ?: construct, a typo, whitespace, and similar.

Signed-off-by: Jens Axboe

commit 01c79cd6cbae6890aa62ea1730f464c0be21e186
Author: Adrian Bunk
Date: Fri Sep 14 10:20:07 2007 +0200

make tcp_splice_data_recv() static

Signed-off-by: Adrian Bunk
Signed-off-by: Jens Axboe

commit f7790173d42022cfe3489232bafe6a8f93f05535
Author: Jens Axboe
Date: Thu Jul 19 11:08:32 2007 +0200

splice: add tcp_splice_read() to IPV6

Thanks to YOSHIFUJI Hideaki for the hint!

Signed-off-by: Jens Axboe

commit db7767c7f70e1811f5754c55906dbc12ad1bb5ec
Author: Jens Axboe
Date: Tue Sep 25 12:41:54 2007 +0200

TCP splice receive support

Support for network splice receive.

Signed-off-by: Jens Axboe

commit 43b6be09c6d2a507ce2f837fac1cb8102cbe1c52
Author: Jens Axboe
Date: Mon Jun 11 13:00:32 2007 +0200

splice: don't assume regular pages in splice_to_pipe()

Allow caller to pass in a release function, there might be other resources that need releasing as well. Needed for network receive.

Signed-off-by: Jens Axboe

commit c1279f51a9fc4b2e47c333396a0d4aa5c6851eba
Author: Jens Axboe
Date: Fri Sep 21 08:45:25 2007 +0200

vmsplice: implement real vmsplice to userspace

I'm sure this is riddled with bugs currently, it's a 3 hour hack but it does appear to work. It attempts to map pipe pages into a vma, so that it appears in the application's address space without having to copy data. The caller must manage the address region with mmap() and munmap(), vmsplice() only works on existing vmas.

Signed-off-by: Jens Axboe

commit c7ff7f6eb34184c58035e9098feac31c5731c4d6
Author: Jens Axboe
Date: Fri Sep 21 09:19:54 2007 +0200

Fix warnings with !CONFIG_BLOCK

Hide everything in blkdev.h when CONFIG_BLOCK isn't set, and fixup the (few) files that fail to build because they were relying on blkdev.h pulling in extra includes for them.

Signed-off-by: Jens Axboe

commit 435d57d4f5f5d4f3c30c44476eeadf7c1f13b11f
Author: Dhaval Giani
Date: Thu Aug 23 10:43:07 2007 +0200

Corrections in Documentation/block/ioprio.txt

The newer glibc does not allow system calls to be made via the _syscallN() wrapper. They have to be made through syscall(). The ionice code used the older interface. Correcting it to use syscall().

Signed-off-by: Dhaval Giani
Signed-off-by: Jens Axboe

commit 077420bc6dd81dcffca070e8198636d221cc9f55
Author: Satyam Sharma
Date: Thu Aug 23 09:29:40 2007 +0200

ll_rw_blk: blk_cpu_notifier should be __cpuinitdata

blk_cpu_notifier is marked as __devinitdata, but __devinitdata need not be __init even if HOTPLUG_CPU=n, which wastes space. It should be marked __cpuinitdata, and the callback itself as __cpuinit.

Signed-off-by: Satyam Sharma
Signed-off-by: Andrew Morton
Signed-off-by: Jens Axboe

commit 164e2a03e109a75ba9e8e3f9206e34205837fb4b
Author: Jens Axboe
Date: Thu Aug 16 13:43:12 2007 +0200

Fixup rq_for_each_segment() indentation

Remove one level of nesting where appropriate.

Signed-off-by: Jens Axboe
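For reference, the corrected ioprio.txt wrappers (visible in the Documentation/block/ioprio.txt hunk further down in this patch) can be exercised from a complete program such as the one below. The IOPRIO_* constants are repeated here by hand because glibc ships no header for them, so treat their values as assumptions of this sketch rather than something the patch itself provides.

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

/* glibc has no wrappers for these, so go through syscall(2) directly,
 * exactly as the corrected ioprio.txt example does. */
static inline int ioprio_set(int which, int who, int ioprio)
{
	return syscall(__NR_ioprio_set, which, who, ioprio);
}

static inline int ioprio_get(int which, int who)
{
	return syscall(__NR_ioprio_get, which, who);
}

#define IOPRIO_WHO_PROCESS	1
#define IOPRIO_CLASS_BE		2
#define IOPRIO_CLASS_SHIFT	13

int main(void)
{
	/* Best-effort class, priority level 4, for the current process. */
	int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 4;

	if (ioprio_set(IOPRIO_WHO_PROCESS, 0, prio) == -1)
		perror("ioprio_set");
	printf("ioprio is now 0x%x\n", ioprio_get(IOPRIO_WHO_PROCESS, 0));
	return 0;
}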
commit 2bb12033c8663b557d5aa49bacaa89013f1ea4d5
Author: NeilBrown
Date: Thu Aug 16 13:31:30 2007 +0200

Share code between init_request_from_bio and blk_rq_bio_prep

These have very similar functions and should share code where possible.

Signed-off-by: Neil Brown

diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c

Signed-off-by: Jens Axboe

commit 775bb8c60a52e24fcd2a120057493ae0ef6876fe
Author: NeilBrown
Date: Thu Aug 16 13:31:28 2007 +0200

Stop exporting blk_rq_bio_prep

blk_rq_bio_prep is exported for use in exactly one place. That place can benefit from using the new blk_rq_append_bio instead. So:
- change dm-emc to call blk_rq_append_bio
- stop exporting blk_rq_bio_prep, and
- initialise rq_disk in blk_rq_bio_prep, as dm-emc needs it.

Signed-off-by: Neil Brown

diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c

Signed-off-by: Jens Axboe

commit 4a154a46477dc5c87c56c33d35a74e208b193d78
Author: NeilBrown
Date: Thu Aug 16 13:31:27 2007 +0200

New function blk_req_append_bio

ll_back_merge_fn is currently exported to SCSI where it is used, together with blk_rq_bio_prep, in exactly the same way these functions are used in __blk_rq_map_user. So move the common code into a new function (blk_rq_append_bio), and don't export ll_back_merge_fn any longer.

Signed-off-by: Neil Brown

diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c

Signed-off-by: Jens Axboe

commit 28447d5ce8624b7b518bb8a9eb7e5ec2b0e20b9a
Author: NeilBrown
Date: Thu Aug 16 13:31:26 2007 +0200

Fix various abuse of bio fields in umem.c

umem.c:
- advances bi_idx and bi_sector to track where it is up to. But it is only ever doing this on one bio, so the updated fields can easily be kept elsewhere (current_*).
- updates bi_size, but never uses the updated values, so this isn't needed.
- reuses bi_phys_segments to count how many iovecs have been completed. As the completion happens sequentially, we can store this information outside the bio too.

Signed-off-by: Neil Brown

diff .prev/drivers/block/umem.c ./drivers/block/umem.c

Signed-off-by: Jens Axboe

commit b28073483666fd8018fe816ab8f8c9a5ab1e8ca5
Author: NeilBrown
Date: Tue Sep 25 12:35:59 2007 +0200

Introduce rq_for_each_segment replacing rq_for_each_bio

Every usage of rq_for_each_bio wraps a usage of bio_for_each_segment, so these can be combined into rq_for_each_segment. We define "struct req_iterator" to hold the 'bio' and 'index' that are needed for the double iteration.

Signed-off-by: Neil Brown

Various compile fixes by me...

Signed-off-by: Jens Axboe

commit cbc21e3dd8837428a7249fbee8229d7fe4909399
Author: NeilBrown
Date: Thu Aug 16 13:27:52 2007 +0200

Merge blk_recount_segments into blk_recalc_rq_segments

blk_recalc_rq_segments calls blk_recount_segments on each bio, then does some extra calculations to handle segments that overlap two bios. If we merge the code from blk_recount_segments into blk_recalc_rq_segments, we can process the whole request one bio_vec at a time, and not need the messy cross-bio calculations. Then blk_recount_segments can be implemented by calling blk_recalc_rq_segments, passing it a simple on-stack request which stores just the bio.

Signed-off-by: Neil Brown

diff .prev/block/ll_rw_blk.c ./block/ll_rw_blk.c

Signed-off-by: Jens Axboe
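The rq_for_each_segment change above collapses the old rq_for_each_bio()/bio_for_each_segment() nesting into one iterator that carries both the current bio and the index inside it; the Documentation/block/biodoc.txt hunk later in this patch shows the driver-visible usage. The sketch below is a standalone mock-up of that shape only: mock_bio, mock_request, req_iter and rq_for_each_seg() are invented stand-ins, not the kernel definitions.

#include <stdio.h>

struct segment {			/* stand-in for struct bio_vec */
	unsigned int len;
};

struct mock_bio {			/* stand-in for struct bio */
	struct mock_bio *next;
	struct segment vec[4];
	int cnt;
};

struct mock_request {			/* stand-in for struct request */
	struct mock_bio *bio;
};

struct req_iter {			/* stand-in for struct req_iterator */
	struct mock_bio *bio;
	int i;
};

/* One macro in place of the old two-level nesting: the iterator keeps
 * both the current bio and the segment index within it. */
#define rq_for_each_seg(bvec, rq, it)					\
	for ((it).bio = (rq)->bio; (it).bio; (it).bio = (it).bio->next)	\
		for ((it).i = 0;					\
		     (it).i < (it).bio->cnt &&				\
			((bvec) = &(it).bio->vec[(it).i], 1);		\
		     (it).i++)

int main(void)
{
	struct mock_bio b2 = { NULL, { { 30 }, { 40 } }, 2 };
	struct mock_bio b1 = { &b2, { { 10 }, { 20 } }, 2 };
	struct mock_request rq = { &b1 };
	struct req_iter iter;
	struct segment *bv;
	int total = 0;

	/* Two bios, two segments each, visited as one flat sequence. */
	rq_for_each_seg(bv, &rq, iter)
		total += bv->len;
	printf("request spans %d bytes\n", total);
	return 0;
}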
Signed-off-by: Andrew Morton
---

Documentation/DMA-mapping.txt | 2
Documentation/block/biodoc.txt | 22
Documentation/block/ioprio.txt | 11
arch/ia64/hp/common/sba_iommu.c | 14
arch/ia64/hp/sim/simscsi.c | 1
arch/ia64/sn/pci/pci_dma.c | 11
arch/powerpc/kernel/dma_64.c | 5
arch/powerpc/kernel/ibmebus.c | 11
arch/powerpc/kernel/iommu.c | 23
arch/powerpc/platforms/ps3/system-bus.c | 7
arch/sparc/kernel/ioport.c | 25
arch/sparc/mm/io-unit.c | 12
arch/sparc/mm/iommu.c | 10
arch/sparc/mm/sun4c.c | 10
arch/sparc64/kernel/iommu.c | 39 -
arch/sparc64/kernel/pci_sun4v.c | 32 -
arch/x86_64/kernel/pci-calgary.c | 24
arch/x86_64/kernel/pci-gart.c | 63 +-
arch/x86_64/kernel/pci-nommu.c | 5
block/elevator.c | 17
block/ll_rw_blk.c | 524 ++++++++++++--------
crypto/digest.c | 2
crypto/scatterwalk.c | 2
crypto/scatterwalk.h | 2
drivers/ata/libata-core.c | 35 -
drivers/ata/libata-scsi.c | 2
drivers/block/cciss.c | 1
drivers/block/floppy.c | 81 +--
drivers/block/lguest_blk.c | 36 -
drivers/block/nbd.c | 59 --
drivers/block/pktcdvd.c | 7
drivers/block/ps3disk.c | 63 --
drivers/block/umem.c | 38 -
drivers/block/xen-blkfront.c | 30 -
drivers/ide/cris/ide-cris.c | 3
drivers/ide/ide-disk.c | 29 -
drivers/ide/ide-dma.c | 2
drivers/ide/ide-floppy.c | 56 --
drivers/ide/ide-io.c | 38 -
drivers/ide/ide-probe.c | 2
drivers/ide/ide-taskfile.c | 18
drivers/ide/mips/au1xxx-ide.c | 2
drivers/ide/pci/sgiioc4.c | 3
drivers/ide/ppc/pmac.c | 2
drivers/infiniband/hw/ipath/ipath_dma.c | 10
drivers/infiniband/ulp/iser/iser_memory.c | 75 +-
drivers/md/dm-emc.c | 10
drivers/md/dm-table.c | 28 -
drivers/md/dm.c | 16
drivers/md/dm.h | 1
drivers/md/linear.c | 20
drivers/md/md.c | 1
drivers/md/multipath.c | 30 -
drivers/md/raid0.c | 21
drivers/md/raid1.c | 31 -
drivers/md/raid10.c | 31 -
drivers/md/raid5.c | 31 -
drivers/message/fusion/mptscsih.c | 6
drivers/message/i2o/i2o_block.c | 24
drivers/s390/block/dasd_diag.c | 37 -
drivers/s390/block/dasd_eckd.c | 28 -
drivers/s390/block/dasd_fba.c | 28 -
drivers/s390/char/tape_34xx.c | 32 -
drivers/s390/char/tape_3590.c | 37 -
drivers/s390/scsi/zfcp_def.h | 1
drivers/s390/scsi/zfcp_qdio.c | 6
drivers/scsi/3w-9xxx.c | 1
drivers/scsi/3w-xxxx.c | 1
drivers/scsi/BusLogic.c | 1
drivers/scsi/NCR53c406a.c | 3
drivers/scsi/a100u2w.c | 1
drivers/scsi/aacraid/linit.c | 1
drivers/scsi/advansys.c | 12
drivers/scsi/aha1542.c | 32 -
drivers/scsi/aha1740.c | 1
drivers/scsi/aic7xxx/aic79xx_osm.c | 1
drivers/scsi/aic7xxx/aic7xxx_osm.c | 1
drivers/scsi/aic7xxx_old.c | 1
drivers/scsi/aic94xx/aic94xx_task.c | 6
drivers/scsi/arcmsr/arcmsr_hba.c | 1
drivers/scsi/dc395x.c | 1
drivers/scsi/dpt_i2o.c | 1
drivers/scsi/eata.c | 3
drivers/scsi/gdth.c | 45 -
drivers/scsi/hptiop.c | 1
drivers/scsi/ibmmca.c | 1
drivers/scsi/ibmvscsi/ibmvscsi.c | 1
drivers/scsi/ide-scsi.c | 19
drivers/scsi/initio.c | 1
drivers/scsi/ips.c | 14
drivers/scsi/lpfc/lpfc_scsi.c | 2
drivers/scsi/mac53c94.c | 1
drivers/scsi/megaraid.c | 1
drivers/scsi/megaraid/megaraid_mbox.c | 1
drivers/scsi/megaraid/megaraid_sas.c | 1
drivers/scsi/mesh.c | 1
drivers/scsi/nsp32.c | 1
drivers/scsi/pcmcia/sym53c500_cs.c | 1
drivers/scsi/qla1280.c | 70 +-
drivers/scsi/qla2xxx/qla_os.c | 2
drivers/scsi/qla4xxx/ql4_os.c | 1
drivers/scsi/qlogicfas.c | 1
drivers/scsi/qlogicpti.c | 15
drivers/scsi/scsi_debug.c | 30 -
drivers/scsi/scsi_lib.c | 239 ++++++---
drivers/scsi/scsi_tgt_lib.c | 4
drivers/scsi/sd.c | 21
drivers/scsi/sg.c | 16
drivers/scsi/stex.c | 1
drivers/scsi/sym53c416.c | 1 drivers/scsi/sym53c8xx_2/sym_glue.c | 1 drivers/scsi/u14-34f.c | 3 drivers/scsi/ultrastor.c | 1 drivers/scsi/wd7000.c | 1 drivers/usb/storage/alauda.c | 16 drivers/usb/storage/datafab.c | 10 drivers/usb/storage/jumpshot.c | 10 drivers/usb/storage/protocol.c | 20 drivers/usb/storage/protocol.h | 2 drivers/usb/storage/sddr09.c | 16 drivers/usb/storage/sddr55.c | 16 drivers/usb/storage/shuttle_usbat.c | 17 fs/bio.c | 23 fs/fs-writeback.c | 1 fs/splice.c | 340 ++++++++---- include/asm-i386/dma-mapping.h | 13 include/asm-i386/scatterlist.h | 2 include/asm-ia64/dma-mapping.h | 1 include/asm-ia64/scatterlist.h | 2 include/asm-powerpc/dma-mapping.h | 17 include/asm-powerpc/scatterlist.h | 2 include/asm-sparc/scatterlist.h | 2 include/asm-sparc64/scatterlist.h | 2 include/asm-x86_64/dma-mapping.h | 3 include/asm-x86_64/scatterlist.h | 2 include/linux/bio.h | 19 include/linux/blkdev.h | 32 - include/linux/i2o.h | 3 include/linux/ide.h | 7 include/linux/libata.h | 16 include/linux/net.h | 3 include/linux/scatterlist.h | 84 +++ include/linux/skbuff.h | 6 include/linux/splice.h | 1 include/linux/writeback.h | 1 include/net/tcp.h | 3 include/scsi/scsi.h | 7 include/scsi/scsi_cmnd.h | 7 include/scsi/scsi_host.h | 13 kernel/sched.c | 1 lib/swiotlb.c | 19 mm/bounce.c | 6 mm/readahead.c | 1 net/core/skbuff.c | 246 +++++++++ net/ipv4/af_inet.c | 1 net/ipv4/tcp.c | 129 ++++ net/ipv6/af_inet6.c | 1 net/socket.c | 13 158 files changed, 2134 insertions(+), 1453 deletions(-) diff -puN Documentation/DMA-mapping.txt~git-block Documentation/DMA-mapping.txt --- a/Documentation/DMA-mapping.txt~git-block +++ a/Documentation/DMA-mapping.txt @@ -514,7 +514,7 @@ With scatterlists, you map a region gath int i, count = pci_map_sg(dev, sglist, nents, direction); struct scatterlist *sg; - for (i = 0, sg = sglist; i < count; i++, sg++) { + for_each_sg(sglist, sg, count, i) { hw_address[i] = sg_dma_address(sg); hw_len[i] = sg_dma_len(sg); } diff -puN Documentation/block/biodoc.txt~git-block Documentation/block/biodoc.txt --- a/Documentation/block/biodoc.txt~git-block +++ a/Documentation/block/biodoc.txt @@ -477,9 +477,9 @@ With this multipage bio design: the same bi_io_vec array, but with the index and size accordingly modified) - A linked list of bios is used as before for unrelated merges (*) - this avoids reallocs and makes independent completions easier to handle. -- Code that traverses the req list needs to make a distinction between - segments of a request (bio_for_each_segment) and the distinct completion - units/bios (rq_for_each_bio). +- Code that traverses the req list can find all the segments of a bio + by using rq_for_each_segment. This handles the fact that a request + has multiple bios, each of which can have multiple segments. - Drivers which can't process a large bio in one shot can use the bi_idx field to keep track of the next bio_vec entry to process. (e.g a 1MB bio_vec needs to be handled in max 128kB chunks for IDE) @@ -664,14 +664,14 @@ in lvm or md. 3.2.1 Traversing segments and completion units in a request -The macros bio_for_each_segment() and rq_for_each_bio() should be used for -traversing the bios in the request list (drivers should avoid directly -trying to do it themselves). Using these helpers should also make it easier -to cope with block changes in the future. 
- - rq_for_each_bio(bio, rq) - bio_for_each_segment(bio_vec, bio, i) - /* bio_vec is now current segment */ +The macro rq_for_each_segment() should be used for traversing the bios +in the request list (drivers should avoid directly trying to do it +themselves). Using these helpers should also make it easier to cope +with block changes in the future. + + struct req_iterator iter; + rq_for_each_segment(bio_vec, rq, iter) + /* bio_vec is now current segment */ I/O completion callbacks are per-bio rather than per-segment, so drivers that traverse bio chains on completion need to keep that in mind. Drivers diff -puN Documentation/block/ioprio.txt~git-block Documentation/block/ioprio.txt --- a/Documentation/block/ioprio.txt~git-block +++ a/Documentation/block/ioprio.txt @@ -86,8 +86,15 @@ extern int sys_ioprio_get(int, int); #error "Unsupported arch" #endif -_syscall3(int, ioprio_set, int, which, int, who, int, ioprio); -_syscall2(int, ioprio_get, int, which, int, who); +static inline int ioprio_set(int which, int who, int ioprio) +{ + return syscall(__NR_ioprio_set, which, who, ioprio); +} + +static inline int ioprio_get(int which, int who) +{ + return syscall(__NR_ioprio_get, which, who); +} enum { IOPRIO_CLASS_NONE, diff -puN arch/ia64/hp/common/sba_iommu.c~git-block arch/ia64/hp/common/sba_iommu.c --- a/arch/ia64/hp/common/sba_iommu.c~git-block +++ a/arch/ia64/hp/common/sba_iommu.c @@ -396,7 +396,7 @@ sba_dump_sg( struct ioc *ioc, struct sca printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents, startsg->dma_address, startsg->dma_length, sba_sg_address(startsg)); - startsg++; + startsg = sg_next(startsg); } } @@ -409,7 +409,7 @@ sba_check_sg( struct ioc *ioc, struct sc while (the_nents-- > 0) { if (sba_sg_address(the_sg) == 0x0UL) sba_dump_sg(NULL, startsg, nents); - the_sg++; + the_sg = sg_next(the_sg); } } @@ -1201,7 +1201,7 @@ sba_fill_pdir( u32 pide = startsg->dma_address & ~PIDE_FLAG; dma_offset = (unsigned long) pide & ~iovp_mask; startsg->dma_address = 0; - dma_sg++; + dma_sg = sg_next(dma_sg); dma_sg->dma_address = pide | ioc->ibase; pdirp = &(ioc->pdir_base[pide >> iovp_shift]); n_mappings++; @@ -1228,7 +1228,7 @@ sba_fill_pdir( pdirp++; } while (cnt > 0); } - startsg++; + startsg = sg_next(startsg); } /* force pdir update */ wmb(); @@ -1297,7 +1297,7 @@ sba_coalesce_chunks( struct ioc *ioc, while (--nents > 0) { unsigned long vaddr; /* tmp */ - startsg++; + startsg = sg_next(startsg); /* PARANOID */ startsg->dma_address = startsg->dma_length = 0; @@ -1407,7 +1407,7 @@ int sba_map_sg(struct device *dev, struc #ifdef ALLOW_IOV_BYPASS_SG ASSERT(to_pci_dev(dev)->dma_mask); if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) { - for (sg = sglist ; filled < nents ; filled++, sg++){ + for_each_sg(sglist, sg, nents, filled) { sg->dma_length = sg->length; sg->dma_address = virt_to_phys(sba_sg_address(sg)); } @@ -1501,7 +1501,7 @@ void sba_unmap_sg (struct device *dev, s while (nents && sglist->dma_length) { sba_unmap_single(dev, sglist->dma_address, sglist->dma_length, dir); - sglist++; + sglist = sg_next(sglist); nents--; } diff -puN arch/ia64/hp/sim/simscsi.c~git-block arch/ia64/hp/sim/simscsi.c --- a/arch/ia64/hp/sim/simscsi.c~git-block +++ a/arch/ia64/hp/sim/simscsi.c @@ -360,6 +360,7 @@ static struct scsi_host_template driver_ .max_sectors = 1024, .cmd_per_lun = SIMSCSI_REQ_QUEUE_LEN, .use_clustering = DISABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; static int __init diff -puN arch/ia64/sn/pci/pci_dma.c~git-block arch/ia64/sn/pci/pci_dma.c --- 
a/arch/ia64/sn/pci/pci_dma.c~git-block +++ a/arch/ia64/sn/pci/pci_dma.c @@ -218,16 +218,17 @@ EXPORT_SYMBOL(sn_dma_unmap_single); * * Unmap a set of streaming mode DMA translations. */ -void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sg, +void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, int nhwentries, int direction) { int i; struct pci_dev *pdev = to_pci_dev(dev); struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + struct scatterlist *sg; BUG_ON(dev->bus != &pci_bus_type); - for (i = 0; i < nhwentries; i++, sg++) { + for_each_sg(sgl, sg, nhwentries, i) { provider->dma_unmap(pdev, sg->dma_address, direction); sg->dma_address = (dma_addr_t) NULL; sg->dma_length = 0; @@ -244,11 +245,11 @@ EXPORT_SYMBOL(sn_dma_unmap_sg); * * Maps each entry of @sg for DMA. */ -int sn_dma_map_sg(struct device *dev, struct scatterlist *sg, int nhwentries, +int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nhwentries, int direction) { unsigned long phys_addr; - struct scatterlist *saved_sg = sg; + struct scatterlist *saved_sg = sgl, *sg; struct pci_dev *pdev = to_pci_dev(dev); struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); int i; @@ -258,7 +259,7 @@ int sn_dma_map_sg(struct device *dev, st /* * Setup a DMA address for each entry in the scatterlist. */ - for (i = 0; i < nhwentries; i++, sg++) { + for_each_sg(sgl, sg, nhwentries, i) { phys_addr = SG_ENT_PHYS_ADDRESS(sg); sg->dma_address = provider->dma_map(pdev, phys_addr, sg->length, diff -puN arch/powerpc/kernel/dma_64.c~git-block arch/powerpc/kernel/dma_64.c --- a/arch/powerpc/kernel/dma_64.c~git-block +++ a/arch/powerpc/kernel/dma_64.c @@ -154,12 +154,13 @@ static void dma_direct_unmap_single(stru { } -static int dma_direct_map_sg(struct device *dev, struct scatterlist *sg, +static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction direction) { + struct scatterlist *sg; int i; - for (i = 0; i < nents; i++, sg++) { + for_each_sg(sgl, sg, nents, i) { sg->dma_address = (page_to_phys(sg->page) + sg->offset) | dma_direct_offset; sg->dma_length = sg->length; diff -puN arch/powerpc/kernel/ibmebus.c~git-block arch/powerpc/kernel/ibmebus.c --- a/arch/powerpc/kernel/ibmebus.c~git-block +++ a/arch/powerpc/kernel/ibmebus.c @@ -87,15 +87,16 @@ static void ibmebus_unmap_single(struct } static int ibmebus_map_sg(struct device *dev, - struct scatterlist *sg, + struct scatterlist *sgl, int nents, enum dma_data_direction direction) { + struct scatterlist *sg; int i; - for (i = 0; i < nents; i++) { - sg[i].dma_address = (dma_addr_t)page_address(sg[i].page) - + sg[i].offset; - sg[i].dma_length = sg[i].length; + for_each_sg(sgl, sg, nents, i) { + sg->dma_address = (dma_addr_t)page_address(sg->page) + + sg->offset; + sg->dma_length = sg->length; } return nents; diff -puN arch/powerpc/kernel/iommu.c~git-block arch/powerpc/kernel/iommu.c --- a/arch/powerpc/kernel/iommu.c~git-block +++ a/arch/powerpc/kernel/iommu.c @@ -277,7 +277,7 @@ int iommu_map_sg(struct iommu_table *tbl dma_addr_t dma_next = 0, dma_addr; unsigned long flags; struct scatterlist *s, *outs, *segstart; - int outcount, incount; + int outcount, incount, i; unsigned long handle; BUG_ON(direction == DMA_NONE); @@ -297,7 +297,7 @@ int iommu_map_sg(struct iommu_table *tbl spin_lock_irqsave(&(tbl->it_lock), flags); - for (s = outs; nelems; nelems--, s++) { + for_each_sg(sglist, s, nelems, i) { unsigned long vaddr, npages, entry, slen; slen = s->length; @@ -341,7 +341,8 @@ int 
iommu_map_sg(struct iommu_table *tbl if (novmerge || (dma_addr != dma_next)) { /* Can't merge: create a new segment */ segstart = s; - outcount++; outs++; + outcount++; + outs = sg_next(outs); DBG(" can't merge, new segment.\n"); } else { outs->dma_length += s->length; @@ -374,7 +375,7 @@ int iommu_map_sg(struct iommu_table *tbl * next entry of the sglist if we didn't fill the list completely */ if (outcount < incount) { - outs++; + outs = sg_next(outs); outs->dma_address = DMA_ERROR_CODE; outs->dma_length = 0; } @@ -385,7 +386,7 @@ int iommu_map_sg(struct iommu_table *tbl return outcount; failure: - for (s = &sglist[0]; s <= outs; s++) { + for_each_sg(sglist, s, nelems, i) { if (s->dma_length != 0) { unsigned long vaddr, npages; @@ -395,6 +396,8 @@ int iommu_map_sg(struct iommu_table *tbl s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; } + if (s == outs) + break; } spin_unlock_irqrestore(&(tbl->it_lock), flags); return 0; @@ -404,6 +407,7 @@ int iommu_map_sg(struct iommu_table *tbl void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, int nelems, enum dma_data_direction direction) { + struct scatterlist *sg; unsigned long flags; BUG_ON(direction == DMA_NONE); @@ -413,15 +417,16 @@ void iommu_unmap_sg(struct iommu_table * spin_lock_irqsave(&(tbl->it_lock), flags); + sg = sglist; while (nelems--) { unsigned int npages; - dma_addr_t dma_handle = sglist->dma_address; + dma_addr_t dma_handle = sg->dma_address; - if (sglist->dma_length == 0) + if (sg->dma_length == 0) break; - npages = iommu_num_pages(dma_handle,sglist->dma_length); + npages = iommu_num_pages(dma_handle, sg->dma_length); __iommu_free(tbl, dma_handle, npages); - sglist++; + sg = sg_next(sg); } /* Flush/invalidate TLBs if necessary. As for iommu_free(), we diff -puN arch/powerpc/platforms/ps3/system-bus.c~git-block arch/powerpc/platforms/ps3/system-bus.c --- a/arch/powerpc/platforms/ps3/system-bus.c~git-block +++ a/arch/powerpc/platforms/ps3/system-bus.c @@ -617,17 +617,18 @@ static void ps3_unmap_single(struct devi } } -static int ps3_sb_map_sg(struct device *_dev, struct scatterlist *sg, int nents, - enum dma_data_direction direction) +static int ps3_sb_map_sg(struct device *_dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction) { #if defined(CONFIG_PS3_DYNAMIC_DMA) BUG_ON("do"); return -EPERM; #else struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + struct scatterlist *sg; int i; - for (i = 0; i < nents; i++, sg++) { + for_each_sg(sgl, sg, nents, i) { int result = ps3_dma_map(dev->d_region, page_to_phys(sg->page) + sg->offset, sg->length, &sg->dma_address, 0); diff -puN arch/sparc/kernel/ioport.c~git-block arch/sparc/kernel/ioport.c --- a/arch/sparc/kernel/ioport.c~git-block +++ a/arch/sparc/kernel/ioport.c @@ -35,6 +35,7 @@ #include #include /* struct pci_dev */ #include +#include #include #include @@ -717,19 +718,19 @@ void pci_unmap_page(struct pci_dev *hwde * Device ownership issues as mentioned above for pci_map_single are * the same here. */ -int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, +int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, int direction) { + struct scatterlist *sg; int n; BUG_ON(direction == PCI_DMA_NONE); /* IIep is write-through, not flushing. 
*/ - for (n = 0; n < nents; n++) { + for_each_sg(sgl, sg, nents, n) { BUG_ON(page_address(sg->page) == NULL); sg->dvma_address = virt_to_phys(page_address(sg->page)) + sg->offset; sg->dvma_length = sg->length; - sg++; } return nents; } @@ -738,19 +739,19 @@ int pci_map_sg(struct pci_dev *hwdev, st * Again, cpu read rules concerning calls here are the same as for * pci_unmap_single() above. */ -void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, +void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, int direction) { + struct scatterlist *sg; int n; BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { - for (n = 0; n < nents; n++) { + for_each_sg(sgl, sg, nents, n) { BUG_ON(page_address(sg->page) == NULL); mmu_inval_dma_area( (unsigned long) page_address(sg->page), (sg->length + PAGE_SIZE-1) & PAGE_MASK); - sg++; } } } @@ -789,34 +790,34 @@ void pci_dma_sync_single_for_device(stru * The same as pci_dma_sync_single_* but for a scatter-gather list, * same rules and usage. */ -void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) +void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, int direction) { + struct scatterlist *sg; int n; BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { - for (n = 0; n < nents; n++) { + for_each_sg(sgl, sg, nents, n) { BUG_ON(page_address(sg->page) == NULL); mmu_inval_dma_area( (unsigned long) page_address(sg->page), (sg->length + PAGE_SIZE-1) & PAGE_MASK); - sg++; } } } -void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) +void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, int direction) { + struct scatterlist *sg; int n; BUG_ON(direction == PCI_DMA_NONE); if (direction != PCI_DMA_TODEVICE) { - for (n = 0; n < nents; n++) { + for_each_sg(sgl, sg, nents, n) { BUG_ON(page_address(sg->page) == NULL); mmu_inval_dma_area( (unsigned long) page_address(sg->page), (sg->length + PAGE_SIZE-1) & PAGE_MASK); - sg++; } } } diff -puN arch/sparc/mm/io-unit.c~git-block arch/sparc/mm/io-unit.c --- a/arch/sparc/mm/io-unit.c~git-block +++ a/arch/sparc/mm/io-unit.c @@ -11,8 +11,8 @@ #include #include /* pte_offset_map => kmap_atomic */ #include +#include -#include #include #include #include @@ -144,8 +144,9 @@ static void iounit_get_scsi_sgl(struct s spin_lock_irqsave(&iounit->lock, flags); while (sz != 0) { --sz; - sg[sz].dvma_address = iounit_get_area(iounit, (unsigned long)page_address(sg[sz].page) + sg[sz].offset, sg[sz].length); - sg[sz].dvma_length = sg[sz].length; + sg->dvma_address = iounit_get_area(iounit, (unsigned long)page_address(sg->page) + sg->offset, sg->length); + sg->dvma_length = sg->length; + sg = sg_next(sg); } spin_unlock_irqrestore(&iounit->lock, flags); } @@ -173,11 +174,12 @@ static void iounit_release_scsi_sgl(stru spin_lock_irqsave(&iounit->lock, flags); while (sz != 0) { --sz; - len = ((sg[sz].dvma_address & ~PAGE_MASK) + sg[sz].length + (PAGE_SIZE-1)) >> PAGE_SHIFT; - vaddr = (sg[sz].dvma_address - IOUNIT_DMA_BASE) >> PAGE_SHIFT; + len = ((sg->dvma_address & ~PAGE_MASK) + sg->length + (PAGE_SIZE-1)) >> PAGE_SHIFT; + vaddr = (sg->dvma_address - IOUNIT_DMA_BASE) >> PAGE_SHIFT; IOD(("iounit_release %08lx-%08lx\n", (long)vaddr, (long)len+vaddr)); for (len += vaddr; vaddr < len; vaddr++) clear_bit(vaddr, iounit->bmap); + sg = sg_next(sg); } spin_unlock_irqrestore(&iounit->lock, flags); } diff -puN 
arch/sparc/mm/iommu.c~git-block arch/sparc/mm/iommu.c --- a/arch/sparc/mm/iommu.c~git-block +++ a/arch/sparc/mm/iommu.c @@ -12,8 +12,8 @@ #include #include #include /* pte_offset_map => kmap_atomic */ +#include -#include #include #include #include @@ -240,7 +240,7 @@ static void iommu_get_scsi_sgl_noflush(s n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; sg->dvma_address = iommu_get_one(sg->page, n, sbus) + sg->offset; sg->dvma_length = (__u32) sg->length; - sg++; + sg = sg_next(sg); } } @@ -254,7 +254,7 @@ static void iommu_get_scsi_sgl_gflush(st n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; sg->dvma_address = iommu_get_one(sg->page, n, sbus) + sg->offset; sg->dvma_length = (__u32) sg->length; - sg++; + sg = sg_next(sg); } } @@ -285,7 +285,7 @@ static void iommu_get_scsi_sgl_pflush(st sg->dvma_address = iommu_get_one(sg->page, n, sbus) + sg->offset; sg->dvma_length = (__u32) sg->length; - sg++; + sg = sg_next(sg); } } @@ -325,7 +325,7 @@ static void iommu_release_scsi_sgl(struc n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; iommu_release_one(sg->dvma_address & PAGE_MASK, n, sbus); sg->dvma_address = 0x21212121; - sg++; + sg = sg_next(sg); } } diff -puN arch/sparc/mm/sun4c.c~git-block arch/sparc/mm/sun4c.c --- a/arch/sparc/mm/sun4c.c~git-block +++ a/arch/sparc/mm/sun4c.c @@ -17,8 +17,8 @@ #include #include #include +#include -#include #include #include #include @@ -1228,8 +1228,9 @@ static void sun4c_get_scsi_sgl(struct sc { while (sz != 0) { --sz; - sg[sz].dvma_address = (__u32)sun4c_lockarea(page_address(sg[sz].page) + sg[sz].offset, sg[sz].length); - sg[sz].dvma_length = sg[sz].length; + sg->dvma_address = (__u32)sun4c_lockarea(page_address(sg->page) + sg->offset, sg->length); + sg->dvma_length = sg->length; + sg = sg_next(sg); } } @@ -1244,7 +1245,8 @@ static void sun4c_release_scsi_sgl(struc { while (sz != 0) { --sz; - sun4c_unlockarea((char *)sg[sz].dvma_address, sg[sz].length); + sun4c_unlockarea((char *)sg->dvma_address, sg->length); + sg = sg_next(sg); } } diff -puN arch/sparc64/kernel/iommu.c~git-block arch/sparc64/kernel/iommu.c --- a/arch/sparc64/kernel/iommu.c~git-block +++ a/arch/sparc64/kernel/iommu.c @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef CONFIG_PCI #include @@ -480,7 +481,7 @@ static inline void fill_sg(iopte_t *iopt unsigned long iopte_protection) { struct scatterlist *dma_sg = sg; - struct scatterlist *sg_end = sg + nelems; + struct scatterlist *sg_end = sg_last(sg, nelems); int i; for (i = 0; i < nused; i++) { @@ -515,7 +516,7 @@ static inline void fill_sg(iopte_t *iopt len -= (IO_PAGE_SIZE - (tmp & (IO_PAGE_SIZE - 1UL))); break; } - sg++; + sg = sg_next(sg); } pteval = iopte_protection | (pteval & IOPTE_PAGE); @@ -528,24 +529,24 @@ static inline void fill_sg(iopte_t *iopt } pteval = (pteval & IOPTE_PAGE) + len; - sg++; + sg = sg_next(sg); /* Skip over any tail mappings we've fully mapped, * adjusting pteval along the way. Stop when we * detect a page crossing event. 
*/ - while (sg < sg_end && + while (sg != sg_end && (pteval << (64 - IO_PAGE_SHIFT)) != 0UL && (pteval == SG_ENT_PHYS_ADDRESS(sg)) && ((pteval ^ (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) { pteval += sg->length; - sg++; + sg = sg_next(sg); } if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL) pteval = ~0UL; } while (dma_npages != 0); - dma_sg++; + dma_sg = sg_next(dma_sg); } } @@ -606,7 +607,7 @@ static int dma_4u_map_sg(struct device * sgtmp = sglist; while (used && sgtmp->dma_length) { sgtmp->dma_address += dma_base; - sgtmp++; + sgtmp = sg_next(sgtmp); used--; } used = nelems - used; @@ -642,6 +643,7 @@ static void dma_4u_unmap_sg(struct devic struct strbuf *strbuf; iopte_t *base; unsigned long flags, ctx, i, npages; + struct scatterlist *sg, *sgprv; u32 bus_addr; if (unlikely(direction == DMA_NONE)) { @@ -654,11 +656,14 @@ static void dma_4u_unmap_sg(struct devic bus_addr = sglist->dma_address & IO_PAGE_MASK; - for (i = 1; i < nelems; i++) - if (sglist[i].dma_length == 0) + sgprv = NULL; + for_each_sg(sglist, sg, nelems, i) { + if (sg->dma_length == 0) break; - i--; - npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - + sgprv = sg; + } + + npages = (IO_PAGE_ALIGN(sgprv->dma_address + sgprv->dma_length) - bus_addr) >> IO_PAGE_SHIFT; base = iommu->page_table + @@ -730,6 +735,7 @@ static void dma_4u_sync_sg_for_cpu(struc struct iommu *iommu; struct strbuf *strbuf; unsigned long flags, ctx, npages, i; + struct scatterlist *sg, *sgprv; u32 bus_addr; iommu = dev->archdata.iommu; @@ -753,11 +759,14 @@ static void dma_4u_sync_sg_for_cpu(struc /* Step 2: Kick data out of streaming buffers. */ bus_addr = sglist[0].dma_address & IO_PAGE_MASK; - for(i = 1; i < nelems; i++) - if (!sglist[i].dma_length) + sgprv = NULL; + for_each_sg(sglist, sg, nelems, i) { + if (sg->dma_length == 0) break; - i--; - npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) + sgprv = sg; + } + + npages = (IO_PAGE_ALIGN(sgprv->dma_address + sgprv->dma_length) - bus_addr) >> IO_PAGE_SHIFT; strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); diff -puN arch/sparc64/kernel/pci_sun4v.c~git-block arch/sparc64/kernel/pci_sun4v.c --- a/arch/sparc64/kernel/pci_sun4v.c~git-block +++ a/arch/sparc64/kernel/pci_sun4v.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -373,7 +374,7 @@ static inline long fill_sg(long entry, s int nused, int nelems, unsigned long prot) { struct scatterlist *dma_sg = sg; - struct scatterlist *sg_end = sg + nelems; + struct scatterlist *sg_end = sg_last(sg, nelems); unsigned long flags; int i; @@ -413,7 +414,7 @@ static inline long fill_sg(long entry, s len -= (IO_PAGE_SIZE - (tmp & (IO_PAGE_SIZE - 1UL))); break; } - sg++; + sg = sg_next(sg); } pteval = (pteval & IOPTE_PAGE); @@ -431,24 +432,25 @@ static inline long fill_sg(long entry, s } pteval = (pteval & IOPTE_PAGE) + len; - sg++; + sg = sg_next(sg); /* Skip over any tail mappings we've fully mapped, * adjusting pteval along the way. Stop when we * detect a page crossing event. 
*/ - while (sg < sg_end && - (pteval << (64 - IO_PAGE_SHIFT)) != 0UL && + while ((pteval << (64 - IO_PAGE_SHIFT)) != 0UL && (pteval == SG_ENT_PHYS_ADDRESS(sg)) && ((pteval ^ (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) { pteval += sg->length; - sg++; + if (sg == sg_end) + break; + sg = sg_next(sg); } if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL) pteval = ~0UL; } while (dma_npages != 0); - dma_sg++; + dma_sg = sg_next(dma_sg); } if (unlikely(iommu_batch_end() < 0L)) @@ -510,7 +512,7 @@ static int dma_4v_map_sg(struct device * sgtmp = sglist; while (used && sgtmp->dma_length) { sgtmp->dma_address += dma_base; - sgtmp++; + sgtmp = sg_next(sgtmp); used--; } used = nelems - used; @@ -545,6 +547,7 @@ static void dma_4v_unmap_sg(struct devic struct pci_pbm_info *pbm; struct iommu *iommu; unsigned long flags, i, npages; + struct scatterlist *sg, *sgprv; long entry; u32 devhandle, bus_addr; @@ -558,12 +561,15 @@ static void dma_4v_unmap_sg(struct devic devhandle = pbm->devhandle; bus_addr = sglist->dma_address & IO_PAGE_MASK; - - for (i = 1; i < nelems; i++) - if (sglist[i].dma_length == 0) + sgprv = NULL; + for_each_sg(sglist, sg, nelems, i) { + if (sg->dma_length == 0) break; - i--; - npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - + + sgprv = sg; + } + + npages = (IO_PAGE_ALIGN(sgprv->dma_address + sgprv->dma_length) - bus_addr) >> IO_PAGE_SHIFT; entry = ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); diff -puN arch/x86_64/kernel/pci-calgary.c~git-block arch/x86_64/kernel/pci-calgary.c --- a/arch/x86_64/kernel/pci-calgary.c~git-block +++ a/arch/x86_64/kernel/pci-calgary.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -384,31 +385,32 @@ static void calgary_unmap_sg(struct devi struct scatterlist *sglist, int nelems, int direction) { struct iommu_table *tbl = find_iommu_table(dev); + struct scatterlist *s; + int i; if (!translate_phb(to_pci_dev(dev))) return; - while (nelems--) { + for_each_sg(sglist, s, nelems, i) { unsigned int npages; - dma_addr_t dma = sglist->dma_address; - unsigned int dmalen = sglist->dma_length; + dma_addr_t dma = s->dma_address; + unsigned int dmalen = s->dma_length; if (dmalen == 0) break; npages = num_dma_pages(dma, dmalen); iommu_free(tbl, dma, npages); - sglist++; } } static int calgary_nontranslate_map_sg(struct device* dev, struct scatterlist *sg, int nelems, int direction) { + struct scatterlist *s; int i; - for (i = 0; i < nelems; i++ ) { - struct scatterlist *s = &sg[i]; + for_each_sg(sg, s, nelems, i) { BUG_ON(!s->page); s->dma_address = virt_to_bus(page_address(s->page) +s->offset); s->dma_length = s->length; @@ -420,6 +422,7 @@ static int calgary_map_sg(struct device int nelems, int direction) { struct iommu_table *tbl = find_iommu_table(dev); + struct scatterlist *s; unsigned long vaddr; unsigned int npages; unsigned long entry; @@ -428,8 +431,7 @@ static int calgary_map_sg(struct device if (!translate_phb(to_pci_dev(dev))) return calgary_nontranslate_map_sg(dev, sg, nelems, direction); - for (i = 0; i < nelems; i++ ) { - struct scatterlist *s = &sg[i]; + for_each_sg(sg, s, nelems, i) { BUG_ON(!s->page); vaddr = (unsigned long)page_address(s->page) + s->offset; @@ -454,9 +456,9 @@ static int calgary_map_sg(struct device return nelems; error: calgary_unmap_sg(dev, sg, nelems, direction); - for (i = 0; i < nelems; i++) { - sg[i].dma_address = bad_dma_address; - sg[i].dma_length = 0; + for_each_sg(sg, s, nelems, i) { + sg->dma_address = bad_dma_address; + 
sg->dma_length = 0; } return 0; } diff -puN arch/x86_64/kernel/pci-gart.c~git-block arch/x86_64/kernel/pci-gart.c --- a/arch/x86_64/kernel/pci-gart.c~git-block +++ a/arch/x86_64/kernel/pci-gart.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -278,10 +279,10 @@ static void gart_unmap_single(struct dev */ static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) { + struct scatterlist *s; int i; - for (i = 0; i < nents; i++) { - struct scatterlist *s = &sg[i]; + for_each_sg(sg, s, nents, i) { if (!s->dma_length || !s->length) break; gart_unmap_single(dev, s->dma_address, s->dma_length, dir); @@ -292,14 +293,14 @@ static void gart_unmap_sg(struct device static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, int nents, int dir) { + struct scatterlist *s; int i; #ifdef CONFIG_IOMMU_DEBUG printk(KERN_DEBUG "dma_map_sg overflow\n"); #endif - for (i = 0; i < nents; i++ ) { - struct scatterlist *s = &sg[i]; + for_each_sg(sg, s, nents, i) { unsigned long addr = page_to_phys(s->page) + s->offset; if (nonforced_iommu(dev, addr, s->length)) { addr = dma_map_area(dev, addr, s->length, dir); @@ -319,23 +320,23 @@ static int dma_map_sg_nonforce(struct de } /* Map multiple scatterlist entries continuous into the first. */ -static int __dma_map_cont(struct scatterlist *sg, int start, int stopat, +static int __dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { unsigned long iommu_start = alloc_iommu(pages); unsigned long iommu_page = iommu_start; + struct scatterlist *s; int i; if (iommu_start == -1) return -1; - - for (i = start; i < stopat; i++) { - struct scatterlist *s = &sg[i]; + + for_each_sg(start, s, nelems, i) { unsigned long pages, addr; unsigned long phys_addr = s->dma_address; - BUG_ON(i > start && s->offset); - if (i == start) { + BUG_ON(s != start && s->offset); + if (s == start) { *sout = *s; sout->dma_address = iommu_bus_base; sout->dma_address += iommu_page*PAGE_SIZE + s->offset; @@ -357,17 +358,17 @@ static int __dma_map_cont(struct scatter return 0; } -static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat, +static inline int dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages, int need) { - if (!need) { - BUG_ON(stopat - start != 1); - *sout = sg[start]; - sout->dma_length = sg[start].length; + if (!need) { + BUG_ON(nelems != 1); + *sout = *start; + sout->dma_length = start->length; return 0; - } - return __dma_map_cont(sg, start, stopat, sout, pages); + } + return __dma_map_cont(start, nelems, sout, pages); } /* @@ -381,6 +382,7 @@ int gart_map_sg(struct device *dev, stru int start; unsigned long pages = 0; int need = 0, nextneed; + struct scatterlist *s, *ps, *start_sg, *sgmap; if (nents == 0) return 0; @@ -390,8 +392,9 @@ int gart_map_sg(struct device *dev, stru out = 0; start = 0; - for (i = 0; i < nents; i++) { - struct scatterlist *s = &sg[i]; + start_sg = sgmap = sg; + ps = NULL; /* shut up gcc */ + for_each_sg(sg, s, nents, i) { dma_addr_t addr = page_to_phys(s->page) + s->offset; s->dma_address = addr; BUG_ON(s->length == 0); @@ -400,29 +403,31 @@ int gart_map_sg(struct device *dev, stru /* Handle the previous not yet processed entries */ if (i > start) { - struct scatterlist *ps = &sg[i-1]; /* Can only merge when the last chunk ends on a page boundary and the new one doesn't have an offset. 
*/ if (!iommu_merge || !nextneed || !need || s->offset || - (ps->offset + ps->length) % PAGE_SIZE) { - if (dma_map_cont(sg, start, i, sg+out, pages, - need) < 0) + (ps->offset + ps->length) % PAGE_SIZE) { + if (dma_map_cont(start_sg, i - start, sgmap, + pages, need) < 0) goto error; out++; + sgmap = sg_next(sgmap); pages = 0; - start = i; + start = i; + start_sg = s; } } need = nextneed; pages += to_pages(s->offset, s->length); + ps = s; } - if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0) + if (dma_map_cont(start_sg, i - start, sgmap, pages, need) < 0) goto error; out++; flush_gart(); - if (out < nents) - sg[out].dma_length = 0; + if (out < nents) + ps->dma_length = 0; return out; error: @@ -437,8 +442,8 @@ error: if (panic_on_overflow) panic("dma_map_sg: overflow on %lu pages\n", pages); iommu_full(dev, pages << PAGE_SHIFT, dir); - for (i = 0; i < nents; i++) - sg[i].dma_address = bad_dma_address; + for_each_sg(sg, s, nents, i) + s->dma_address = bad_dma_address; return 0; } diff -puN arch/x86_64/kernel/pci-nommu.c~git-block arch/x86_64/kernel/pci-nommu.c --- a/arch/x86_64/kernel/pci-nommu.c~git-block +++ a/arch/x86_64/kernel/pci-nommu.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -57,10 +58,10 @@ static void nommu_unmap_single(struct de static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction) { + struct scatterlist *s; int i; - for (i = 0; i < nents; i++ ) { - struct scatterlist *s = &sg[i]; + for_each_sg(sg, s, nents, i) { BUG_ON(!s->page); s->dma_address = virt_to_bus(page_address(s->page) +s->offset); if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) diff -puN block/elevator.c~git-block block/elevator.c --- a/block/elevator.c~git-block +++ a/block/elevator.c @@ -712,6 +712,14 @@ struct request *elv_next_request(struct int ret; while ((rq = __elv_next_request(q)) != NULL) { + /* + * Kill the empty barrier place holder, the driver must + * not ever see it. 
+ */ + if (blk_empty_barrier(rq)) { + end_queued_request(rq, 1); + continue; + } if (!(rq->cmd_flags & REQ_STARTED)) { /* * This is the first time the device driver @@ -751,15 +759,8 @@ struct request *elv_next_request(struct rq = NULL; break; } else if (ret == BLKPREP_KILL) { - int nr_bytes = rq->hard_nr_sectors << 9; - - if (!nr_bytes) - nr_bytes = rq->data_len; - - blkdev_dequeue_request(rq); rq->cmd_flags |= REQ_QUIET; - end_that_request_chunk(rq, 0, nr_bytes); - end_that_request_last(rq, 0); + end_queued_request(rq, 0); } else { printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, ret); diff -puN block/ll_rw_blk.c~git-block block/ll_rw_blk.c --- a/block/ll_rw_blk.c~git-block +++ a/block/ll_rw_blk.c @@ -30,6 +30,7 @@ #include #include #include +#include /* * for max sense size @@ -42,6 +43,9 @@ static void drive_stat_acct(struct reque static void init_request_from_bio(struct request *req, struct bio *bio); static int __make_request(struct request_queue *q, struct bio *bio); static struct io_context *current_io_context(gfp_t gfp_flags, int node); +static void blk_recalc_rq_segments(struct request *rq); +static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, + struct bio *bio); /* * For the allocated request tables @@ -301,23 +305,6 @@ int blk_queue_ordered(struct request_que EXPORT_SYMBOL(blk_queue_ordered); -/** - * blk_queue_issue_flush_fn - set function for issuing a flush - * @q: the request queue - * @iff: the function to be called issuing the flush - * - * Description: - * If a driver supports issuing a flush command, the support is notified - * to the block layer by defining it through this call. - * - **/ -void blk_queue_issue_flush_fn(struct request_queue *q, issue_flush_fn *iff) -{ - q->issue_flush_fn = iff; -} - -EXPORT_SYMBOL(blk_queue_issue_flush_fn); - /* * Cache flushing for ordered writes handling */ @@ -374,10 +361,12 @@ void blk_ordered_complete_seq(struct req /* * Okay, sequence complete. */ - rq = q->orig_bar_rq; - uptodate = q->orderr ? q->orderr : 1; + uptodate = 1; + if (q->orderr) + uptodate = q->orderr; q->ordseq = 0; + rq = q->orig_bar_rq; end_that_request_first(rq, uptodate, rq->hard_nr_sectors); end_that_request_last(rq, uptodate); @@ -443,7 +432,8 @@ static inline struct request *start_orde rq_init(q, rq); if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) rq->cmd_flags |= REQ_RW; - rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; + if (q->ordered & QUEUE_ORDERED_FUA) + rq->cmd_flags |= REQ_FUA; rq->elevator_private = NULL; rq->elevator_private2 = NULL; init_request_from_bio(rq, q->orig_bar_rq->bio); @@ -453,9 +443,12 @@ static inline struct request *start_orde * Queue ordered sequence. As we stack them at the head, we * need to queue in reverse order. Note that we rely on that * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs - * request gets inbetween ordered sequence. + * request gets inbetween ordered sequence. If this request is + * an empty barrier, we don't need to do a postflush ever since + * there will be no data written between the pre and post flush. + * Hence a single flush will suffice. 
*/ - if (q->ordered & QUEUE_ORDERED_POSTFLUSH) + if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq)) queue_flush(q, QUEUE_ORDERED_POSTFLUSH); else q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; @@ -479,7 +472,7 @@ static inline struct request *start_orde int blk_do_ordered(struct request_queue *q, struct request **rqp) { struct request *rq = *rqp; - int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); + const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); if (!q->ordseq) { if (!is_barrier) @@ -538,6 +531,8 @@ static int flush_dry_bio_endio(struct bi if (bio->bi_size) return 1; + if (bio_empty_barrier(bio)) + return 0; /* Reset bio */ set_bit(BIO_UPTODATE, &bio->bi_flags); @@ -561,8 +556,17 @@ static int ordered_bio_endio(struct requ /* * Okay, this is the barrier request in progress, dry finish it. */ - if (error && !q->orderr) - q->orderr = error; + if (error) { + if (!q->orderr) + q->orderr = error; + + /* + * Let the original request inherit the error location, if + * this is an empty barrier request. + */ + if (bio_empty_barrier(bio)) + q->orig_bar_rq->sector = bio->bi_sector; + } endio = bio->bi_end_io; private = bio->bi_private; @@ -1220,16 +1224,40 @@ EXPORT_SYMBOL(blk_dump_rq_flags); void blk_recount_segments(struct request_queue *q, struct bio *bio) { + struct request rq; + struct bio *nxt = bio->bi_next; + rq.q = q; + rq.bio = rq.biotail = bio; + bio->bi_next = NULL; + blk_recalc_rq_segments(&rq); + bio->bi_next = nxt; + bio->bi_phys_segments = rq.nr_phys_segments; + bio->bi_hw_segments = rq.nr_hw_segments; + bio->bi_flags |= (1 << BIO_SEG_VALID); +} +EXPORT_SYMBOL(blk_recount_segments); + +static void blk_recalc_rq_segments(struct request *rq) +{ + int nr_phys_segs; + int nr_hw_segs; + unsigned int phys_size; + unsigned int hw_size; struct bio_vec *bv, *bvprv = NULL; - int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster; + int seg_size; + int hw_seg_size; + int cluster; + struct req_iterator iter; int high, highprv = 1; + struct request_queue *q = rq->q; - if (unlikely(!bio->bi_io_vec)) + if (!rq->bio) return; cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); - hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0; - bio_for_each_segment(bv, bio, i) { + hw_seg_size = seg_size = 0; + phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; + rq_for_each_segment(bv, rq, iter) { /* * the trick here is making sure that a high page is never * considered part of another segment, since that might @@ -1255,12 +1283,13 @@ void blk_recount_segments(struct request } new_segment: if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) && - !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) { + !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) hw_seg_size += bv->bv_len; - } else { + else { new_hw_segment: - if (hw_seg_size > bio->bi_hw_front_size) - bio->bi_hw_front_size = hw_seg_size; + if (nr_hw_segs == 1 && + hw_seg_size > rq->bio->bi_hw_front_size) + rq->bio->bi_hw_front_size = hw_seg_size; hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len; nr_hw_segs++; } @@ -1270,15 +1299,15 @@ new_hw_segment: seg_size = bv->bv_len; highprv = high; } - if (hw_seg_size > bio->bi_hw_back_size) - bio->bi_hw_back_size = hw_seg_size; - if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size) - bio->bi_hw_front_size = hw_seg_size; - bio->bi_phys_segments = nr_phys_segs; - bio->bi_hw_segments = nr_hw_segs; - bio->bi_flags |= (1 << BIO_SEG_VALID); + + if (nr_hw_segs == 1 && + hw_seg_size > rq->bio->bi_hw_front_size) + rq->bio->bi_hw_front_size = hw_seg_size; + if (hw_seg_size > 
rq->biotail->bi_hw_back_size) + rq->biotail->bi_hw_back_size = hw_seg_size; + rq->nr_phys_segments = nr_phys_segs; + rq->nr_hw_segments = nr_hw_segs; } -EXPORT_SYMBOL(blk_recount_segments); static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, struct bio *nxt) @@ -1322,11 +1351,12 @@ static int blk_hw_contig_segment(struct * must make sure sg can hold rq->nr_phys_segments entries */ int blk_rq_map_sg(struct request_queue *q, struct request *rq, - struct scatterlist *sg) + struct scatterlist *sglist) { struct bio_vec *bvec, *bvprv; - struct bio *bio; - int nsegs, i, cluster; + struct scatterlist *next_sg, *sg; + struct req_iterator iter; + int nsegs, cluster; nsegs = 0; cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); @@ -1335,35 +1365,32 @@ int blk_rq_map_sg(struct request_queue * * for each bio in rq */ bvprv = NULL; - rq_for_each_bio(bio, rq) { - /* - * for each segment in bio - */ - bio_for_each_segment(bvec, bio, i) { - int nbytes = bvec->bv_len; + sg = next_sg = &sglist[0]; + rq_for_each_segment(bvec, rq, iter) { + int nbytes = bvec->bv_len; - if (bvprv && cluster) { - if (sg[nsegs - 1].length + nbytes > q->max_segment_size) - goto new_segment; - - if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) - goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) - goto new_segment; + if (bvprv && cluster) { + if (sg->length + nbytes > q->max_segment_size) + goto new_segment; - sg[nsegs - 1].length += nbytes; - } else { + if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) + goto new_segment; + if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) + goto new_segment; + + sg->length += nbytes; + } else { new_segment: - memset(&sg[nsegs],0,sizeof(struct scatterlist)); - sg[nsegs].page = bvec->bv_page; - sg[nsegs].length = nbytes; - sg[nsegs].offset = bvec->bv_offset; + sg = next_sg; + next_sg = sg_next(sg); - nsegs++; - } - bvprv = bvec; - } /* segments in bio */ - } /* bios in rq */ + sg->page = bvec->bv_page; + sg->length = nbytes; + sg->offset = bvec->bv_offset; + nsegs++; + } + bvprv = bvec; + } /* segments in rq */ return nsegs; } @@ -1420,7 +1447,8 @@ static inline int ll_new_hw_segment(stru return 1; } -int ll_back_merge_fn(struct request_queue *q, struct request *req, struct bio *bio) +static int ll_back_merge_fn(struct request_queue *q, struct request *req, + struct bio *bio) { unsigned short max_sectors; int len; @@ -1456,7 +1484,6 @@ int ll_back_merge_fn(struct request_queu return ll_new_hw_segment(q, req, bio); } -EXPORT_SYMBOL(ll_back_merge_fn); static int ll_front_merge_fn(struct request_queue *q, struct request *req, struct bio *bio) @@ -2346,6 +2373,23 @@ static int __blk_rq_unmap_user(struct bi return ret; } +int blk_rq_append_bio(struct request_queue *q, struct request *rq, + struct bio *bio) +{ + if (!rq->bio) + blk_rq_bio_prep(q, rq, bio); + else if (!ll_back_merge_fn(q, rq, bio)) + return -EINVAL; + else { + rq->biotail->bi_next = bio; + rq->biotail = bio; + + rq->data_len += bio->bi_size; + } + return 0; +} +EXPORT_SYMBOL(blk_rq_append_bio); + static int __blk_rq_map_user(struct request_queue *q, struct request *rq, void __user *ubuf, unsigned int len) { @@ -2377,21 +2421,10 @@ static int __blk_rq_map_user(struct requ */ bio_get(bio); - if (!rq->bio) - blk_rq_bio_prep(q, rq, bio); - else if (!ll_back_merge_fn(q, rq, bio)) { - ret = -EINVAL; - goto unmap_bio; - } else { - rq->biotail->bi_next = bio; - rq->biotail = bio; + ret = blk_rq_append_bio(q, rq, bio); + if (!ret) + return bio->bi_size; - rq->data_len += bio->bi_size; - } - - return bio->bi_size; - -unmap_bio: /* if 
it was boucned we must call the end io function */ bio_endio(bio, bio->bi_size, 0); __blk_rq_unmap_user(orig_bio); @@ -2652,6 +2685,16 @@ int blk_execute_rq(struct request_queue EXPORT_SYMBOL(blk_execute_rq); +static int bio_end_empty_barrier(struct bio *bio, unsigned int bytes_done, + int err) +{ + if (err) + clear_bit(BIO_UPTODATE, &bio->bi_flags); + + complete(bio->bi_private); + return 0; +} + /** * blkdev_issue_flush - queue a flush * @bdev: blockdev to issue flush for @@ -2664,7 +2707,10 @@ EXPORT_SYMBOL(blk_execute_rq); */ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) { + DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q; + struct bio *bio; + int ret; if (bdev->bd_disk == NULL) return -ENXIO; @@ -2672,10 +2718,32 @@ int blkdev_issue_flush(struct block_devi q = bdev_get_queue(bdev); if (!q) return -ENXIO; - if (!q->issue_flush_fn) - return -EOPNOTSUPP; - return q->issue_flush_fn(q, bdev->bd_disk, error_sector); + bio = bio_alloc(GFP_KERNEL, 0); + if (!bio) + return -ENOMEM; + + bio->bi_end_io = bio_end_empty_barrier; + bio->bi_private = &wait; + bio->bi_bdev = bdev; + submit_bio(1 << BIO_RW_BARRIER, bio); + + wait_for_completion(&wait); + + /* + * The driver must store the error location in ->bi_sector, if + * it supports it. For non-stacked drivers, this should be copied + * from rq->sector. + */ + if (error_sector) + *error_sector = bio->bi_sector; + + ret = 0; + if (!bio_flagged(bio, BIO_UPTODATE)) + ret = -EIO; + + bio_put(bio); + return ret; } EXPORT_SYMBOL(blkdev_issue_flush); @@ -2912,15 +2980,9 @@ static void init_request_from_bio(struct req->errors = 0; req->hard_sector = req->sector = bio->bi_sector; - req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio); - req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio); - req->nr_phys_segments = bio_phys_segments(req->q, bio); - req->nr_hw_segments = bio_hw_segments(req->q, bio); - req->buffer = bio_data(bio); /* see ->buffer comment above */ - req->bio = req->biotail = bio; req->ioprio = bio_prio(bio); - req->rq_disk = bio->bi_bdev->bd_disk; req->start_time = jiffies; + blk_rq_bio_prep(req->q, req, bio); } static int __make_request(struct request_queue *q, struct bio *bio) @@ -3049,7 +3111,7 @@ static inline void blk_partition_remap(s { struct block_device *bdev = bio->bi_bdev; - if (bdev != bdev->bd_contains) { + if (bio_sectors(bio) && bdev != bdev->bd_contains) { struct hd_struct *p = bdev->bd_part; const int rw = bio_data_dir(bio); @@ -3115,6 +3177,35 @@ static inline int should_fail_request(st #endif /* CONFIG_FAIL_MAKE_REQUEST */ +/* + * Check whether this bio extends beyond the end of the device. + */ +static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) +{ + sector_t maxsector; + + if (!nr_sectors) + return 0; + + /* Test device or partition size, when known. */ + maxsector = bio->bi_bdev->bd_inode->i_size >> 9; + if (maxsector) { + sector_t sector = bio->bi_sector; + + if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { + /* + * This may well happen - the kernel calls bread() + * without checking the size of the device, e.g., when + * mounting a device. + */ + handle_bad_sector(bio); + return 1; + } + } + + return 0; +} + /** * generic_make_request: hand a buffer to its device driver for I/O * @bio: The bio describing the location in memory and on the device. 
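/*
 * Illustrative sketch, not part of the patch: with the bio-based
 * blkdev_issue_flush() above, a caller that holds a struct block_device
 * can drain the device write cache without the queue providing an
 * issue_flush_fn any more.  example_flush_cache() and its printk text
 * are invented for this example only.
 */
static int example_flush_cache(struct block_device *bdev)
{
        sector_t error_sector = 0;
        int err;

        err = blkdev_issue_flush(bdev, &error_sector);
        if (err)
                /*
                 * error_sector is only meaningful if the low level
                 * driver stored the error location in ->bi_sector, as
                 * the comment in blkdev_issue_flush() notes.
                 */
                printk(KERN_ERR "flush failed: %d (near sector %llu)\n",
                       err, (unsigned long long)error_sector);

        return err;
}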
@@ -3142,27 +3233,14 @@ static inline int should_fail_request(st static inline void __generic_make_request(struct bio *bio) { struct request_queue *q; - sector_t maxsector; sector_t old_sector; int ret, nr_sectors = bio_sectors(bio); dev_t old_dev; might_sleep(); - /* Test device or partition size, when known. */ - maxsector = bio->bi_bdev->bd_inode->i_size >> 9; - if (maxsector) { - sector_t sector = bio->bi_sector; - if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { - /* - * This may well happen - the kernel calls bread() - * without checking the size of the device, e.g., when - * mounting a device. - */ - handle_bad_sector(bio); - goto end_io; - } - } + if (bio_check_eod(bio, nr_sectors)) + goto end_io; /* * Resolve the mapping until finished. (drivers are @@ -3189,7 +3267,7 @@ end_io: break; } - if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) { + if (unlikely(nr_sectors > q->max_hw_sectors)) { printk("bio too big device %s (%u > %u)\n", bdevname(bio->bi_bdev, b), bio_sectors(bio), @@ -3210,7 +3288,7 @@ end_io: blk_partition_remap(bio); if (old_sector != -1) - blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, + blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, old_sector); blk_add_trace_bio(q, bio, BLK_TA_QUEUE); @@ -3218,21 +3296,8 @@ end_io: old_sector = bio->bi_sector; old_dev = bio->bi_bdev->bd_dev; - maxsector = bio->bi_bdev->bd_inode->i_size >> 9; - if (maxsector) { - sector_t sector = bio->bi_sector; - - if (maxsector < nr_sectors || - maxsector - nr_sectors < sector) { - /* - * This may well happen - partitions are not - * checked to make sure they are within the size - * of the whole device. - */ - handle_bad_sector(bio); - goto end_io; - } - } + if (bio_check_eod(bio, nr_sectors)) + goto end_io; ret = q->make_request_fn(q, bio); } while (ret); @@ -3305,72 +3370,39 @@ void submit_bio(int rw, struct bio *bio) { int count = bio_sectors(bio); - BIO_BUG_ON(!bio->bi_size); - BIO_BUG_ON(!bio->bi_io_vec); bio->bi_rw |= rw; - if (rw & WRITE) { - count_vm_events(PGPGOUT, count); - } else { - task_io_account_read(bio->bi_size); - count_vm_events(PGPGIN, count); - } - - if (unlikely(block_dump)) { - char b[BDEVNAME_SIZE]; - printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", - current->comm, current->pid, - (rw & WRITE) ? "WRITE" : "READ", - (unsigned long long)bio->bi_sector, - bdevname(bio->bi_bdev,b)); - } - generic_make_request(bio); -} + /* + * If it's a regular read/write or a barrier with data attached, + * go through the normal accounting stuff before submission. + */ + if (!bio_empty_barrier(bio)) { -EXPORT_SYMBOL(submit_bio); + BIO_BUG_ON(!bio->bi_size); + BIO_BUG_ON(!bio->bi_io_vec); -static void blk_recalc_rq_segments(struct request *rq) -{ - struct bio *bio, *prevbio = NULL; - int nr_phys_segs, nr_hw_segs; - unsigned int phys_size, hw_size; - struct request_queue *q = rq->q; - - if (!rq->bio) - return; + if (rw & WRITE) { + count_vm_events(PGPGOUT, count); + } else { + task_io_account_read(bio->bi_size); + count_vm_events(PGPGIN, count); + } - phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; - rq_for_each_bio(bio, rq) { - /* Force bio hw/phys segs to be recalculated. 
*/ - bio->bi_flags &= ~(1 << BIO_SEG_VALID); - - nr_phys_segs += bio_phys_segments(q, bio); - nr_hw_segs += bio_hw_segments(q, bio); - if (prevbio) { - int pseg = phys_size + prevbio->bi_size + bio->bi_size; - int hseg = hw_size + prevbio->bi_size + bio->bi_size; - - if (blk_phys_contig_segment(q, prevbio, bio) && - pseg <= q->max_segment_size) { - nr_phys_segs--; - phys_size += prevbio->bi_size + bio->bi_size; - } else - phys_size = 0; - - if (blk_hw_contig_segment(q, prevbio, bio) && - hseg <= q->max_segment_size) { - nr_hw_segs--; - hw_size += prevbio->bi_size + bio->bi_size; - } else - hw_size = 0; + if (unlikely(block_dump)) { + char b[BDEVNAME_SIZE]; + printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", + current->comm, current->pid, + (rw & WRITE) ? "WRITE" : "READ", + (unsigned long long)bio->bi_sector, + bdevname(bio->bi_bdev,b)); } - prevbio = bio; } - rq->nr_phys_segments = nr_phys_segs; - rq->nr_hw_segments = nr_hw_segs; + generic_make_request(bio); } +EXPORT_SYMBOL(submit_bio); + static void blk_recalc_rq_sectors(struct request *rq, int nsect) { if (blk_fs_request(rq)) { @@ -3439,6 +3471,14 @@ static int __end_that_request_first(stru while ((bio = req->bio) != NULL) { int nbytes; + /* + * For an empty barrier request, the low level driver must + * store a potential error location in ->sector. We pass + * that back up in ->bi_sector. + */ + if (blk_empty_barrier(req)) + bio->bi_sector = req->sector; + if (nr_bytes >= bio->bi_size) { req->bio = bio->bi_next; nbytes = bio->bi_size; @@ -3574,7 +3614,7 @@ static void blk_done_softirq(struct soft } } -static int blk_cpu_notify(struct notifier_block *self, unsigned long action, +static int __cpuinit blk_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { /* @@ -3595,7 +3635,7 @@ static int blk_cpu_notify(struct notifie } -static struct notifier_block __devinitdata blk_cpu_notifier = { +static struct notifier_block blk_cpu_notifier __cpuinitdata = { .notifier_call = blk_cpu_notify, }; @@ -3606,7 +3646,7 @@ static struct notifier_block __devinitda * Description: * Ends all I/O on a request. It does not handle partial completions, * unless the driver actually implements this in its completion callback - * through requeueing. Theh actual completion happens out-of-order, + * through requeueing. The actual completion happens out-of-order, * through a softirq handler. The user must have registered a completion * callback through blk_queue_softirq_done(). **/ @@ -3669,19 +3709,87 @@ void end_that_request_last(struct reques EXPORT_SYMBOL(end_that_request_last); -void end_request(struct request *req, int uptodate) +static inline void __end_request(struct request *rq, int uptodate, + unsigned int nr_bytes, int dequeue) { - if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { - add_disk_randomness(req->rq_disk); - blkdev_dequeue_request(req); - end_that_request_last(req, uptodate); + if (!end_that_request_chunk(rq, uptodate, nr_bytes)) { + if (dequeue) + blkdev_dequeue_request(rq); + add_disk_randomness(rq->rq_disk); + end_that_request_last(rq, uptodate); } } +static unsigned int rq_byte_size(struct request *rq) +{ + if (blk_fs_request(rq)) + return rq->hard_nr_sectors << 9; + + return rq->data_len; +} + +/** + * end_queued_request - end all I/O on a queued request + * @rq: the request being processed + * @uptodate: error value or 0/1 uptodate flag + * + * Description: + * Ends all I/O on a request, and removes it from the block layer queues. 
+ * Not suitable for normal IO completion, unless the driver still has + * the request attached to the block layer. + * + **/ +void end_queued_request(struct request *rq, int uptodate) +{ + __end_request(rq, uptodate, rq_byte_size(rq), 1); +} +EXPORT_SYMBOL(end_queued_request); + +/** + * end_dequeued_request - end all I/O on a dequeued request + * @rq: the request being processed + * @uptodate: error value or 0/1 uptodate flag + * + * Description: + * Ends all I/O on a request. The request must already have been + * dequeued using blkdev_dequeue_request(), as is normally the case + * for most drivers. + * + **/ +void end_dequeued_request(struct request *rq, int uptodate) +{ + __end_request(rq, uptodate, rq_byte_size(rq), 0); +} +EXPORT_SYMBOL(end_dequeued_request); + + +/** + * end_request - end I/O on the current segment of the request + * @rq: the request being processed + * @uptodate: error value or 0/1 uptodate flag + * + * Description: + * Ends I/O on the current segment of a request. If that is the only + * remaining segment, the request is also completed and freed. + * + * This is a remnant of how older block drivers handled IO completions. + * Modern drivers typically end IO on the full request in one go, unless + * they have a residual value to account for. For that case this function + * isn't really useful, unless the residual just happens to be the + * full current segment. In other words, don't use this function in new + * code. Either use end_request_completely(), or the + * end_that_request_chunk() (along with end_that_request_last()) for + * partial completions. + * + **/ +void end_request(struct request *req, int uptodate) +{ + __end_request(req, uptodate, req->hard_cur_sectors << 9, 1); +} EXPORT_SYMBOL(end_request); -void blk_rq_bio_prep(struct request_queue *q, struct request *rq, - struct bio *bio) +static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, + struct bio *bio) { /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ rq->cmd_flags |= (bio->bi_rw & 3); @@ -3695,9 +3803,10 @@ void blk_rq_bio_prep(struct request_queu rq->data_len = bio->bi_size; rq->bio = rq->biotail = bio; -} -EXPORT_SYMBOL(blk_rq_bio_prep); + if (bio->bi_bdev) + rq->rq_disk = bio->bi_bdev->bd_disk; +} int kblockd_schedule_work(struct work_struct *work) { @@ -3999,7 +4108,23 @@ static ssize_t queue_max_hw_sectors_show return queue_var_show(max_hw_sectors_kb, (page)); } +static ssize_t queue_max_segments_show(struct request_queue *q, char *page) +{ + return queue_var_show(q->max_phys_segments, page); +} + +static ssize_t queue_max_segments_store(struct request_queue *q, + const char *page, size_t count) +{ + unsigned long segments; + ssize_t ret = queue_var_store(&segments, page, count); + + spin_lock_irq(q->queue_lock); + q->max_phys_segments = segments; + spin_unlock_irq(q->queue_lock); + return ret; +} static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, .show = queue_requests_show, @@ -4023,6 +4148,12 @@ static struct queue_sysfs_entry queue_ma .show = queue_max_hw_sectors_show, }; +static struct queue_sysfs_entry queue_max_segments_entry = { + .attr = {.name = "max_segments", .mode = S_IRUGO | S_IWUSR }, + .show = queue_max_segments_show, + .store = queue_max_segments_store, +}; + static struct queue_sysfs_entry queue_iosched_entry = { .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR }, .show = elv_iosched_show, @@ -4034,6 +4165,7 @@ static struct attribute *default_attrs[] 
&queue_ra_entry.attr, &queue_max_hw_sectors_entry.attr, &queue_max_sectors_entry.attr, + &queue_max_segments_entry.attr, &queue_iosched_entry.attr, NULL, }; diff -puN crypto/digest.c~git-block crypto/digest.c --- a/crypto/digest.c~git-block +++ a/crypto/digest.c @@ -77,7 +77,7 @@ static int update2(struct hash_desc *des if (!nbytes) break; - sg = sg_next(sg); + sg = scatterwalk_sg_next(sg); } return 0; diff -puN crypto/scatterwalk.c~git-block crypto/scatterwalk.c --- a/crypto/scatterwalk.c~git-block +++ a/crypto/scatterwalk.c @@ -70,7 +70,7 @@ static void scatterwalk_pagedone(struct walk->offset += PAGE_SIZE - 1; walk->offset &= PAGE_MASK; if (walk->offset >= walk->sg->offset + walk->sg->length) - scatterwalk_start(walk, sg_next(walk->sg)); + scatterwalk_start(walk, scatterwalk_sg_next(walk->sg)); } } diff -puN crypto/scatterwalk.h~git-block crypto/scatterwalk.h --- a/crypto/scatterwalk.h~git-block +++ a/crypto/scatterwalk.h @@ -20,7 +20,7 @@ #include "internal.h" -static inline struct scatterlist *sg_next(struct scatterlist *sg) +static inline struct scatterlist *scatterwalk_sg_next(struct scatterlist *sg) { return (++sg)->length ? sg : (void *)sg->page; } diff -puN drivers/ata/libata-core.c~git-block drivers/ata/libata-core.c --- a/drivers/ata/libata-core.c~git-block +++ a/drivers/ata/libata-core.c @@ -1389,7 +1389,7 @@ static void ata_qc_complete_internal(str */ unsigned ata_exec_internal_sg(struct ata_device *dev, struct ata_taskfile *tf, const u8 *cdb, - int dma_dir, struct scatterlist *sg, + int dma_dir, struct scatterlist *sgl, unsigned int n_elem) { struct ata_link *link = dev->link; @@ -1451,11 +1451,12 @@ unsigned ata_exec_internal_sg(struct ata qc->dma_dir = dma_dir; if (dma_dir != DMA_NONE) { unsigned int i, buflen = 0; + struct scatterlist *sg; - for (i = 0; i < n_elem; i++) - buflen += sg[i].length; + for_each_sg(sgl, sg, n_elem, i) + buflen += sg->length; - ata_sg_init(qc, sg, n_elem); + ata_sg_init(qc, sgl, n_elem); qc->nbytes = buflen; } @@ -4272,7 +4273,7 @@ void ata_sg_clean(struct ata_queued_cmd if (qc->n_elem) dma_unmap_sg(ap->dev, sg, qc->n_elem, dir); /* restore last sg */ - sg[qc->orig_n_elem - 1].length += qc->pad_len; + sg_last(sg, qc->orig_n_elem)->length += qc->pad_len; if (pad_buf) { struct scatterlist *psg = &qc->pad_sgent; void *addr = kmap_atomic(psg->page, KM_IRQ0); @@ -4527,6 +4528,7 @@ void ata_sg_init_one(struct ata_queued_c qc->orig_n_elem = 1; qc->buf_virt = buf; qc->nbytes = buflen; + qc->cursg = qc->__sg; sg_init_one(&qc->sgent, buf, buflen); } @@ -4552,6 +4554,7 @@ void ata_sg_init(struct ata_queued_cmd * qc->__sg = sg; qc->n_elem = n_elem; qc->orig_n_elem = n_elem; + qc->cursg = qc->__sg; } /** @@ -4641,7 +4644,7 @@ static int ata_sg_setup(struct ata_queue { struct ata_port *ap = qc->ap; struct scatterlist *sg = qc->__sg; - struct scatterlist *lsg = &sg[qc->n_elem - 1]; + struct scatterlist *lsg = sg_last(qc->__sg, qc->n_elem); int n_elem, pre_n_elem, dir, trim_sg = 0; VPRINTK("ENTER, ata%u\n", ap->print_id); @@ -4805,7 +4808,6 @@ void ata_data_xfer_noirq(struct ata_devi static void ata_pio_sector(struct ata_queued_cmd *qc) { int do_write = (qc->tf.flags & ATA_TFLAG_WRITE); - struct scatterlist *sg = qc->__sg; struct ata_port *ap = qc->ap; struct page *page; unsigned int offset; @@ -4814,8 +4816,8 @@ static void ata_pio_sector(struct ata_qu if (qc->curbytes == qc->nbytes - qc->sect_size) ap->hsm_task_state = HSM_ST_LAST; - page = sg[qc->cursg].page; - offset = sg[qc->cursg].offset + qc->cursg_ofs; + page = qc->cursg->page; + offset = 
qc->cursg->offset + qc->cursg_ofs; /* get the current page and offset */ page = nth_page(page, (offset >> PAGE_SHIFT)); @@ -4843,8 +4845,8 @@ static void ata_pio_sector(struct ata_qu qc->curbytes += qc->sect_size; qc->cursg_ofs += qc->sect_size; - if (qc->cursg_ofs == (&sg[qc->cursg])->length) { - qc->cursg++; + if (qc->cursg_ofs == qc->cursg->length) { + qc->cursg = sg_next(qc->cursg); qc->cursg_ofs = 0; } } @@ -4930,16 +4932,18 @@ static void __atapi_pio_bytes(struct ata { int do_write = (qc->tf.flags & ATA_TFLAG_WRITE); struct scatterlist *sg = qc->__sg; + struct scatterlist *lsg = sg_last(qc->__sg, qc->n_elem); struct ata_port *ap = qc->ap; struct page *page; unsigned char *buf; unsigned int offset, count; + int no_more_sg = 0; if (qc->curbytes + bytes >= qc->nbytes) ap->hsm_task_state = HSM_ST_LAST; next_sg: - if (unlikely(qc->cursg >= qc->n_elem)) { + if (unlikely(no_more_sg)) { /* * The end of qc->sg is reached and the device expects * more data to transfer. In order not to overrun qc->sg @@ -4962,7 +4966,7 @@ next_sg: return; } - sg = &qc->__sg[qc->cursg]; + sg = qc->cursg; page = sg->page; offset = sg->offset + qc->cursg_ofs; @@ -5001,7 +5005,10 @@ next_sg: qc->cursg_ofs += count; if (qc->cursg_ofs == sg->length) { - qc->cursg++; + if (qc->cursg == lsg) + no_more_sg = 1; + + qc->cursg = sg_next(qc->cursg); qc->cursg_ofs = 0; } diff -puN drivers/ata/libata-scsi.c~git-block drivers/ata/libata-scsi.c --- a/drivers/ata/libata-scsi.c~git-block +++ a/drivers/ata/libata-scsi.c @@ -804,8 +804,6 @@ int ata_scsi_slave_config(struct scsi_de ata_scsi_sdev_config(sdev); - blk_queue_max_phys_segments(sdev->request_queue, LIBATA_MAX_PRD); - sdev->manage_start_stop = 1; if (dev) diff -puN drivers/block/cciss.c~git-block drivers/block/cciss.c --- a/drivers/block/cciss.c~git-block +++ a/drivers/block/cciss.c @@ -2570,6 +2570,7 @@ static void do_cciss_request(struct requ (int)creq->nr_sectors); #endif /* CCISS_DEBUG */ + memset(tmp_sg, 0, sizeof(tmp_sg)); seg = blk_rq_map_sg(q, creq, tmp_sg); /* get the DMA records for the setup */ diff -puN drivers/block/floppy.c~git-block drivers/block/floppy.c --- a/drivers/block/floppy.c~git-block +++ a/drivers/block/floppy.c @@ -2437,22 +2437,19 @@ static void rw_interrupt(void) /* Compute maximal contiguous buffer size. 
*/ static int buffer_chain_size(void) { - struct bio *bio; struct bio_vec *bv; - int size, i; + int size; + struct req_iterator iter; char *base; base = bio_data(current_req->bio); size = 0; - rq_for_each_bio(bio, current_req) { - bio_for_each_segment(bv, bio, i) { - if (page_address(bv->bv_page) + bv->bv_offset != - base + size) - break; + rq_for_each_segment(bv, current_req, iter) { + if (page_address(bv->bv_page) + bv->bv_offset != base + size) + break; - size += bv->bv_len; - } + size += bv->bv_len; } return size >> 9; @@ -2479,9 +2476,9 @@ static void copy_buffer(int ssize, int m { int remaining; /* number of transferred 512-byte sectors */ struct bio_vec *bv; - struct bio *bio; char *buffer, *dma_buffer; - int size, i; + int size; + struct req_iterator iter; max_sector = transfer_size(ssize, min(max_sector, max_sector_2), @@ -2514,43 +2511,41 @@ static void copy_buffer(int ssize, int m size = current_req->current_nr_sectors << 9; - rq_for_each_bio(bio, current_req) { - bio_for_each_segment(bv, bio, i) { - if (!remaining) - break; + rq_for_each_segment(bv, current_req, iter) { + if (!remaining) + break; - size = bv->bv_len; - SUPBOUND(size, remaining); + size = bv->bv_len; + SUPBOUND(size, remaining); - buffer = page_address(bv->bv_page) + bv->bv_offset; + buffer = page_address(bv->bv_page) + bv->bv_offset; #ifdef FLOPPY_SANITY_CHECK - if (dma_buffer + size > - floppy_track_buffer + (max_buffer_sectors << 10) || - dma_buffer < floppy_track_buffer) { - DPRINT("buffer overrun in copy buffer %d\n", - (int)((floppy_track_buffer - - dma_buffer) >> 9)); - printk("fsector_t=%d buffer_min=%d\n", - fsector_t, buffer_min); - printk("current_count_sectors=%ld\n", - current_count_sectors); - if (CT(COMMAND) == FD_READ) - printk("read\n"); - if (CT(COMMAND) == FD_WRITE) - printk("write\n"); - break; - } - if (((unsigned long)buffer) % 512) - DPRINT("%p buffer not aligned\n", buffer); -#endif + if (dma_buffer + size > + floppy_track_buffer + (max_buffer_sectors << 10) || + dma_buffer < floppy_track_buffer) { + DPRINT("buffer overrun in copy buffer %d\n", + (int)((floppy_track_buffer - + dma_buffer) >> 9)); + printk("fsector_t=%d buffer_min=%d\n", + fsector_t, buffer_min); + printk("current_count_sectors=%ld\n", + current_count_sectors); if (CT(COMMAND) == FD_READ) - memcpy(buffer, dma_buffer, size); - else - memcpy(dma_buffer, buffer, size); - - remaining -= size; - dma_buffer += size; + printk("read\n"); + if (CT(COMMAND) == FD_WRITE) + printk("write\n"); + break; } + if (((unsigned long)buffer) % 512) + DPRINT("%p buffer not aligned\n", buffer); +#endif + if (CT(COMMAND) == FD_READ) + memcpy(buffer, dma_buffer, size); + else + memcpy(dma_buffer, buffer, size); + + remaining -= size; + dma_buffer += size; } #ifdef FLOPPY_SANITY_CHECK if (remaining) { diff -puN drivers/block/lguest_blk.c~git-block drivers/block/lguest_blk.c --- a/drivers/block/lguest_blk.c~git-block +++ a/drivers/block/lguest_blk.c @@ -142,25 +142,23 @@ static irqreturn_t lgb_irq(int irq, void * return the total length. */ static unsigned int req_to_dma(struct request *req, struct lguest_dma *dma) { - unsigned int i = 0, idx, len = 0; - struct bio *bio; - - rq_for_each_bio(bio, req) { - struct bio_vec *bvec; - bio_for_each_segment(bvec, bio, idx) { - /* We told the block layer not to give us too many. */ - BUG_ON(i == LGUEST_MAX_DMA_SECTIONS); - /* If we had a zero-length segment, it would look like - * the end of the data referred to by the "struct - * lguest_dma", so make sure that doesn't happen. 
*/ - BUG_ON(!bvec->bv_len); - /* Convert page & offset to a physical address */ - dma->addr[i] = page_to_phys(bvec->bv_page) - + bvec->bv_offset; - dma->len[i] = bvec->bv_len; - len += bvec->bv_len; - i++; - } + unsigned int i = 0, len = 0; + struct req_iterator iter; + struct bio_vec *bvec; + + rq_for_each_segment(bvec, req, iter) { + /* We told the block layer not to give us too many. */ + BUG_ON(i == LGUEST_MAX_DMA_SECTIONS); + /* If we had a zero-length segment, it would look like + * the end of the data referred to by the "struct + * lguest_dma", so make sure that doesn't happen. */ + BUG_ON(!bvec->bv_len); + /* Convert page & offset to a physical address */ + dma->addr[i] = page_to_phys(bvec->bv_page) + + bvec->bv_offset; + dma->len[i] = bvec->bv_len; + len += bvec->bv_len; + i++; } /* If the array isn't full, we mark the end with a 0 length */ if (i < LGUEST_MAX_DMA_SECTIONS) diff -puN drivers/block/nbd.c~git-block drivers/block/nbd.c --- a/drivers/block/nbd.c~git-block +++ a/drivers/block/nbd.c @@ -180,7 +180,7 @@ static inline int sock_send_bvec(struct static int nbd_send_req(struct nbd_device *lo, struct request *req) { - int result, i, flags; + int result, flags; struct nbd_request request; unsigned long size = req->nr_sectors << 9; struct socket *sock = lo->sock; @@ -205,27 +205,23 @@ static int nbd_send_req(struct nbd_devic } if (nbd_cmd(req) == NBD_CMD_WRITE) { - struct bio *bio; + struct req_iterator iter; + struct bio_vec *bvec; /* * we are really probing at internals to determine * whether to set MSG_MORE or not... */ - rq_for_each_bio(bio, req) { - struct bio_vec *bvec; - bio_for_each_segment(bvec, bio, i) { - flags = 0; - if ((i < (bio->bi_vcnt - 1)) || bio->bi_next) - flags = MSG_MORE; - dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n", - lo->disk->disk_name, req, - bvec->bv_len); - result = sock_send_bvec(sock, bvec, flags); - if (result <= 0) { - printk(KERN_ERR "%s: Send data failed (result %d)\n", - lo->disk->disk_name, - result); - goto error_out; - } + rq_for_each_segment(bvec, req, iter) { + flags = 0; + if (!rq_iter_last(req, iter)) + flags = MSG_MORE; + dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n", + lo->disk->disk_name, req, bvec->bv_len); + result = sock_send_bvec(sock, bvec, flags); + if (result <= 0) { + printk(KERN_ERR "%s: Send data failed (result %d)\n", + lo->disk->disk_name, result); + goto error_out; } } } @@ -321,22 +317,19 @@ static struct request *nbd_read_stat(str dprintk(DBG_RX, "%s: request %p: got reply\n", lo->disk->disk_name, req); if (nbd_cmd(req) == NBD_CMD_READ) { - int i; - struct bio *bio; - rq_for_each_bio(bio, req) { - struct bio_vec *bvec; - bio_for_each_segment(bvec, bio, i) { - result = sock_recv_bvec(sock, bvec); - if (result <= 0) { - printk(KERN_ERR "%s: Receive data failed (result %d)\n", - lo->disk->disk_name, - result); - req->errors++; - return req; - } - dprintk(DBG_RX, "%s: request %p: got %d bytes data\n", - lo->disk->disk_name, req, bvec->bv_len); + struct req_iterator iter; + struct bio_vec *bvec; + + rq_for_each_segment(bvec, req, iter) { + result = sock_recv_bvec(sock, bvec); + if (result <= 0) { + printk(KERN_ERR "%s: Receive data failed (result %d)\n", + lo->disk->disk_name, result); + req->errors++; + return req; } + dprintk(DBG_RX, "%s: request %p: got %d bytes data\n", + lo->disk->disk_name, req, bvec->bv_len); } } return req; diff -puN drivers/block/pktcdvd.c~git-block drivers/block/pktcdvd.c --- a/drivers/block/pktcdvd.c~git-block +++ a/drivers/block/pktcdvd.c @@ -1142,16 +1142,21 @@ static 
void pkt_gather_data(struct pktcd * Schedule reads for missing parts of the packet. */ for (f = 0; f < pkt->frames; f++) { + struct bio_vec *vec; + int p, offset; if (written[f]) continue; bio = pkt->r_bios[f]; + vec = bio->bi_io_vec; bio_init(bio); bio->bi_max_vecs = 1; bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); bio->bi_bdev = pd->bdev; bio->bi_end_io = pkt_end_io_read; bio->bi_private = pkt; + bio->bi_io_vec = vec; + bio->bi_destructor = pkt_bio_destructor; p = (f * CD_FRAMESIZE) / PAGE_SIZE; offset = (f * CD_FRAMESIZE) % PAGE_SIZE; @@ -1448,6 +1453,8 @@ static void pkt_start_write(struct pktcd pkt->w_bio->bi_bdev = pd->bdev; pkt->w_bio->bi_end_io = pkt_end_io_packet_write; pkt->w_bio->bi_private = pkt; + pkt->w_bio->bi_io_vec = bvec; + pkt->w_bio->bi_destructor = pkt_bio_destructor; for (f = 0; f < pkt->frames; f++) if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset)) BUG(); diff -puN drivers/block/ps3disk.c~git-block drivers/block/ps3disk.c --- a/drivers/block/ps3disk.c~git-block +++ a/drivers/block/ps3disk.c @@ -91,30 +91,29 @@ static void ps3disk_scatter_gather(struc struct request *req, int gather) { unsigned int offset = 0; - struct bio *bio; - sector_t sector; + struct req_iterator iter; struct bio_vec *bvec; - unsigned int i = 0, j; + unsigned int i = 0; size_t size; void *buf; - rq_for_each_bio(bio, req) { - sector = bio->bi_sector; + rq_for_each_segment(bvec, req, iter) { + unsigned long flags; dev_dbg(&dev->sbd.core, "%s:%u: bio %u: %u segs %u sectors from %lu\n", - __func__, __LINE__, i, bio_segments(bio), - bio_sectors(bio), sector); - bio_for_each_segment(bvec, bio, j) { - size = bvec->bv_len; - buf = __bio_kmap_atomic(bio, j, KM_IRQ0); - if (gather) - memcpy(dev->bounce_buf+offset, buf, size); - else - memcpy(buf, dev->bounce_buf+offset, size); - offset += size; - flush_kernel_dcache_page(bio_iovec_idx(bio, j)->bv_page); - __bio_kunmap_atomic(bio, KM_IRQ0); - } + __func__, __LINE__, i, bio_segments(iter.bio), + bio_sectors(iter.bio), + (unsigned long)iter.bio->bi_sector); + + size = bvec->bv_len; + buf = bvec_kmap_irq(bvec, &flags); + if (gather) + memcpy(dev->bounce_buf+offset, buf, size); + else + memcpy(buf, dev->bounce_buf+offset, size); + offset += size; + flush_kernel_dcache_page(bvec->bv_page); + bvec_kunmap_irq(bvec, &flags); i++; } } @@ -130,12 +129,13 @@ static int ps3disk_submit_request_sg(str #ifdef DEBUG unsigned int n = 0; - struct bio *bio; + struct bio_vec *bv; + struct req_iterator iter; - rq_for_each_bio(bio, req) + rq_for_each_segment(bv, req, iter) n++; dev_dbg(&dev->sbd.core, - "%s:%u: %s req has %u bios for %lu sectors %lu hard sectors\n", + "%s:%u: %s req has %u bvecs for %lu sectors %lu hard sectors\n", __func__, __LINE__, op, n, req->nr_sectors, req->hard_nr_sectors); #endif @@ -414,26 +414,6 @@ static void ps3disk_prepare_flush(struct req->cmd_type = REQ_TYPE_FLUSH; } -static int ps3disk_issue_flush(struct request_queue *q, struct gendisk *gendisk, - sector_t *sector) -{ - struct ps3_storage_device *dev = q->queuedata; - struct request *req; - int res; - - dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); - - req = blk_get_request(q, WRITE, __GFP_WAIT); - ps3disk_prepare_flush(q, req); - res = blk_execute_rq(q, gendisk, req, 0); - if (res) - dev_err(&dev->sbd.core, "%s:%u: flush request failed %d\n", - __func__, __LINE__, res); - blk_put_request(req); - return res; -} - - static unsigned long ps3disk_mask; static DEFINE_MUTEX(ps3disk_mask_mutex); @@ -506,7 +486,6 @@ static int __devinit 
ps3disk_probe(struc blk_queue_dma_alignment(queue, dev->blk_size-1); blk_queue_hardsect_size(queue, dev->blk_size); - blk_queue_issue_flush_fn(queue, ps3disk_issue_flush); blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH, ps3disk_prepare_flush); diff -puN drivers/block/umem.c~git-block drivers/block/umem.c --- a/drivers/block/umem.c~git-block +++ a/drivers/block/umem.c @@ -113,6 +113,8 @@ struct cardinfo { * have been written */ struct bio *bio, *currentbio, **biotail; + int current_idx; + sector_t current_sector; struct request_queue *queue; @@ -121,6 +123,7 @@ struct cardinfo { struct mm_dma_desc *desc; int cnt, headcnt; struct bio *bio, **biotail; + int idx; } mm_pages[2]; #define DESC_PER_PAGE ((PAGE_SIZE*2)/sizeof(struct mm_dma_desc)) @@ -380,12 +383,16 @@ static int add_bio(struct cardinfo *card dma_addr_t dma_handle; int offset; struct bio *bio; + struct bio_vec *vec; + int idx; int rw; int len; bio = card->currentbio; if (!bio && card->bio) { card->currentbio = card->bio; + card->current_idx = card->bio->bi_idx; + card->current_sector = card->bio->bi_sector; card->bio = card->bio->bi_next; if (card->bio == NULL) card->biotail = &card->bio; @@ -394,15 +401,17 @@ static int add_bio(struct cardinfo *card } if (!bio) return 0; + idx = card->current_idx; rw = bio_rw(bio); if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE) return 0; - len = bio_iovec(bio)->bv_len; - dma_handle = pci_map_page(card->dev, - bio_page(bio), - bio_offset(bio), + vec = bio_iovec_idx(bio, idx); + len = vec->bv_len; + dma_handle = pci_map_page(card->dev, + vec->bv_page, + vec->bv_offset, len, (rw==READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); @@ -410,6 +419,8 @@ static int add_bio(struct cardinfo *card p = &card->mm_pages[card->Ready]; desc = &p->desc[p->cnt]; p->cnt++; + if (p->bio == NULL) + p->idx = idx; if ((p->biotail) != &bio->bi_next) { *(p->biotail) = bio; p->biotail = &(bio->bi_next); @@ -419,7 +430,7 @@ static int add_bio(struct cardinfo *card desc->data_dma_handle = dma_handle; desc->pci_addr = cpu_to_le64((u64)desc->data_dma_handle); - desc->local_addr= cpu_to_le64(bio->bi_sector << 9); + desc->local_addr = cpu_to_le64(card->current_sector << 9); desc->transfer_size = cpu_to_le32(len); offset = ( ((char*)&desc->sem_control_bits) - ((char*)p->desc)); desc->sem_addr = cpu_to_le64((u64)(p->page_dma+offset)); @@ -435,10 +446,10 @@ static int add_bio(struct cardinfo *card desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ); desc->sem_control_bits = desc->control_bits; - bio->bi_sector += (len>>9); - bio->bi_size -= len; - bio->bi_idx++; - if (bio->bi_idx >= bio->bi_vcnt) + card->current_sector += (len >> 9); + idx++; + card->current_idx = idx; + if (idx >= bio->bi_vcnt) card->currentbio = NULL; return 1; @@ -474,10 +485,12 @@ static void process_page(unsigned long d last=1; } page->headcnt++; - idx = bio->bi_phys_segments; - bio->bi_phys_segments++; - if (bio->bi_phys_segments >= bio->bi_vcnt) + idx = page->idx; + page->idx++; + if (page->idx >= bio->bi_vcnt) { page->bio = bio->bi_next; + page->idx = page->bio->bi_idx; + } pci_unmap_page(card->dev, desc->data_dma_handle, bio_iovec_idx(bio,idx)->bv_len, @@ -547,7 +560,6 @@ static int mm_make_request(struct reques pr_debug("mm_make_request %llu %u\n", (unsigned long long)bio->bi_sector, bio->bi_size); - bio->bi_phys_segments = bio->bi_idx; /* count of completed segments*/ spin_lock_irq(&card->lock); *card->biotail = bio; bio->bi_next = NULL; diff -puN drivers/block/xen-blkfront.c~git-block drivers/block/xen-blkfront.c --- 
a/drivers/block/xen-blkfront.c~git-block +++ a/drivers/block/xen-blkfront.c @@ -150,9 +150,8 @@ static int blkif_queue_request(struct re struct blkfront_info *info = req->rq_disk->private_data; unsigned long buffer_mfn; struct blkif_request *ring_req; - struct bio *bio; + struct req_iterator iter; struct bio_vec *bvec; - int idx; unsigned long id; unsigned int fsect, lsect; int ref; @@ -186,34 +185,31 @@ static int blkif_queue_request(struct re ring_req->operation = BLKIF_OP_WRITE_BARRIER; ring_req->nr_segments = 0; - rq_for_each_bio (bio, req) { - bio_for_each_segment (bvec, bio, idx) { - BUG_ON(ring_req->nr_segments - == BLKIF_MAX_SEGMENTS_PER_REQUEST); - buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page)); - fsect = bvec->bv_offset >> 9; - lsect = fsect + (bvec->bv_len >> 9) - 1; - /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&gref_head); - BUG_ON(ref == -ENOSPC); + rq_for_each_segment(bvec, req, iter) { + BUG_ON(ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST); + buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page)); + fsect = bvec->bv_offset >> 9; + lsect = fsect + (bvec->bv_len >> 9) - 1; + /* install a grant reference. */ + ref = gnttab_claim_grant_reference(&gref_head); + BUG_ON(ref == -ENOSPC); - gnttab_grant_foreign_access_ref( + gnttab_grant_foreign_access_ref( ref, info->xbdev->otherend_id, buffer_mfn, rq_data_dir(req) ); - info->shadow[id].frame[ring_req->nr_segments] = + info->shadow[id].frame[ring_req->nr_segments] = mfn_to_pfn(buffer_mfn); - ring_req->seg[ring_req->nr_segments] = + ring_req->seg[ring_req->nr_segments] = (struct blkif_request_segment) { .gref = ref, .first_sect = fsect, .last_sect = lsect }; - ring_req->nr_segments++; - } + ring_req->nr_segments++; } info->ring.req_prod_pvt++; diff -puN drivers/ide/cris/ide-cris.c~git-block drivers/ide/cris/ide-cris.c --- a/drivers/ide/cris/ide-cris.c~git-block +++ a/drivers/ide/cris/ide-cris.c @@ -937,7 +937,8 @@ static int cris_ide_build_dmatable (ide_ /* group sequential buffers into one large buffer */ addr = page_to_phys(sg->page) + sg->offset; size = sg_dma_len(sg); - while (sg++, --i) { + while (--i) { + sg = sg_next(sg); if ((addr + size) != page_to_phys(sg->page) + sg->offset) break; size += sg_dma_len(sg); diff -puN drivers/ide/ide-disk.c~git-block drivers/ide/ide-disk.c --- a/drivers/ide/ide-disk.c~git-block +++ a/drivers/ide/ide-disk.c @@ -716,32 +716,6 @@ static void idedisk_prepare_flush(struct rq->buffer = rq->cmd; } -static int idedisk_issue_flush(struct request_queue *q, struct gendisk *disk, - sector_t *error_sector) -{ - ide_drive_t *drive = q->queuedata; - struct request *rq; - int ret; - - if (!drive->wcache) - return 0; - - rq = blk_get_request(q, WRITE, __GFP_WAIT); - - idedisk_prepare_flush(q, rq); - - ret = blk_execute_rq(q, disk, rq, 0); - - /* - * if we failed and caller wants error offset, get it - */ - if (ret && error_sector) - *error_sector = ide_get_error_location(drive, rq->cmd); - - blk_put_request(rq); - return ret; -} - /* * This is tightly woven into the driver->do_special can not touch. * DON'T do it again until a total personality rewrite is committed. 
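/*
 * Illustrative sketch, not part of the patch: the access pattern the
 * IDE (cris, au1xxx, sgiioc4, pmac) DMA table builders in this series
 * converge on.  With chained scatterlists the next element is no longer
 * guaranteed to live at "sg + 1", so it has to be fetched via
 * sg_next()/for_each_sg() rather than pointer arithmetic.  The names
 * example_prd and example_build_prd_table() are invented for this
 * example only.
 */
struct example_prd {
        dma_addr_t addr;
        unsigned int len;
};

static int example_build_prd_table(struct scatterlist *sglist, int nents,
                                   struct example_prd *prd)
{
        struct scatterlist *sg;
        int i, count = 0;

        for_each_sg(sglist, sg, nents, i) {
                /* each element may sit in a different chained sg page */
                prd[count].addr = sg_dma_address(sg);
                prd[count].len = sg_dma_len(sg);
                count++;
        }

        return count;   /* number of PRD entries filled in */
}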
@@ -781,7 +755,6 @@ static void update_ordered(ide_drive_t * struct hd_driveid *id = drive->id; unsigned ordered = QUEUE_ORDERED_NONE; prepare_flush_fn *prep_fn = NULL; - issue_flush_fn *issue_fn = NULL; if (drive->wcache) { unsigned long long capacity; @@ -805,13 +778,11 @@ static void update_ordered(ide_drive_t * if (barrier) { ordered = QUEUE_ORDERED_DRAIN_FLUSH; prep_fn = idedisk_prepare_flush; - issue_fn = idedisk_issue_flush; } } else ordered = QUEUE_ORDERED_DRAIN; blk_queue_ordered(drive->queue, ordered, prep_fn); - blk_queue_issue_flush_fn(drive->queue, issue_fn); } static int write_cache(ide_drive_t *drive, int arg) diff -puN drivers/ide/ide-dma.c~git-block drivers/ide/ide-dma.c --- a/drivers/ide/ide-dma.c~git-block +++ a/drivers/ide/ide-dma.c @@ -280,7 +280,7 @@ int ide_build_dmatable (ide_drive_t *dri } } - sg++; + sg = sg_next(sg); i--; } diff -puN drivers/ide/ide-floppy.c~git-block drivers/ide/ide-floppy.c --- a/drivers/ide/ide-floppy.c~git-block +++ a/drivers/ide/ide-floppy.c @@ -606,26 +606,24 @@ static void idefloppy_input_buffers (ide { struct request *rq = pc->rq; struct bio_vec *bvec; - struct bio *bio; + struct req_iterator iter; unsigned long flags; char *data; - int count, i, done = 0; + int count, done = 0; - rq_for_each_bio(bio, rq) { - bio_for_each_segment(bvec, bio, i) { - if (!bcount) - break; + rq_for_each_segment(bvec, rq, iter) { + if (!bcount) + break; - count = min(bvec->bv_len, bcount); + count = min(bvec->bv_len, bcount); - data = bvec_kmap_irq(bvec, &flags); - drive->hwif->atapi_input_bytes(drive, data, count); - bvec_kunmap_irq(data, &flags); - - bcount -= count; - pc->b_count += count; - done += count; - } + data = bvec_kmap_irq(bvec, &flags); + drive->hwif->atapi_input_bytes(drive, data, count); + bvec_kunmap_irq(data, &flags); + + bcount -= count; + pc->b_count += count; + done += count; } idefloppy_do_end_request(drive, 1, done >> 9); @@ -639,27 +637,25 @@ static void idefloppy_input_buffers (ide static void idefloppy_output_buffers (ide_drive_t *drive, idefloppy_pc_t *pc, unsigned int bcount) { struct request *rq = pc->rq; - struct bio *bio; + struct req_iterator iter; struct bio_vec *bvec; unsigned long flags; - int count, i, done = 0; + int count, done = 0; char *data; - rq_for_each_bio(bio, rq) { - bio_for_each_segment(bvec, bio, i) { - if (!bcount) - break; + rq_for_each_segment(bvec, rq, iter) { + if (!bcount) + break; - count = min(bvec->bv_len, bcount); + count = min(bvec->bv_len, bcount); - data = bvec_kmap_irq(bvec, &flags); - drive->hwif->atapi_output_bytes(drive, data, count); - bvec_kunmap_irq(data, &flags); - - bcount -= count; - pc->b_count += count; - done += count; - } + data = bvec_kmap_irq(bvec, &flags); + drive->hwif->atapi_output_bytes(drive, data, count); + bvec_kunmap_irq(data, &flags); + + bcount -= count; + pc->b_count += count; + done += count; } idefloppy_do_end_request(drive, 1, done >> 9); diff -puN drivers/ide/ide-io.c~git-block drivers/ide/ide-io.c --- a/drivers/ide/ide-io.c~git-block +++ a/drivers/ide/ide-io.c @@ -322,41 +322,6 @@ static void ide_complete_pm_request (ide spin_unlock_irqrestore(&ide_lock, flags); } -/* - * FIXME: probably move this somewhere else, name is bad too :) - */ -u64 ide_get_error_location(ide_drive_t *drive, char *args) -{ - u32 high, low; - u8 hcyl, lcyl, sect; - u64 sector; - - high = 0; - hcyl = args[5]; - lcyl = args[4]; - sect = args[3]; - - if (ide_id_has_flush_cache_ext(drive->id)) { - low = (hcyl << 16) | (lcyl << 8) | sect; - HWIF(drive)->OUTB(drive->ctl|0x80, IDE_CONTROL_REG); - high 
= ide_read_24(drive); - } else { - u8 cur = HWIF(drive)->INB(IDE_SELECT_REG); - if (cur & 0x40) { - high = cur & 0xf; - low = (hcyl << 16) | (lcyl << 8) | sect; - } else { - low = hcyl * drive->head * drive->sect; - low += lcyl * drive->sect; - low += sect - 1; - } - } - - sector = ((u64) high << 24) | low; - return sector; -} -EXPORT_SYMBOL(ide_get_error_location); - /** * ide_end_drive_cmd - end an explicit drive command * @drive: command @@ -882,7 +847,8 @@ void ide_init_sg_cmd(ide_drive_t *drive, ide_hwif_t *hwif = drive->hwif; hwif->nsect = hwif->nleft = rq->nr_sectors; - hwif->cursg = hwif->cursg_ofs = 0; + hwif->cursg_ofs = 0; + hwif->cursg = NULL; } EXPORT_SYMBOL_GPL(ide_init_sg_cmd); diff -puN drivers/ide/ide-probe.c~git-block drivers/ide/ide-probe.c --- a/drivers/ide/ide-probe.c~git-block +++ a/drivers/ide/ide-probe.c @@ -1317,7 +1317,7 @@ static int hwif_init(ide_hwif_t *hwif) if (!hwif->sg_max_nents) hwif->sg_max_nents = PRD_ENTRIES; - hwif->sg_table = kmalloc(sizeof(struct scatterlist)*hwif->sg_max_nents, + hwif->sg_table = kzalloc(sizeof(struct scatterlist)*hwif->sg_max_nents, GFP_KERNEL); if (!hwif->sg_table) { printk(KERN_ERR "%s: unable to allocate SG table.\n", hwif->name); diff -puN drivers/ide/ide-taskfile.c~git-block drivers/ide/ide-taskfile.c --- a/drivers/ide/ide-taskfile.c~git-block +++ a/drivers/ide/ide-taskfile.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -263,6 +264,7 @@ static void ide_pio_sector(ide_drive_t * { ide_hwif_t *hwif = drive->hwif; struct scatterlist *sg = hwif->sg_table; + struct scatterlist *cursg = hwif->cursg; struct page *page; #ifdef CONFIG_HIGHMEM unsigned long flags; @@ -270,8 +272,14 @@ static void ide_pio_sector(ide_drive_t * unsigned int offset; u8 *buf; - page = sg[hwif->cursg].page; - offset = sg[hwif->cursg].offset + hwif->cursg_ofs * SECTOR_SIZE; + cursg = hwif->cursg; + if (!cursg) { + cursg = sg; + hwif->cursg = sg; + } + + page = cursg->page; + offset = cursg->offset + hwif->cursg_ofs * SECTOR_SIZE; /* get the current page and offset */ page = nth_page(page, (offset >> PAGE_SHIFT)); @@ -285,8 +293,8 @@ static void ide_pio_sector(ide_drive_t * hwif->nleft--; hwif->cursg_ofs++; - if ((hwif->cursg_ofs * SECTOR_SIZE) == sg[hwif->cursg].length) { - hwif->cursg++; + if ((hwif->cursg_ofs * SECTOR_SIZE) == cursg->length) { + hwif->cursg = sg_next(hwif->cursg); hwif->cursg_ofs = 0; } @@ -367,6 +375,8 @@ static ide_startstop_t task_error(ide_dr static void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat) { + HWIF(drive)->cursg = NULL; + if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { ide_task_t *task = rq->special; diff -puN drivers/ide/mips/au1xxx-ide.c~git-block drivers/ide/mips/au1xxx-ide.c --- a/drivers/ide/mips/au1xxx-ide.c~git-block +++ a/drivers/ide/mips/au1xxx-ide.c @@ -296,7 +296,7 @@ static int auide_build_dmatable(ide_driv cur_addr += tc; cur_len -= tc; } - sg++; + sg = sg_next(sg); i--; } diff -puN drivers/ide/pci/sgiioc4.c~git-block drivers/ide/pci/sgiioc4.c --- a/drivers/ide/pci/sgiioc4.c~git-block +++ a/drivers/ide/pci/sgiioc4.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -523,7 +524,7 @@ sgiioc4_build_dma_table(ide_drive_t * dr } } - sg++; + sg = sg_next(sg); i--; } diff -puN drivers/ide/ppc/pmac.c~git-block drivers/ide/ppc/pmac.c --- a/drivers/ide/ppc/pmac.c~git-block +++ a/drivers/ide/ppc/pmac.c @@ -1533,7 +1533,7 @@ pmac_ide_build_dmatable(ide_drive_t *dri cur_len -= tc; ++table; } - sg++; + sg = sg_next(sg); i--; } diff -puN 
drivers/infiniband/hw/ipath/ipath_dma.c~git-block drivers/infiniband/hw/ipath/ipath_dma.c --- a/drivers/infiniband/hw/ipath/ipath_dma.c~git-block +++ a/drivers/infiniband/hw/ipath/ipath_dma.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include #include #include "ipath_verbs.h" @@ -96,17 +97,18 @@ static void ipath_dma_unmap_page(struct BUG_ON(!valid_dma_direction(direction)); } -static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction direction) +static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction) { + struct scatterlist *sg; u64 addr; int i; int ret = nents; BUG_ON(!valid_dma_direction(direction)); - for (i = 0; i < nents; i++) { - addr = (u64) page_address(sg[i].page); + for_each_sg(sgl, sg, nents, i) { + addr = (u64) page_address(sg->page); /* TODO: handle highmem pages */ if (!addr) { ret = 0; diff -puN drivers/infiniband/ulp/iser/iser_memory.c~git-block drivers/infiniband/ulp/iser/iser_memory.c --- a/drivers/infiniband/ulp/iser/iser_memory.c~git-block +++ a/drivers/infiniband/ulp/iser/iser_memory.c @@ -124,17 +124,19 @@ static int iser_start_rdma_unaligned_sg( if (cmd_dir == ISER_DIR_OUT) { /* copy the unaligned sg the buffer which is used for RDMA */ - struct scatterlist *sg = (struct scatterlist *)data->buf; + struct scatterlist *sgl = (struct scatterlist *)data->buf; + struct scatterlist *sg; int i; char *p, *from; - for (p = mem, i = 0; i < data->size; i++) { - from = kmap_atomic(sg[i].page, KM_USER0); + p = mem; + for_each_sg(sgl, sg, data->size, i) { + from = kmap_atomic(sg->page, KM_USER0); memcpy(p, - from + sg[i].offset, - sg[i].length); + from + sg->offset, + sg->length); kunmap_atomic(from, KM_USER0); - p += sg[i].length; + p += sg->length; } } @@ -176,7 +178,7 @@ void iser_finalize_rdma_unaligned_sg(str if (cmd_dir == ISER_DIR_IN) { char *mem; - struct scatterlist *sg; + struct scatterlist *sgl, *sg; unsigned char *p, *to; unsigned int sg_size; int i; @@ -184,16 +186,17 @@ void iser_finalize_rdma_unaligned_sg(str /* copy back read RDMA to unaligned sg */ mem = mem_copy->copy_buf; - sg = (struct scatterlist *)iser_ctask->data[ISER_DIR_IN].buf; + sgl = (struct scatterlist *)iser_ctask->data[ISER_DIR_IN].buf; sg_size = iser_ctask->data[ISER_DIR_IN].size; - for (p = mem, i = 0; i < sg_size; i++){ - to = kmap_atomic(sg[i].page, KM_SOFTIRQ0); - memcpy(to + sg[i].offset, + p = mem; + for_each_sg(sgl, sg, sg_size, i) { + to = kmap_atomic(sg->page, KM_SOFTIRQ0); + memcpy(to + sg->offset, p, - sg[i].length); + sg->length); kunmap_atomic(to, KM_SOFTIRQ0); - p += sg[i].length; + p += sg->length; } } @@ -224,7 +227,8 @@ static int iser_sg_to_page_vec(struct is struct iser_page_vec *page_vec, struct ib_device *ibdev) { - struct scatterlist *sg = (struct scatterlist *)data->buf; + struct scatterlist *sgl = (struct scatterlist *)data->buf; + struct scatterlist *sg; u64 first_addr, last_addr, page; int end_aligned; unsigned int cur_page = 0; @@ -232,24 +236,25 @@ static int iser_sg_to_page_vec(struct is int i; /* compute the offset of first element */ - page_vec->offset = (u64) sg[0].offset & ~MASK_4K; + page_vec->offset = (u64) sgl[0].offset & ~MASK_4K; - for (i = 0; i < data->dma_nents; i++) { - unsigned int dma_len = ib_sg_dma_len(ibdev, &sg[i]); + for_each_sg(sgl, sg, data->dma_nents, i) { + unsigned int dma_len = ib_sg_dma_len(ibdev, sg); total_sz += dma_len; - first_addr = ib_sg_dma_address(ibdev, &sg[i]); + first_addr = ib_sg_dma_address(ibdev, sg); last_addr = first_addr + 
dma_len; end_aligned = !(last_addr & ~MASK_4K); /* continue to collect page fragments till aligned or SG ends */ while (!end_aligned && (i + 1 < data->dma_nents)) { + sg = sg_next(sg); i++; - dma_len = ib_sg_dma_len(ibdev, &sg[i]); + dma_len = ib_sg_dma_len(ibdev, sg); total_sz += dma_len; - last_addr = ib_sg_dma_address(ibdev, &sg[i]) + dma_len; + last_addr = ib_sg_dma_address(ibdev, sg) + dma_len; end_aligned = !(last_addr & ~MASK_4K); } @@ -284,25 +289,26 @@ static int iser_sg_to_page_vec(struct is static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data, struct ib_device *ibdev) { - struct scatterlist *sg; + struct scatterlist *sgl, *sg; u64 end_addr, next_addr; int i, cnt; unsigned int ret_len = 0; - sg = (struct scatterlist *)data->buf; + sgl = (struct scatterlist *)data->buf; - for (cnt = 0, i = 0; i < data->dma_nents; i++, cnt++) { + cnt = 0; + for_each_sg(sgl, sg, data->dma_nents, i) { /* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX " "offset: %ld sz: %ld\n", i, - (unsigned long)page_to_phys(sg[i].page), - (unsigned long)sg[i].offset, - (unsigned long)sg[i].length); */ - end_addr = ib_sg_dma_address(ibdev, &sg[i]) + - ib_sg_dma_len(ibdev, &sg[i]); + (unsigned long)page_to_phys(sg->page), + (unsigned long)sg->offset, + (unsigned long)sg->length); */ + end_addr = ib_sg_dma_address(ibdev, sg) + + ib_sg_dma_len(ibdev, sg); /* iser_dbg("Checking sg iobuf end address " "0x%08lX\n", end_addr); */ if (i + 1 < data->dma_nents) { - next_addr = ib_sg_dma_address(ibdev, &sg[i+1]); + next_addr = ib_sg_dma_address(ibdev, sg_next(sg)); /* are i, i+1 fragments of the same page? */ if (end_addr == next_addr) continue; @@ -322,15 +328,16 @@ static unsigned int iser_data_buf_aligne static void iser_data_buf_dump(struct iser_data_buf *data, struct ib_device *ibdev) { - struct scatterlist *sg = (struct scatterlist *)data->buf; + struct scatterlist *sgl = (struct scatterlist *)data->buf; + struct scatterlist *sg; int i; - for (i = 0; i < data->dma_nents; i++) + for_each_sg(sgl, sg, data->dma_nents, i) iser_err("sg[%d] dma_addr:0x%lX page:0x%p " "off:0x%x sz:0x%x dma_len:0x%x\n", - i, (unsigned long)ib_sg_dma_address(ibdev, &sg[i]), - sg[i].page, sg[i].offset, - sg[i].length, ib_sg_dma_len(ibdev, &sg[i])); + i, (unsigned long)ib_sg_dma_address(ibdev, sg), + sg->page, sg->offset, + sg->length, ib_sg_dma_len(ibdev, sg)); } static void iser_dump_page_vec(struct iser_page_vec *page_vec) diff -puN drivers/md/dm-emc.c~git-block drivers/md/dm-emc.c --- a/drivers/md/dm-emc.c~git-block +++ a/drivers/md/dm-emc.c @@ -109,15 +109,7 @@ static struct request *get_failover_req( return NULL; } - rq->bio = rq->biotail = bio; - blk_rq_bio_prep(q, rq, bio); - - rq->rq_disk = bdev->bd_contains->bd_disk; - - /* bio backed don't set data */ - rq->buffer = rq->data = NULL; - /* rq data_len used for pc cmd's request_bufflen */ - rq->data_len = bio->bi_size; + blk_rq_append_bio(q, rq, bio); rq->sense = h->sense; memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); diff -puN drivers/md/dm-table.c~git-block drivers/md/dm-table.c --- a/drivers/md/dm-table.c~git-block +++ a/drivers/md/dm-table.c @@ -998,33 +998,6 @@ void dm_table_unplug_all(struct dm_table } } -int dm_table_flush_all(struct dm_table *t) -{ - struct list_head *d, *devices = dm_table_get_devices(t); - int ret = 0; - unsigned i; - - for (i = 0; i < t->num_targets; i++) - if (t->targets[i].type->flush) - t->targets[i].type->flush(&t->targets[i]); - - for (d = devices->next; d != devices; d = d->next) { - struct dm_dev *dd = list_entry(d, struct dm_dev, list); 
- struct request_queue *q = bdev_get_queue(dd->bdev); - int err; - - if (!q->issue_flush_fn) - err = -EOPNOTSUPP; - else - err = q->issue_flush_fn(q, dd->bdev->bd_disk, NULL); - - if (!ret) - ret = err; - } - - return ret; -} - struct mapped_device *dm_table_get_md(struct dm_table *t) { dm_get(t->md); @@ -1042,4 +1015,3 @@ EXPORT_SYMBOL(dm_table_get_md); EXPORT_SYMBOL(dm_table_put); EXPORT_SYMBOL(dm_table_get); EXPORT_SYMBOL(dm_table_unplug_all); -EXPORT_SYMBOL(dm_table_flush_all); diff -puN drivers/md/dm.c~git-block drivers/md/dm.c --- a/drivers/md/dm.c~git-block +++ a/drivers/md/dm.c @@ -842,21 +842,6 @@ out_req: return 0; } -static int dm_flush_all(struct request_queue *q, struct gendisk *disk, - sector_t *error_sector) -{ - struct mapped_device *md = q->queuedata; - struct dm_table *map = dm_get_table(md); - int ret = -ENXIO; - - if (map) { - ret = dm_table_flush_all(map); - dm_table_put(map); - } - - return ret; -} - static void dm_unplug_all(struct request_queue *q) { struct mapped_device *md = q->queuedata; @@ -1005,7 +990,6 @@ static struct mapped_device *alloc_dev(i blk_queue_make_request(md->queue, dm_request); blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); md->queue->unplug_fn = dm_unplug_all; - md->queue->issue_flush_fn = dm_flush_all; md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache); if (!md->io_pool) diff -puN drivers/md/dm.h~git-block drivers/md/dm.h --- a/drivers/md/dm.h~git-block +++ a/drivers/md/dm.h @@ -111,7 +111,6 @@ void dm_table_postsuspend_targets(struct int dm_table_resume_targets(struct dm_table *t); int dm_table_any_congested(struct dm_table *t, int bdi_bits); void dm_table_unplug_all(struct dm_table *t); -int dm_table_flush_all(struct dm_table *t); /*----------------------------------------------------------------- * A registry of target types. 
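The conversions above (ide-taskfile, au1xxx-ide, sgiioc4, pmac, ipath, iser) all share one pattern: with sg chaining, a scatterlist can no longer be walked as a flat array via sg++ or sg[i], because the slot after the last entry of one array may be a chain pointer into another array. Walks have to go through sg_next()/for_each_sg(), which follow chain entries transparently. A minimal sketch of that pattern, assuming an already dma-mapped list; total_dma_len() is an invented helper, not taken from any driver in this series:

#include <linux/scatterlist.h>

/* Sum the DMA lengths of a possibly chained scatterlist. The old
 * flat-array walk is kept in the comment for contrast. */
static unsigned int total_dma_len(struct scatterlist *sgl, int nents)
{
	struct scatterlist *sg;
	unsigned int total = 0;
	int i;

	/* pre-chaining style, only valid for a flat array:
	 *	for (i = 0; i < nents; i++)
	 *		total += sg_dma_len(&sgl[i]);
	 */
	for_each_sg(sgl, sg, nents, i)		/* advances via sg_next() */
		total += sg_dma_len(sg);

	return total;
}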
diff -puN drivers/md/linear.c~git-block drivers/md/linear.c --- a/drivers/md/linear.c~git-block +++ a/drivers/md/linear.c @@ -92,25 +92,6 @@ static void linear_unplug(struct request } } -static int linear_issue_flush(struct request_queue *q, struct gendisk *disk, - sector_t *error_sector) -{ - mddev_t *mddev = q->queuedata; - linear_conf_t *conf = mddev_to_conf(mddev); - int i, ret = 0; - - for (i=0; i < mddev->raid_disks && ret == 0; i++) { - struct block_device *bdev = conf->disks[i].rdev->bdev; - struct request_queue *r_queue = bdev_get_queue(bdev); - - if (!r_queue->issue_flush_fn) - ret = -EOPNOTSUPP; - else - ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector); - } - return ret; -} - static int linear_congested(void *data, int bits) { mddev_t *mddev = data; @@ -279,7 +260,6 @@ static int linear_run (mddev_t *mddev) blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec); mddev->queue->unplug_fn = linear_unplug; - mddev->queue->issue_flush_fn = linear_issue_flush; mddev->queue->backing_dev_info.congested_fn = linear_congested; mddev->queue->backing_dev_info.congested_data = mddev; return 0; diff -puN drivers/md/md.c~git-block drivers/md/md.c --- a/drivers/md/md.c~git-block +++ a/drivers/md/md.c @@ -3472,7 +3472,6 @@ static int do_md_stop(mddev_t * mddev, i mddev->pers->stop(mddev); mddev->queue->merge_bvec_fn = NULL; mddev->queue->unplug_fn = NULL; - mddev->queue->issue_flush_fn = NULL; mddev->queue->backing_dev_info.congested_fn = NULL; if (mddev->pers->sync_request) sysfs_remove_group(&mddev->kobj, &md_redundancy_group); diff -puN drivers/md/multipath.c~git-block drivers/md/multipath.c --- a/drivers/md/multipath.c~git-block +++ a/drivers/md/multipath.c @@ -199,35 +199,6 @@ static void multipath_status (struct seq seq_printf (seq, "]"); } -static int multipath_issue_flush(struct request_queue *q, struct gendisk *disk, - sector_t *error_sector) -{ - mddev_t *mddev = q->queuedata; - multipath_conf_t *conf = mddev_to_conf(mddev); - int i, ret = 0; - - rcu_read_lock(); - for (i=0; iraid_disks && ret == 0; i++) { - mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); - if (rdev && !test_bit(Faulty, &rdev->flags)) { - struct block_device *bdev = rdev->bdev; - struct request_queue *r_queue = bdev_get_queue(bdev); - - if (!r_queue->issue_flush_fn) - ret = -EOPNOTSUPP; - else { - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, - error_sector); - rdev_dec_pending(rdev, mddev); - rcu_read_lock(); - } - } - } - rcu_read_unlock(); - return ret; -} static int multipath_congested(void *data, int bits) { mddev_t *mddev = data; @@ -532,7 +503,6 @@ static int multipath_run (mddev_t *mddev mddev->array_size = mddev->size; mddev->queue->unplug_fn = multipath_unplug; - mddev->queue->issue_flush_fn = multipath_issue_flush; mddev->queue->backing_dev_info.congested_fn = multipath_congested; mddev->queue->backing_dev_info.congested_data = mddev; diff -puN drivers/md/raid0.c~git-block drivers/md/raid0.c --- a/drivers/md/raid0.c~git-block +++ a/drivers/md/raid0.c @@ -40,26 +40,6 @@ static void raid0_unplug(struct request_ } } -static int raid0_issue_flush(struct request_queue *q, struct gendisk *disk, - sector_t *error_sector) -{ - mddev_t *mddev = q->queuedata; - raid0_conf_t *conf = mddev_to_conf(mddev); - mdk_rdev_t **devlist = conf->strip_zone[0].dev; - int i, ret = 0; - - for (i=0; iraid_disks && ret == 0; i++) { - struct block_device *bdev = devlist[i]->bdev; - struct request_queue *r_queue = bdev_get_queue(bdev); - - 
if (!r_queue->issue_flush_fn) - ret = -EOPNOTSUPP; - else - ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector); - } - return ret; -} - static int raid0_congested(void *data, int bits) { mddev_t *mddev = data; @@ -250,7 +230,6 @@ static int create_strip_zones (mddev_t * mddev->queue->unplug_fn = raid0_unplug; - mddev->queue->issue_flush_fn = raid0_issue_flush; mddev->queue->backing_dev_info.congested_fn = raid0_congested; mddev->queue->backing_dev_info.congested_data = mddev; diff -puN drivers/md/raid1.c~git-block drivers/md/raid1.c --- a/drivers/md/raid1.c~git-block +++ a/drivers/md/raid1.c @@ -575,36 +575,6 @@ static void raid1_unplug(struct request_ md_wakeup_thread(mddev->thread); } -static int raid1_issue_flush(struct request_queue *q, struct gendisk *disk, - sector_t *error_sector) -{ - mddev_t *mddev = q->queuedata; - conf_t *conf = mddev_to_conf(mddev); - int i, ret = 0; - - rcu_read_lock(); - for (i=0; iraid_disks && ret == 0; i++) { - mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); - if (rdev && !test_bit(Faulty, &rdev->flags)) { - struct block_device *bdev = rdev->bdev; - struct request_queue *r_queue = bdev_get_queue(bdev); - - if (!r_queue->issue_flush_fn) - ret = -EOPNOTSUPP; - else { - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, - error_sector); - rdev_dec_pending(rdev, mddev); - rcu_read_lock(); - } - } - } - rcu_read_unlock(); - return ret; -} - static int raid1_congested(void *data, int bits) { mddev_t *mddev = data; @@ -2013,7 +1983,6 @@ static int run(mddev_t *mddev) mddev->array_size = mddev->size; mddev->queue->unplug_fn = raid1_unplug; - mddev->queue->issue_flush_fn = raid1_issue_flush; mddev->queue->backing_dev_info.congested_fn = raid1_congested; mddev->queue->backing_dev_info.congested_data = mddev; diff -puN drivers/md/raid10.c~git-block drivers/md/raid10.c --- a/drivers/md/raid10.c~git-block +++ a/drivers/md/raid10.c @@ -618,36 +618,6 @@ static void raid10_unplug(struct request md_wakeup_thread(mddev->thread); } -static int raid10_issue_flush(struct request_queue *q, struct gendisk *disk, - sector_t *error_sector) -{ - mddev_t *mddev = q->queuedata; - conf_t *conf = mddev_to_conf(mddev); - int i, ret = 0; - - rcu_read_lock(); - for (i=0; iraid_disks && ret == 0; i++) { - mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); - if (rdev && !test_bit(Faulty, &rdev->flags)) { - struct block_device *bdev = rdev->bdev; - struct request_queue *r_queue = bdev_get_queue(bdev); - - if (!r_queue->issue_flush_fn) - ret = -EOPNOTSUPP; - else { - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, - error_sector); - rdev_dec_pending(rdev, mddev); - rcu_read_lock(); - } - } - } - rcu_read_unlock(); - return ret; -} - static int raid10_congested(void *data, int bits) { mddev_t *mddev = data; @@ -2133,7 +2103,6 @@ static int run(mddev_t *mddev) mddev->resync_max_sectors = size << conf->chunk_shift; mddev->queue->unplug_fn = raid10_unplug; - mddev->queue->issue_flush_fn = raid10_issue_flush; mddev->queue->backing_dev_info.congested_fn = raid10_congested; mddev->queue->backing_dev_info.congested_data = mddev; diff -puN drivers/md/raid5.c~git-block drivers/md/raid5.c --- a/drivers/md/raid5.c~git-block +++ a/drivers/md/raid5.c @@ -3778,36 +3778,6 @@ static void raid5_unplug_device(struct r unplug_slaves(mddev); } -static int raid5_issue_flush(struct request_queue *q, struct gendisk *disk, - sector_t *error_sector) -{ - mddev_t *mddev 
= q->queuedata; - raid5_conf_t *conf = mddev_to_conf(mddev); - int i, ret = 0; - - rcu_read_lock(); - for (i=0; iraid_disks && ret == 0; i++) { - mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev); - if (rdev && !test_bit(Faulty, &rdev->flags)) { - struct block_device *bdev = rdev->bdev; - struct request_queue *r_queue = bdev_get_queue(bdev); - - if (!r_queue->issue_flush_fn) - ret = -EOPNOTSUPP; - else { - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, - error_sector); - rdev_dec_pending(rdev, mddev); - rcu_read_lock(); - } - } - } - rcu_read_unlock(); - return ret; -} - static int raid5_congested(void *data, int bits) { mddev_t *mddev = data; @@ -4981,7 +4951,6 @@ static int run(mddev_t *mddev) mdname(mddev)); mddev->queue->unplug_fn = raid5_unplug_device; - mddev->queue->issue_flush_fn = raid5_issue_flush; mddev->queue->backing_dev_info.congested_data = mddev; mddev->queue->backing_dev_info.congested_fn = raid5_congested; diff -puN drivers/message/fusion/mptscsih.c~git-block drivers/message/fusion/mptscsih.c --- a/drivers/message/fusion/mptscsih.c~git-block +++ a/drivers/message/fusion/mptscsih.c @@ -290,7 +290,7 @@ nextSGEset: for (ii=0; ii < (numSgeThisFrame-1); ii++) { thisxfer = sg_dma_len(sg); if (thisxfer == 0) { - sg ++; /* Get next SG element from the OS */ + sg = sg_next(sg); /* Get next SG element from the OS */ sg_done++; continue; } @@ -298,7 +298,7 @@ nextSGEset: v2 = sg_dma_address(sg); mptscsih_add_sge(psge, sgflags | thisxfer, v2); - sg++; /* Get next SG element from the OS */ + sg = sg_next(sg); /* Get next SG element from the OS */ psge += (sizeof(u32) + sizeof(dma_addr_t)); sgeOffset += (sizeof(u32) + sizeof(dma_addr_t)); sg_done++; @@ -319,7 +319,7 @@ nextSGEset: v2 = sg_dma_address(sg); mptscsih_add_sge(psge, sgflags | thisxfer, v2); /* - sg++; + sg = sg_next(sg); psge += (sizeof(u32) + sizeof(dma_addr_t)); */ sgeOffset += (sizeof(u32) + sizeof(dma_addr_t)); diff -puN drivers/message/i2o/i2o_block.c~git-block drivers/message/i2o/i2o_block.c --- a/drivers/message/i2o/i2o_block.c~git-block +++ a/drivers/message/i2o/i2o_block.c @@ -149,29 +149,6 @@ static int i2o_block_device_flush(struct }; /** - * i2o_block_issue_flush - device-flush interface for block-layer - * @queue: the request queue of the device which should be flushed - * @disk: gendisk - * @error_sector: error offset - * - * Helper function to provide flush functionality to block-layer. - * - * Returns 0 on success or negative error code on failure. 
- */ - -static int i2o_block_issue_flush(struct request_queue * queue, struct gendisk *disk, - sector_t * error_sector) -{ - struct i2o_block_device *i2o_blk_dev = queue->queuedata; - int rc = -ENODEV; - - if (likely(i2o_blk_dev)) - rc = i2o_block_device_flush(i2o_blk_dev->i2o_dev); - - return rc; -} - -/** * i2o_block_device_mount - Mount (load) the media of device dev * @dev: I2O device which should receive the mount request * @media_id: Media Identifier @@ -1009,7 +986,6 @@ static struct i2o_block_device *i2o_bloc } blk_queue_prep_rq(queue, i2o_block_prep_req_fn); - blk_queue_issue_flush_fn(queue, i2o_block_issue_flush); gd->major = I2O_MAJOR; gd->queue = queue; diff -puN drivers/s390/block/dasd_diag.c~git-block drivers/s390/block/dasd_diag.c --- a/drivers/s390/block/dasd_diag.c~git-block +++ a/drivers/s390/block/dasd_diag.c @@ -472,14 +472,13 @@ dasd_diag_build_cp(struct dasd_device * struct dasd_ccw_req *cqr; struct dasd_diag_req *dreq; struct dasd_diag_bio *dbio; - struct bio *bio; + struct req_iterator iter; struct bio_vec *bv; char *dst; unsigned int count, datasize; sector_t recid, first_rec, last_rec; unsigned int blksize, off; unsigned char rw_cmd; - int i; if (rq_data_dir(req) == READ) rw_cmd = MDSK_READ_REQ; @@ -493,13 +492,11 @@ dasd_diag_build_cp(struct dasd_device * last_rec = (req->sector + req->nr_sectors - 1) >> device->s2b_shift; /* Check struct bio and count the number of blocks for the request. */ count = 0; - rq_for_each_bio(bio, req) { - bio_for_each_segment(bv, bio, i) { - if (bv->bv_len & (blksize - 1)) - /* Fba can only do full blocks. */ - return ERR_PTR(-EINVAL); - count += bv->bv_len >> (device->s2b_shift + 9); - } + rq_for_each_segment(bv, req, iter) { + if (bv->bv_len & (blksize - 1)) + /* Fba can only do full blocks. */ + return ERR_PTR(-EINVAL); + count += bv->bv_len >> (device->s2b_shift + 9); } /* Paranoia. */ if (count != last_rec - first_rec + 1) @@ -516,18 +513,16 @@ dasd_diag_build_cp(struct dasd_device * dreq->block_count = count; dbio = dreq->bio; recid = first_rec; - rq_for_each_bio(bio, req) { - bio_for_each_segment(bv, bio, i) { - dst = page_address(bv->bv_page) + bv->bv_offset; - for (off = 0; off < bv->bv_len; off += blksize) { - memset(dbio, 0, sizeof (struct dasd_diag_bio)); - dbio->type = rw_cmd; - dbio->block_number = recid + 1; - dbio->buffer = dst; - dbio++; - dst += blksize; - recid++; - } + rq_for_each_segment(bv, req, iter) { + dst = page_address(bv->bv_page) + bv->bv_offset; + for (off = 0; off < bv->bv_len; off += blksize) { + memset(dbio, 0, sizeof (struct dasd_diag_bio)); + dbio->type = rw_cmd; + dbio->block_number = recid + 1; + dbio->buffer = dst; + dbio++; + dst += blksize; + recid++; } } cqr->retries = DIAG_MAX_RETRIES; diff -puN drivers/s390/block/dasd_eckd.c~git-block drivers/s390/block/dasd_eckd.c --- a/drivers/s390/block/dasd_eckd.c~git-block +++ a/drivers/s390/block/dasd_eckd.c @@ -1176,7 +1176,7 @@ dasd_eckd_build_cp(struct dasd_device * struct LO_eckd_data *LO_data; struct dasd_ccw_req *cqr; struct ccw1 *ccw; - struct bio *bio; + struct req_iterator iter; struct bio_vec *bv; char *dst; unsigned int blksize, blk_per_trk, off; @@ -1185,7 +1185,6 @@ dasd_eckd_build_cp(struct dasd_device * sector_t first_trk, last_trk; unsigned int first_offs, last_offs; unsigned char cmd, rcmd; - int i; private = (struct dasd_eckd_private *) device->private; if (rq_data_dir(req) == READ) @@ -1206,18 +1205,15 @@ dasd_eckd_build_cp(struct dasd_device * /* Check struct bio and count the number of blocks for the request. 
*/ count = 0; cidaw = 0; - rq_for_each_bio(bio, req) { - bio_for_each_segment(bv, bio, i) { - if (bv->bv_len & (blksize - 1)) - /* Eckd can only do full blocks. */ - return ERR_PTR(-EINVAL); - count += bv->bv_len >> (device->s2b_shift + 9); + rq_for_each_segment(bv, req, iter) { + if (bv->bv_len & (blksize - 1)) + /* Eckd can only do full blocks. */ + return ERR_PTR(-EINVAL); + count += bv->bv_len >> (device->s2b_shift + 9); #if defined(CONFIG_64BIT) - if (idal_is_needed (page_address(bv->bv_page), - bv->bv_len)) - cidaw += bv->bv_len >> (device->s2b_shift + 9); + if (idal_is_needed (page_address(bv->bv_page), bv->bv_len)) + cidaw += bv->bv_len >> (device->s2b_shift + 9); #endif - } } /* Paranoia. */ if (count != last_rec - first_rec + 1) @@ -1257,7 +1253,7 @@ dasd_eckd_build_cp(struct dasd_device * locate_record(ccw++, LO_data++, first_trk, first_offs + 1, last_rec - recid + 1, cmd, device, blksize); } - rq_for_each_bio(bio, req) bio_for_each_segment(bv, bio, i) { + rq_for_each_segment(bv, req, iter) { dst = page_address(bv->bv_page) + bv->bv_offset; if (dasd_page_cache) { char *copy = kmem_cache_alloc(dasd_page_cache, @@ -1328,12 +1324,12 @@ dasd_eckd_free_cp(struct dasd_ccw_req *c { struct dasd_eckd_private *private; struct ccw1 *ccw; - struct bio *bio; + struct req_iterator iter; struct bio_vec *bv; char *dst, *cda; unsigned int blksize, blk_per_trk, off; sector_t recid; - int i, status; + int status; if (!dasd_page_cache) goto out; @@ -1346,7 +1342,7 @@ dasd_eckd_free_cp(struct dasd_ccw_req *c ccw++; if (private->uses_cdl == 0 || recid > 2*blk_per_trk) ccw++; - rq_for_each_bio(bio, req) bio_for_each_segment(bv, bio, i) { + rq_for_each_segment(bv, req, iter) { dst = page_address(bv->bv_page) + bv->bv_offset; for (off = 0; off < bv->bv_len; off += blksize) { /* Skip locate record. */ diff -puN drivers/s390/block/dasd_fba.c~git-block drivers/s390/block/dasd_fba.c --- a/drivers/s390/block/dasd_fba.c~git-block +++ a/drivers/s390/block/dasd_fba.c @@ -234,14 +234,13 @@ dasd_fba_build_cp(struct dasd_device * d struct LO_fba_data *LO_data; struct dasd_ccw_req *cqr; struct ccw1 *ccw; - struct bio *bio; + struct req_iterator iter; struct bio_vec *bv; char *dst; int count, cidaw, cplength, datasize; sector_t recid, first_rec, last_rec; unsigned int blksize, off; unsigned char cmd; - int i; private = (struct dasd_fba_private *) device->private; if (rq_data_dir(req) == READ) { @@ -257,18 +256,15 @@ dasd_fba_build_cp(struct dasd_device * d /* Check struct bio and count the number of blocks for the request. */ count = 0; cidaw = 0; - rq_for_each_bio(bio, req) { - bio_for_each_segment(bv, bio, i) { - if (bv->bv_len & (blksize - 1)) - /* Fba can only do full blocks. */ - return ERR_PTR(-EINVAL); - count += bv->bv_len >> (device->s2b_shift + 9); + rq_for_each_segment(bv, req, iter) { + if (bv->bv_len & (blksize - 1)) + /* Fba can only do full blocks. */ + return ERR_PTR(-EINVAL); + count += bv->bv_len >> (device->s2b_shift + 9); #if defined(CONFIG_64BIT) - if (idal_is_needed (page_address(bv->bv_page), - bv->bv_len)) - cidaw += bv->bv_len / blksize; + if (idal_is_needed (page_address(bv->bv_page), bv->bv_len)) + cidaw += bv->bv_len / blksize; #endif - } } /* Paranoia. 
*/ if (count != last_rec - first_rec + 1) @@ -304,7 +300,7 @@ dasd_fba_build_cp(struct dasd_device * d locate_record(ccw++, LO_data++, rq_data_dir(req), 0, count); } recid = first_rec; - rq_for_each_bio(bio, req) bio_for_each_segment(bv, bio, i) { + rq_for_each_segment(bv, req, iter) { dst = page_address(bv->bv_page) + bv->bv_offset; if (dasd_page_cache) { char *copy = kmem_cache_alloc(dasd_page_cache, @@ -359,11 +355,11 @@ dasd_fba_free_cp(struct dasd_ccw_req *cq { struct dasd_fba_private *private; struct ccw1 *ccw; - struct bio *bio; + struct req_iterator iter; struct bio_vec *bv; char *dst, *cda; unsigned int blksize, off; - int i, status; + int status; if (!dasd_page_cache) goto out; @@ -374,7 +370,7 @@ dasd_fba_free_cp(struct dasd_ccw_req *cq ccw++; if (private->rdc_data.mode.bits.data_chain != 0) ccw++; - rq_for_each_bio(bio, req) bio_for_each_segment(bv, bio, i) { + rq_for_each_segment(bv, req, iter) { dst = page_address(bv->bv_page) + bv->bv_offset; for (off = 0; off < bv->bv_len; off += blksize) { /* Skip locate record. */ diff -puN drivers/s390/char/tape_34xx.c~git-block drivers/s390/char/tape_34xx.c --- a/drivers/s390/char/tape_34xx.c~git-block +++ a/drivers/s390/char/tape_34xx.c @@ -1134,21 +1134,18 @@ tape_34xx_bread(struct tape_device *devi { struct tape_request *request; struct ccw1 *ccw; - int count = 0, i; + int count = 0; unsigned off; char *dst; struct bio_vec *bv; - struct bio *bio; + struct req_iterator iter; struct tape_34xx_block_id * start_block; DBF_EVENT(6, "xBREDid:"); /* Count the number of blocks for the request. */ - rq_for_each_bio(bio, req) { - bio_for_each_segment(bv, bio, i) { - count += bv->bv_len >> (TAPEBLOCK_HSEC_S2B + 9); - } - } + rq_for_each_segment(bv, req, iter) + count += bv->bv_len >> (TAPEBLOCK_HSEC_S2B + 9); /* Allocate the ccw request. 
*/ request = tape_alloc_request(3+count+1, 8); @@ -1175,18 +1172,15 @@ tape_34xx_bread(struct tape_device *devi ccw = tape_ccw_cc(ccw, NOP, 0, NULL); ccw = tape_ccw_cc(ccw, NOP, 0, NULL); - rq_for_each_bio(bio, req) { - bio_for_each_segment(bv, bio, i) { - dst = kmap(bv->bv_page) + bv->bv_offset; - for (off = 0; off < bv->bv_len; - off += TAPEBLOCK_HSEC_SIZE) { - ccw->flags = CCW_FLAG_CC; - ccw->cmd_code = READ_FORWARD; - ccw->count = TAPEBLOCK_HSEC_SIZE; - set_normalized_cda(ccw, (void*) __pa(dst)); - ccw++; - dst += TAPEBLOCK_HSEC_SIZE; - } + rq_for_each_segment(bv, req, iter) { + dst = kmap(bv->bv_page) + bv->bv_offset; + for (off = 0; off < bv->bv_len; off += TAPEBLOCK_HSEC_SIZE) { + ccw->flags = CCW_FLAG_CC; + ccw->cmd_code = READ_FORWARD; + ccw->count = TAPEBLOCK_HSEC_SIZE; + set_normalized_cda(ccw, (void*) __pa(dst)); + ccw++; + dst += TAPEBLOCK_HSEC_SIZE; } } diff -puN drivers/s390/char/tape_3590.c~git-block drivers/s390/char/tape_3590.c --- a/drivers/s390/char/tape_3590.c~git-block +++ a/drivers/s390/char/tape_3590.c @@ -623,21 +623,19 @@ tape_3590_bread(struct tape_device *devi { struct tape_request *request; struct ccw1 *ccw; - int count = 0, start_block, i; + int count = 0, start_block; unsigned off; char *dst; struct bio_vec *bv; - struct bio *bio; + struct req_iterator iter; DBF_EVENT(6, "xBREDid:"); start_block = req->sector >> TAPEBLOCK_HSEC_S2B; DBF_EVENT(6, "start_block = %i\n", start_block); - rq_for_each_bio(bio, req) { - bio_for_each_segment(bv, bio, i) { - count += bv->bv_len >> (TAPEBLOCK_HSEC_S2B + 9); - } - } + rq_for_each_segment(bv, req, iter) + count += bv->bv_len >> (TAPEBLOCK_HSEC_S2B + 9); + request = tape_alloc_request(2 + count + 1, 4); if (IS_ERR(request)) return request; @@ -653,21 +651,18 @@ tape_3590_bread(struct tape_device *devi */ ccw = tape_ccw_cc(ccw, NOP, 0, NULL); - rq_for_each_bio(bio, req) { - bio_for_each_segment(bv, bio, i) { - dst = page_address(bv->bv_page) + bv->bv_offset; - for (off = 0; off < bv->bv_len; - off += TAPEBLOCK_HSEC_SIZE) { - ccw->flags = CCW_FLAG_CC; - ccw->cmd_code = READ_FORWARD; - ccw->count = TAPEBLOCK_HSEC_SIZE; - set_normalized_cda(ccw, (void *) __pa(dst)); - ccw++; - dst += TAPEBLOCK_HSEC_SIZE; - } - if (off > bv->bv_len) - BUG(); + rq_for_each_segment(bv, req, iter) { + dst = page_address(bv->bv_page) + bv->bv_offset; + for (off = 0; off < bv->bv_len; off += TAPEBLOCK_HSEC_SIZE) { + ccw->flags = CCW_FLAG_CC; + ccw->cmd_code = READ_FORWARD; + ccw->count = TAPEBLOCK_HSEC_SIZE; + set_normalized_cda(ccw, (void *) __pa(dst)); + ccw++; + dst += TAPEBLOCK_HSEC_SIZE; } + if (off > bv->bv_len) + BUG(); } ccw = tape_ccw_end(ccw, NOP, 0, NULL); DBF_EVENT(6, "xBREDccwg\n"); diff -puN drivers/s390/scsi/zfcp_def.h~git-block drivers/s390/scsi/zfcp_def.h --- a/drivers/s390/scsi/zfcp_def.h~git-block +++ a/drivers/s390/scsi/zfcp_def.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff -puN drivers/s390/scsi/zfcp_qdio.c~git-block drivers/s390/scsi/zfcp_qdio.c --- a/drivers/s390/scsi/zfcp_qdio.c~git-block +++ a/drivers/s390/scsi/zfcp_qdio.c @@ -590,7 +590,7 @@ zfcp_qdio_sbals_from_segment(struct zfcp */ int zfcp_qdio_sbals_from_sg(struct zfcp_fsf_req *fsf_req, unsigned long sbtype, - struct scatterlist *sg, int sg_count, int max_sbals) + struct scatterlist *sgl, int sg_count, int max_sbals) { int sg_index; struct scatterlist *sg_segment; @@ -606,9 +606,7 @@ zfcp_qdio_sbals_from_sg(struct zfcp_fsf_ sbale->flags |= sbtype; /* process all segements of scatter-gather list */ - for (sg_index = 0, 
sg_segment = sg, bytes = 0; - sg_index < sg_count; - sg_index++, sg_segment++) { + for_each_sg(sgl, sg_segment, sg_count, sg_index) { retval = zfcp_qdio_sbals_from_segment( fsf_req, sbtype, diff -puN drivers/scsi/3w-9xxx.c~git-block drivers/scsi/3w-9xxx.c --- a/drivers/scsi/3w-9xxx.c~git-block +++ a/drivers/scsi/3w-9xxx.c @@ -1990,6 +1990,7 @@ static struct scsi_host_template driver_ .max_sectors = TW_MAX_SECTORS, .cmd_per_lun = TW_MAX_CMDS_PER_LUN, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .shost_attrs = twa_host_attrs, .emulated = 1 }; diff -puN drivers/scsi/3w-xxxx.c~git-block drivers/scsi/3w-xxxx.c --- a/drivers/scsi/3w-xxxx.c~git-block +++ a/drivers/scsi/3w-xxxx.c @@ -2261,6 +2261,7 @@ static struct scsi_host_template driver_ .max_sectors = TW_MAX_SECTORS, .cmd_per_lun = TW_MAX_CMDS_PER_LUN, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .shost_attrs = tw_host_attrs, .emulated = 1 }; diff -puN drivers/scsi/BusLogic.c~git-block drivers/scsi/BusLogic.c --- a/drivers/scsi/BusLogic.c~git-block +++ a/drivers/scsi/BusLogic.c @@ -3575,6 +3575,7 @@ static struct scsi_host_template Bus_Log .unchecked_isa_dma = 1, .max_sectors = 128, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; /* diff -puN drivers/scsi/NCR53c406a.c~git-block drivers/scsi/NCR53c406a.c --- a/drivers/scsi/NCR53c406a.c~git-block +++ a/drivers/scsi/NCR53c406a.c @@ -1066,7 +1066,8 @@ static struct scsi_host_template driver_ .sg_tablesize = 32 /*SG_ALL*/ /*SG_NONE*/, .cmd_per_lun = 1 /* commands per lun */, .unchecked_isa_dma = 1 /* unchecked_isa_dma */, - .use_clustering = ENABLE_CLUSTERING + .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; #include "scsi_module.c" diff -puN drivers/scsi/a100u2w.c~git-block drivers/scsi/a100u2w.c --- a/drivers/scsi/a100u2w.c~git-block +++ a/drivers/scsi/a100u2w.c @@ -1071,6 +1071,7 @@ static struct scsi_host_template inia100 .sg_tablesize = SG_ALL, .cmd_per_lun = 1, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; static int __devinit inia100_probe_one(struct pci_dev *pdev, diff -puN drivers/scsi/aacraid/linit.c~git-block drivers/scsi/aacraid/linit.c --- a/drivers/scsi/aacraid/linit.c~git-block +++ a/drivers/scsi/aacraid/linit.c @@ -944,6 +944,7 @@ static struct scsi_host_template aac_dri .cmd_per_lun = AAC_NUM_IO_FIB, #endif .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .emulated = 1, }; diff -puN drivers/scsi/advansys.c~git-block drivers/scsi/advansys.c --- a/drivers/scsi/advansys.c~git-block +++ a/drivers/scsi/advansys.c @@ -3732,7 +3732,7 @@ static int asc_build_req(asc_board_t *bo */ int sgcnt; int use_sg; - struct scatterlist *slp; + struct scatterlist *slp, *sg; slp = (struct scatterlist *)scp->request_buffer; use_sg = dma_map_sg(boardp->dev, slp, scp->use_sg, @@ -3768,13 +3768,13 @@ static int asc_build_req(asc_board_t *bo /* * Convert scatter-gather list into ASC_SG_HEAD list. 
*/ - for (sgcnt = 0; sgcnt < use_sg; sgcnt++, slp++) { + scsi_for_each_sg(scp, sg, use_sg, sgcnt) { asc_sg_head.sg_list[sgcnt].addr = - cpu_to_le32(sg_dma_address(slp)); + cpu_to_le32(sg_dma_address(sg)); asc_sg_head.sg_list[sgcnt].bytes = - cpu_to_le32(sg_dma_len(slp)); + cpu_to_le32(sg_dma_len(sg)); ASC_STATS_ADD(scp->device->host, sg_xfer, - ASC_CEILING(sg_dma_len(slp), 512)); + ASC_CEILING(sg_dma_len(sg), 512)); } } @@ -4047,7 +4047,7 @@ adv_get_sglist(asc_board_t *boardp, adv_ sg_block->sg_ptr = 0L; /* Last ADV_SG_BLOCK in list. */ return ADV_SUCCESS; } - slp++; + slp = sg_next(slp); } sg_block->sg_cnt = NO_OF_SG_PER_BLOCK; prev_sg_block = sg_block; diff -puN drivers/scsi/aha1542.c~git-block drivers/scsi/aha1542.c --- a/drivers/scsi/aha1542.c~git-block +++ a/drivers/scsi/aha1542.c @@ -61,15 +61,15 @@ static void BAD_DMA(void *address, unsig } static void BAD_SG_DMA(Scsi_Cmnd * SCpnt, - struct scatterlist *sgpnt, + struct scatterlist *sgp, int nseg, int badseg) { printk(KERN_CRIT "sgpnt[%d:%d] page %p/0x%llx length %u\n", badseg, nseg, - page_address(sgpnt[badseg].page) + sgpnt[badseg].offset, - (unsigned long long)SCSI_SG_PA(&sgpnt[badseg]), - sgpnt[badseg].length); + page_address(sgp->page) + sgp->offset, + (unsigned long long)SCSI_SG_PA(sgp), + sgp->length); /* * Not safe to continue. @@ -691,7 +691,7 @@ static int aha1542_queuecommand(Scsi_Cmn memcpy(ccb[mbo].cdb, cmd, ccb[mbo].cdblen); if (SCpnt->use_sg) { - struct scatterlist *sgpnt; + struct scatterlist *sg; struct chain *cptr; #ifdef DEBUG unsigned char *ptr; @@ -699,23 +699,21 @@ static int aha1542_queuecommand(Scsi_Cmn int i; ccb[mbo].op = 2; /* SCSI Initiator Command w/scatter-gather */ SCpnt->host_scribble = kmalloc(512, GFP_KERNEL | GFP_DMA); - sgpnt = (struct scatterlist *) SCpnt->request_buffer; cptr = (struct chain *) SCpnt->host_scribble; if (cptr == NULL) { /* free the claimed mailbox slot */ HOSTDATA(SCpnt->device->host)->SCint[mbo] = NULL; return SCSI_MLQUEUE_HOST_BUSY; } - for (i = 0; i < SCpnt->use_sg; i++) { - if (sgpnt[i].length == 0 || SCpnt->use_sg > 16 || - (((int) sgpnt[i].offset) & 1) || (sgpnt[i].length & 1)) { + scsi_for_each_sg(SCpnt, sg, SCpnt->use_sg, i) { + if (sg->length == 0 || SCpnt->use_sg > 16 || + (((int) sg->offset) & 1) || (sg->length & 1)) { unsigned char *ptr; printk(KERN_CRIT "Bad segment list supplied to aha1542.c (%d, %d)\n", SCpnt->use_sg, i); - for (i = 0; i < SCpnt->use_sg; i++) { + scsi_for_each_sg(SCpnt, sg, SCpnt->use_sg, i) { printk(KERN_CRIT "%d: %p %d\n", i, - (page_address(sgpnt[i].page) + - sgpnt[i].offset), - sgpnt[i].length); + (page_address(sg->page) + + sg->offset), sg->length); }; printk(KERN_CRIT "cptr %x: ", (unsigned int) cptr); ptr = (unsigned char *) &cptr[i]; @@ -723,10 +721,10 @@ static int aha1542_queuecommand(Scsi_Cmn printk("%02x ", ptr[i]); panic("Foooooooood fight!"); }; - any2scsi(cptr[i].dataptr, SCSI_SG_PA(&sgpnt[i])); - if (SCSI_SG_PA(&sgpnt[i]) + sgpnt[i].length - 1 > ISA_DMA_THRESHOLD) - BAD_SG_DMA(SCpnt, sgpnt, SCpnt->use_sg, i); - any2scsi(cptr[i].datalen, sgpnt[i].length); + any2scsi(cptr[i].dataptr, SCSI_SG_PA(sg)); + if (SCSI_SG_PA(sg) + sg->length - 1 > ISA_DMA_THRESHOLD) + BAD_SG_DMA(SCpnt, sg, SCpnt->use_sg, i); + any2scsi(cptr[i].datalen, sg->length); }; any2scsi(ccb[mbo].datalen, SCpnt->use_sg * sizeof(struct chain)); any2scsi(ccb[mbo].dataptr, SCSI_BUF_PA(cptr)); diff -puN drivers/scsi/aha1740.c~git-block drivers/scsi/aha1740.c --- a/drivers/scsi/aha1740.c~git-block +++ a/drivers/scsi/aha1740.c @@ -563,6 +563,7 @@ static struct 
scsi_host_template aha1740 .sg_tablesize = AHA1740_SCATTER, .cmd_per_lun = AHA1740_CMDLUN, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .eh_abort_handler = aha1740_eh_abort_handler, }; diff -puN drivers/scsi/aic7xxx/aic79xx_osm.c~git-block drivers/scsi/aic7xxx/aic79xx_osm.c --- a/drivers/scsi/aic7xxx/aic79xx_osm.c~git-block +++ a/drivers/scsi/aic7xxx/aic79xx_osm.c @@ -766,6 +766,7 @@ struct scsi_host_template aic79xx_driver .max_sectors = 8192, .cmd_per_lun = 2, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .slave_alloc = ahd_linux_slave_alloc, .slave_configure = ahd_linux_slave_configure, .target_alloc = ahd_linux_target_alloc, diff -puN drivers/scsi/aic7xxx/aic7xxx_osm.c~git-block drivers/scsi/aic7xxx/aic7xxx_osm.c --- a/drivers/scsi/aic7xxx/aic7xxx_osm.c~git-block +++ a/drivers/scsi/aic7xxx/aic7xxx_osm.c @@ -747,6 +747,7 @@ struct scsi_host_template aic7xxx_driver .max_sectors = 8192, .cmd_per_lun = 2, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .slave_alloc = ahc_linux_slave_alloc, .slave_configure = ahc_linux_slave_configure, .target_alloc = ahc_linux_target_alloc, diff -puN drivers/scsi/aic7xxx_old.c~git-block drivers/scsi/aic7xxx_old.c --- a/drivers/scsi/aic7xxx_old.c~git-block +++ a/drivers/scsi/aic7xxx_old.c @@ -11142,6 +11142,7 @@ static struct scsi_host_template driver_ .max_sectors = 2048, .cmd_per_lun = 3, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; #include "scsi_module.c" diff -puN drivers/scsi/aic94xx/aic94xx_task.c~git-block drivers/scsi/aic94xx/aic94xx_task.c --- a/drivers/scsi/aic94xx/aic94xx_task.c~git-block +++ a/drivers/scsi/aic94xx/aic94xx_task.c @@ -94,7 +94,7 @@ static inline int asd_map_scatterlist(st res = -ENOMEM; goto err_unmap; } - for (sc = task->scatter, i = 0; i < num_sg; i++, sc++) { + for_each_sg(task->scatter, sc, num_sg, i) { struct sg_el *sg = &((struct sg_el *)ascb->sg_arr->vaddr)[i]; sg->bus_addr = cpu_to_le64((u64)sg_dma_address(sc)); @@ -103,7 +103,7 @@ static inline int asd_map_scatterlist(st sg->flags |= ASD_SG_EL_LIST_EOL; } - for (sc = task->scatter, i = 0; i < 2; i++, sc++) { + for_each_sg(task->scatter, sc, 2, i) { sg_arr[i].bus_addr = cpu_to_le64((u64)sg_dma_address(sc)); sg_arr[i].size = cpu_to_le32((u32)sg_dma_len(sc)); @@ -115,7 +115,7 @@ static inline int asd_map_scatterlist(st sg_arr[2].bus_addr=cpu_to_le64((u64)ascb->sg_arr->dma_handle); } else { int i; - for (sc = task->scatter, i = 0; i < num_sg; i++, sc++) { + for_each_sg(task->scatter, sc, num_sg, i) { sg_arr[i].bus_addr = cpu_to_le64((u64)sg_dma_address(sc)); sg_arr[i].size = cpu_to_le32((u32)sg_dma_len(sc)); diff -puN drivers/scsi/arcmsr/arcmsr_hba.c~git-block drivers/scsi/arcmsr/arcmsr_hba.c --- a/drivers/scsi/arcmsr/arcmsr_hba.c~git-block +++ a/drivers/scsi/arcmsr/arcmsr_hba.c @@ -122,6 +122,7 @@ static struct scsi_host_template arcmsr_ .max_sectors = ARCMSR_MAX_XFER_SECTORS, .cmd_per_lun = ARCMSR_MAX_CMD_PERLUN, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .shost_attrs = arcmsr_host_attrs, }; #ifdef CONFIG_SCSI_ARCMSR_AER diff -puN drivers/scsi/dc395x.c~git-block drivers/scsi/dc395x.c --- a/drivers/scsi/dc395x.c~git-block +++ a/drivers/scsi/dc395x.c @@ -4765,6 +4765,7 @@ static struct scsi_host_template dc395x_ .eh_bus_reset_handler = dc395x_eh_bus_reset, .unchecked_isa_dma = 0, .use_clustering = DISABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; diff -puN drivers/scsi/dpt_i2o.c~git-block 
drivers/scsi/dpt_i2o.c --- a/drivers/scsi/dpt_i2o.c~git-block +++ a/drivers/scsi/dpt_i2o.c @@ -3295,6 +3295,7 @@ static struct scsi_host_template adpt_te .this_id = 7, .cmd_per_lun = 1, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; static s32 adpt_scsi_register(adpt_hba* pHba) diff -puN drivers/scsi/eata.c~git-block drivers/scsi/eata.c --- a/drivers/scsi/eata.c~git-block +++ a/drivers/scsi/eata.c @@ -523,7 +523,8 @@ static struct scsi_host_template driver_ .slave_configure = eata2x_slave_configure, .this_id = 7, .unchecked_isa_dma = 1, - .use_clustering = ENABLE_CLUSTERING + .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; #if !defined(__BIG_ENDIAN_BITFIELD) && !defined(__LITTLE_ENDIAN_BITFIELD) diff -puN drivers/scsi/gdth.c~git-block drivers/scsi/gdth.c --- a/drivers/scsi/gdth.c~git-block +++ a/drivers/scsi/gdth.c @@ -2651,7 +2651,7 @@ static void gdth_copy_internal_data(int { ushort cpcount,i; ushort cpsum,cpnow; - struct scatterlist *sl; + struct scatterlist *sl, *sg; gdth_ha_str *ha; char *address; @@ -2660,29 +2660,30 @@ static void gdth_copy_internal_data(int if (scp->use_sg) { sl = (struct scatterlist *)scp->request_buffer; - for (i=0,cpsum=0; iuse_sg; ++i,++sl) { + cpsum = 0; + for_each_sg(sl, sg, scp->use_sg, i) { unsigned long flags; - cpnow = (ushort)sl->length; + cpnow = (ushort)sg->length; TRACE(("copy_internal() now %d sum %d count %d %d\n", cpnow,cpsum,cpcount,(ushort)scp->bufflen)); if (cpsum+cpnow > cpcount) cpnow = cpcount - cpsum; cpsum += cpnow; - if (!sl->page) { + if (!sg->page) { printk("GDT-HA %d: invalid sc/gt element in gdth_copy_internal_data()\n", hanum); return; } local_irq_save(flags); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) - address = kmap_atomic(sl->page, KM_BIO_SRC_IRQ) + sl->offset; + address = kmap_atomic(sg->page, KM_BIO_SRC_IRQ) + sg->offset; memcpy(address,buffer,cpnow); - flush_dcache_page(sl->page); + flush_dcache_page(sg->page); kunmap_atomic(address, KM_BIO_SRC_IRQ); #else - address = kmap_atomic(sl->page, KM_BH_IRQ) + sl->offset; + address = kmap_atomic(sg->page, KM_BH_IRQ) + sg->offset; memcpy(address,buffer,cpnow); - flush_dcache_page(sl->page); + flush_dcache_page(sg->page); kunmap_atomic(address, KM_BH_IRQ); #endif local_irq_restore(flags); @@ -2802,7 +2803,7 @@ static int gdth_fill_cache_cmd(int hanum { register gdth_ha_str *ha; register gdth_cmd_str *cmdp; - struct scatterlist *sl; + struct scatterlist *sl, *sg; ulong32 cnt, blockcnt; ulong64 no, blockno; dma_addr_t phys_addr; @@ -2908,25 +2909,25 @@ static int gdth_fill_cache_cmd(int hanum if (mode64) { cmdp->u.cache64.DestAddr= (ulong64)-1; cmdp->u.cache64.sg_canz = sgcnt; - for (i=0; iu.cache64.sg_lst[i].sg_ptr = sg_dma_address(sl); + for_each_sg(sl, sg, sgcnt, i) { + cmdp->u.cache64.sg_lst[i].sg_ptr = sg_dma_address(sg); #ifdef GDTH_DMA_STATISTICS if (cmdp->u.cache64.sg_lst[i].sg_ptr > (ulong64)0xffffffff) ha->dma64_cnt++; else ha->dma32_cnt++; #endif - cmdp->u.cache64.sg_lst[i].sg_len = sg_dma_len(sl); + cmdp->u.cache64.sg_lst[i].sg_len = sg_dma_len(sg); } } else { cmdp->u.cache.DestAddr= 0xffffffff; cmdp->u.cache.sg_canz = sgcnt; - for (i=0; iu.cache.sg_lst[i].sg_ptr = sg_dma_address(sl); + for_each_sg(sl, sg, sgcnt, i) { + cmdp->u.cache.sg_lst[i].sg_ptr = sg_dma_address(sg); #ifdef GDTH_DMA_STATISTICS ha->dma32_cnt++; #endif - cmdp->u.cache.sg_lst[i].sg_len = sg_dma_len(sl); + cmdp->u.cache.sg_lst[i].sg_len = sg_dma_len(sg); } } @@ -3012,7 +3013,7 @@ static int gdth_fill_raw_cmd(int hanum,S { register 
gdth_ha_str *ha; register gdth_cmd_str *cmdp; - struct scatterlist *sl; + struct scatterlist *sl, *sg; ushort i; dma_addr_t phys_addr, sense_paddr; int cmd_index, sgcnt, mode64; @@ -3115,25 +3116,25 @@ static int gdth_fill_raw_cmd(int hanum,S if (mode64) { cmdp->u.raw64.sdata = (ulong64)-1; cmdp->u.raw64.sg_ranz = sgcnt; - for (i=0; iu.raw64.sg_lst[i].sg_ptr = sg_dma_address(sl); + for_each_sg(sl, sg, sgcnt, i) { + cmdp->u.raw64.sg_lst[i].sg_ptr = sg_dma_address(sg); #ifdef GDTH_DMA_STATISTICS if (cmdp->u.raw64.sg_lst[i].sg_ptr > (ulong64)0xffffffff) ha->dma64_cnt++; else ha->dma32_cnt++; #endif - cmdp->u.raw64.sg_lst[i].sg_len = sg_dma_len(sl); + cmdp->u.raw64.sg_lst[i].sg_len = sg_dma_len(sg); } } else { cmdp->u.raw.sdata = 0xffffffff; cmdp->u.raw.sg_ranz = sgcnt; - for (i=0; iu.raw.sg_lst[i].sg_ptr = sg_dma_address(sl); + for_each_sg(sl, sg, sgcnt, i) { + cmdp->u.raw.sg_lst[i].sg_ptr = sg_dma_address(sg); #ifdef GDTH_DMA_STATISTICS ha->dma32_cnt++; #endif - cmdp->u.raw.sg_lst[i].sg_len = sg_dma_len(sl); + cmdp->u.raw.sg_lst[i].sg_len = sg_dma_len(sg); } } diff -puN drivers/scsi/hosts.c~git-block drivers/scsi/hosts.c diff -puN drivers/scsi/hptiop.c~git-block drivers/scsi/hptiop.c --- a/drivers/scsi/hptiop.c~git-block +++ a/drivers/scsi/hptiop.c @@ -655,6 +655,7 @@ static struct scsi_host_template driver_ .unchecked_isa_dma = 0, .emulated = 0, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .proc_name = driver_name, .shost_attrs = hptiop_attrs, .this_id = -1, diff -puN drivers/scsi/ibmmca.c~git-block drivers/scsi/ibmmca.c --- a/drivers/scsi/ibmmca.c~git-block +++ a/drivers/scsi/ibmmca.c @@ -1501,6 +1501,7 @@ static struct scsi_host_template ibmmca_ .sg_tablesize = 16, .cmd_per_lun = 1, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; static int ibmmca_probe(struct device *dev) diff -puN drivers/scsi/ibmvscsi/ibmvscsi.c~git-block drivers/scsi/ibmvscsi/ibmvscsi.c --- a/drivers/scsi/ibmvscsi/ibmvscsi.c~git-block +++ a/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -1548,6 +1548,7 @@ static struct scsi_host_template driver_ .this_id = -1, .sg_tablesize = SG_ALL, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .shost_attrs = ibmvscsi_attrs, }; diff -puN drivers/scsi/ide-scsi.c~git-block drivers/scsi/ide-scsi.c --- a/drivers/scsi/ide-scsi.c~git-block +++ a/drivers/scsi/ide-scsi.c @@ -70,6 +70,7 @@ typedef struct idescsi_pc_s { u8 *buffer; /* Data buffer */ u8 *current_position; /* Pointer into the above buffer */ struct scatterlist *sg; /* Scatter gather table */ + struct scatterlist *last_sg; /* Last sg element */ int b_count; /* Bytes transferred from current entry */ struct scsi_cmnd *scsi_cmd; /* SCSI command */ void (*done)(struct scsi_cmnd *); /* Scsi completion routine */ @@ -197,10 +198,17 @@ static void idescsi_input_buffers (ide_d } bcount -= count; pc->b_count += count; if (pc->b_count == pc->sg->length) { - pc->sg++; + if (pc->sg == pc->last_sg) + break; + pc->sg = sg_next(pc->sg); pc->b_count = 0; } } + + if (bcount) { + printk (KERN_ERR "ide-scsi: scatter gather table too small, discarding data\n"); + idescsi_discard_data (drive, bcount); + } } static void idescsi_output_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsigned int bcount) @@ -233,10 +241,17 @@ static void idescsi_output_buffers (ide_ } bcount -= count; pc->b_count += count; if (pc->b_count == pc->sg->length) { - pc->sg++; + if (pc->sg == pc->last_sg) + break; + pc->sg = sg_next(pc->sg); pc->b_count = 0; } } + + if (bcount) { + printk 
(KERN_ERR "ide-scsi: scatter gather table too small, padding with zeros\n"); + idescsi_output_zeros (drive, bcount); + } } static void hexdump(u8 *x, int len) diff -puN drivers/scsi/initio.c~git-block drivers/scsi/initio.c --- a/drivers/scsi/initio.c~git-block +++ a/drivers/scsi/initio.c @@ -2831,6 +2831,7 @@ static struct scsi_host_template initio_ .sg_tablesize = SG_ALL, .cmd_per_lun = 1, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; static int initio_probe_one(struct pci_dev *pdev, diff -puN drivers/scsi/ips.c~git-block drivers/scsi/ips.c --- a/drivers/scsi/ips.c~git-block +++ a/drivers/scsi/ips.c @@ -3252,7 +3252,7 @@ ips_done(ips_ha_t * ha, ips_scb_t * scb) */ if ((scb->breakup) || (scb->sg_break)) { struct scatterlist *sg; - int sg_dma_index, ips_sg_index = 0; + int i, sg_dma_index, ips_sg_index = 0; /* we had a data breakup */ scb->data_len = 0; @@ -3261,20 +3261,22 @@ ips_done(ips_ha_t * ha, ips_scb_t * scb) /* Spin forward to last dma chunk */ sg_dma_index = scb->breakup; + for (i = 0; i < scb->breakup; i++) + sg = sg_next(sg); /* Take care of possible partial on last chunk */ ips_fill_scb_sg_single(ha, - sg_dma_address(&sg[sg_dma_index]), + sg_dma_address(sg), scb, ips_sg_index++, - sg_dma_len(&sg[sg_dma_index])); + sg_dma_len(sg)); for (; sg_dma_index < scsi_sg_count(scb->scsi_cmd); - sg_dma_index++) { + sg_dma_index++, sg = sg_next(sg)) { if (ips_fill_scb_sg_single (ha, - sg_dma_address(&sg[sg_dma_index]), + sg_dma_address(sg), scb, ips_sg_index++, - sg_dma_len(&sg[sg_dma_index])) < 0) + sg_dma_len(sg)) < 0) break; } diff -puN drivers/scsi/lpfc/lpfc_scsi.c~git-block drivers/scsi/lpfc/lpfc_scsi.c --- a/drivers/scsi/lpfc/lpfc_scsi.c~git-block +++ a/drivers/scsi/lpfc/lpfc_scsi.c @@ -1438,6 +1438,7 @@ struct scsi_host_template lpfc_template .scan_finished = lpfc_scan_finished, .this_id = -1, .sg_tablesize = LPFC_SG_SEG_CNT, + .use_sg_chaining = ENABLE_SG_CHAINING, .cmd_per_lun = LPFC_CMD_PER_LUN, .use_clustering = ENABLE_CLUSTERING, .shost_attrs = lpfc_hba_attrs, @@ -1460,6 +1461,7 @@ struct scsi_host_template lpfc_vport_tem .sg_tablesize = LPFC_SG_SEG_CNT, .cmd_per_lun = LPFC_CMD_PER_LUN, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .shost_attrs = lpfc_vport_attrs, .max_sectors = 0xFFFF, }; diff -puN drivers/scsi/mac53c94.c~git-block drivers/scsi/mac53c94.c --- a/drivers/scsi/mac53c94.c~git-block +++ a/drivers/scsi/mac53c94.c @@ -402,6 +402,7 @@ static struct scsi_host_template mac53c9 .sg_tablesize = SG_ALL, .cmd_per_lun = 1, .use_clustering = DISABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; static int mac53c94_probe(struct macio_dev *mdev, const struct of_device_id *match) diff -puN drivers/scsi/megaraid.c~git-block drivers/scsi/megaraid.c --- a/drivers/scsi/megaraid.c~git-block +++ a/drivers/scsi/megaraid.c @@ -4484,6 +4484,7 @@ static struct scsi_host_template megarai .sg_tablesize = MAX_SGLIST, .cmd_per_lun = DEF_CMD_PER_LUN, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .eh_abort_handler = megaraid_abort, .eh_device_reset_handler = megaraid_reset, .eh_bus_reset_handler = megaraid_reset, diff -puN drivers/scsi/megaraid/megaraid_mbox.c~git-block drivers/scsi/megaraid/megaraid_mbox.c --- a/drivers/scsi/megaraid/megaraid_mbox.c~git-block +++ a/drivers/scsi/megaraid/megaraid_mbox.c @@ -361,6 +361,7 @@ static struct scsi_host_template megarai .eh_host_reset_handler = megaraid_reset_handler, .change_queue_depth = megaraid_change_queue_depth, .use_clustering = 
ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .sdev_attrs = megaraid_sdev_attrs, .shost_attrs = megaraid_shost_attrs, }; diff -puN drivers/scsi/megaraid/megaraid_sas.c~git-block drivers/scsi/megaraid/megaraid_sas.c --- a/drivers/scsi/megaraid/megaraid_sas.c~git-block +++ a/drivers/scsi/megaraid/megaraid_sas.c @@ -1110,6 +1110,7 @@ static struct scsi_host_template megasas .eh_timed_out = megasas_reset_timer, .bios_param = megasas_bios_param, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; /** diff -puN drivers/scsi/mesh.c~git-block drivers/scsi/mesh.c --- a/drivers/scsi/mesh.c~git-block +++ a/drivers/scsi/mesh.c @@ -1843,6 +1843,7 @@ static struct scsi_host_template mesh_te .sg_tablesize = SG_ALL, .cmd_per_lun = 2, .use_clustering = DISABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match) diff -puN drivers/scsi/nsp32.c~git-block drivers/scsi/nsp32.c --- a/drivers/scsi/nsp32.c~git-block +++ a/drivers/scsi/nsp32.c @@ -281,6 +281,7 @@ static struct scsi_host_template nsp32_t .cmd_per_lun = 1, .this_id = NSP32_HOST_SCSIID, .use_clustering = DISABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .eh_abort_handler = nsp32_eh_abort, .eh_bus_reset_handler = nsp32_eh_bus_reset, .eh_host_reset_handler = nsp32_eh_host_reset, diff -puN drivers/scsi/pcmcia/sym53c500_cs.c~git-block drivers/scsi/pcmcia/sym53c500_cs.c --- a/drivers/scsi/pcmcia/sym53c500_cs.c~git-block +++ a/drivers/scsi/pcmcia/sym53c500_cs.c @@ -694,6 +694,7 @@ static struct scsi_host_template sym53c5 .sg_tablesize = 32, .cmd_per_lun = 1, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .shost_attrs = SYM53C500_shost_attrs }; diff -puN drivers/scsi/qla1280.c~git-block drivers/scsi/qla1280.c --- a/drivers/scsi/qla1280.c~git-block +++ a/drivers/scsi/qla1280.c @@ -2775,7 +2775,7 @@ qla1280_64bit_start_scsi(struct scsi_qla struct device_reg __iomem *reg = ha->iobase; struct scsi_cmnd *cmd = sp->cmd; cmd_a64_entry_t *pkt; - struct scatterlist *sg = NULL; + struct scatterlist *sg = NULL, *s; __le32 *dword_ptr; dma_addr_t dma_handle; int status = 0; @@ -2889,13 +2889,16 @@ qla1280_64bit_start_scsi(struct scsi_qla * Load data segments. */ if (seg_cnt) { /* If data transfer. */ + int remseg = seg_cnt; /* Setup packet address segment pointer. */ dword_ptr = (u32 *)&pkt->dseg_0_address; if (cmd->use_sg) { /* If scatter gather */ /* Load command entry data segments. 
*/ - for (cnt = 0; cnt < 2 && seg_cnt; cnt++, seg_cnt--) { - dma_handle = sg_dma_address(sg); + for_each_sg(sg, s, seg_cnt, cnt) { + if (cnt == 2) + break; + dma_handle = sg_dma_address(s); #if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_SGI_SN2) if (ha->flags.use_pci_vchannel) sn_pci_set_vchan(ha->pdev, @@ -2906,12 +2909,12 @@ qla1280_64bit_start_scsi(struct scsi_qla cpu_to_le32(pci_dma_lo32(dma_handle)); *dword_ptr++ = cpu_to_le32(pci_dma_hi32(dma_handle)); - *dword_ptr++ = cpu_to_le32(sg_dma_len(sg)); - sg++; + *dword_ptr++ = cpu_to_le32(sg_dma_len(s)); dprintk(3, "S/G Segment phys_addr=%x %x, len=0x%x\n", cpu_to_le32(pci_dma_hi32(dma_handle)), cpu_to_le32(pci_dma_lo32(dma_handle)), - cpu_to_le32(sg_dma_len(sg))); + cpu_to_le32(sg_dma_len(sg_next(s)))); + remseg--; } dprintk(5, "qla1280_64bit_start_scsi: Scatter/gather " "command packet data - b %i, t %i, l %i \n", @@ -2926,7 +2929,9 @@ qla1280_64bit_start_scsi(struct scsi_qla dprintk(3, "S/G Building Continuation...seg_cnt=0x%x " "remains\n", seg_cnt); - while (seg_cnt > 0) { + while (remseg > 0) { + /* Update sg start */ + sg = s; /* Adjust ring index. */ ha->req_ring_index++; if (ha->req_ring_index == REQUEST_ENTRY_CNT) { @@ -2952,9 +2957,10 @@ qla1280_64bit_start_scsi(struct scsi_qla (u32 *)&((struct cont_a64_entry *) pkt)->dseg_0_address; /* Load continuation entry data segments. */ - for (cnt = 0; cnt < 5 && seg_cnt; - cnt++, seg_cnt--) { - dma_handle = sg_dma_address(sg); + for_each_sg(sg, s, remseg, cnt) { + if (cnt == 5) + break; + dma_handle = sg_dma_address(s); #if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_SGI_SN2) if (ha->flags.use_pci_vchannel) sn_pci_set_vchan(ha->pdev, @@ -2966,13 +2972,13 @@ qla1280_64bit_start_scsi(struct scsi_qla *dword_ptr++ = cpu_to_le32(pci_dma_hi32(dma_handle)); *dword_ptr++ = - cpu_to_le32(sg_dma_len(sg)); + cpu_to_le32(sg_dma_len(s)); dprintk(3, "S/G Segment Cont. phys_addr=%x %x, len=0x%x\n", cpu_to_le32(pci_dma_hi32(dma_handle)), cpu_to_le32(pci_dma_lo32(dma_handle)), - cpu_to_le32(sg_dma_len(sg))); - sg++; + cpu_to_le32(sg_dma_len(s))); } + remseg -= cnt; dprintk(5, "qla1280_64bit_start_scsi: " "continuation packet data - b %i, t " "%i, l %i \n", SCSI_BUS_32(cmd), @@ -3062,7 +3068,7 @@ qla1280_32bit_start_scsi(struct scsi_qla struct device_reg __iomem *reg = ha->iobase; struct scsi_cmnd *cmd = sp->cmd; struct cmd_entry *pkt; - struct scatterlist *sg = NULL; + struct scatterlist *sg = NULL, *s; __le32 *dword_ptr; int status = 0; int cnt; @@ -3188,6 +3194,7 @@ qla1280_32bit_start_scsi(struct scsi_qla * Load data segments. */ if (seg_cnt) { + int remseg = seg_cnt; /* Setup packet address segment pointer. */ dword_ptr = &pkt->dseg_0_address; @@ -3196,22 +3203,25 @@ qla1280_32bit_start_scsi(struct scsi_qla qla1280_dump_buffer(1, (char *)sg, 4 * 16); /* Load command entry data segments. */ - for (cnt = 0; cnt < 4 && seg_cnt; cnt++, seg_cnt--) { + for_each_sg(sg, s, seg_cnt, cnt) { + if (cnt == 4) + break; *dword_ptr++ = - cpu_to_le32(pci_dma_lo32(sg_dma_address(sg))); - *dword_ptr++ = - cpu_to_le32(sg_dma_len(sg)); + cpu_to_le32(pci_dma_lo32(sg_dma_address(s))); + *dword_ptr++ = cpu_to_le32(sg_dma_len(s)); dprintk(3, "S/G Segment phys_addr=0x%lx, len=0x%x\n", - (pci_dma_lo32(sg_dma_address(sg))), - (sg_dma_len(sg))); - sg++; + (pci_dma_lo32(sg_dma_address(s))), + (sg_dma_len(s))); + remseg--; } /* * Build continuation packets. 
*/ dprintk(3, "S/G Building Continuation" "...seg_cnt=0x%x remains\n", seg_cnt); - while (seg_cnt > 0) { + while (remseg > 0) { + /* Continue from end point */ + sg = s; /* Adjust ring index. */ ha->req_ring_index++; if (ha->req_ring_index == REQUEST_ENTRY_CNT) { @@ -3239,19 +3249,20 @@ qla1280_32bit_start_scsi(struct scsi_qla &((struct cont_entry *) pkt)->dseg_0_address; /* Load continuation entry data segments. */ - for (cnt = 0; cnt < 7 && seg_cnt; - cnt++, seg_cnt--) { + for_each_sg(sg, s, remseg, cnt) { + if (cnt == 7) + break; *dword_ptr++ = - cpu_to_le32(pci_dma_lo32(sg_dma_address(sg))); + cpu_to_le32(pci_dma_lo32(sg_dma_address(s))); *dword_ptr++ = - cpu_to_le32(sg_dma_len(sg)); + cpu_to_le32(sg_dma_len(s)); dprintk(1, "S/G Segment Cont. phys_addr=0x%x, " "len=0x%x\n", - cpu_to_le32(pci_dma_lo32(sg_dma_address(sg))), - cpu_to_le32(sg_dma_len(sg))); - sg++; + cpu_to_le32(pci_dma_lo32(sg_dma_address(s))), + cpu_to_le32(sg_dma_len(s))); } + remseg -= cnt; dprintk(5, "qla1280_32bit_start_scsi: " "continuation packet data - " "scsi(%i:%i:%i)\n", SCSI_BUS_32(cmd), @@ -4248,6 +4259,7 @@ static struct scsi_host_template qla1280 .sg_tablesize = SG_ALL, .cmd_per_lun = 1, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; diff -puN drivers/scsi/qla2xxx/qla_os.c~git-block drivers/scsi/qla2xxx/qla_os.c --- a/drivers/scsi/qla2xxx/qla_os.c~git-block +++ a/drivers/scsi/qla2xxx/qla_os.c @@ -132,6 +132,7 @@ struct scsi_host_template qla2x00_driver .this_id = -1, .cmd_per_lun = 3, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .sg_tablesize = SG_ALL, /* @@ -163,6 +164,7 @@ struct scsi_host_template qla24xx_driver .this_id = -1, .cmd_per_lun = 3, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .sg_tablesize = SG_ALL, .max_sectors = 0xFFFF, diff -puN drivers/scsi/qla4xxx/ql4_os.c~git-block drivers/scsi/qla4xxx/ql4_os.c --- a/drivers/scsi/qla4xxx/ql4_os.c~git-block +++ a/drivers/scsi/qla4xxx/ql4_os.c @@ -94,6 +94,7 @@ static struct scsi_host_template qla4xxx .this_id = -1, .cmd_per_lun = 3, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .sg_tablesize = SG_ALL, .max_sectors = 0xFFFF, diff -puN drivers/scsi/qlogicfas.c~git-block drivers/scsi/qlogicfas.c --- a/drivers/scsi/qlogicfas.c~git-block +++ a/drivers/scsi/qlogicfas.c @@ -197,6 +197,7 @@ static struct scsi_host_template qlogicf .sg_tablesize = SG_ALL, .cmd_per_lun = 1, .use_clustering = DISABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; static __init int qlogicfas_init(void) diff -puN drivers/scsi/qlogicpti.c~git-block drivers/scsi/qlogicpti.c --- a/drivers/scsi/qlogicpti.c~git-block +++ a/drivers/scsi/qlogicpti.c @@ -870,7 +870,7 @@ static inline int load_cmd(struct scsi_c struct qlogicpti *qpti, u_int in_ptr, u_int out_ptr) { struct dataseg *ds; - struct scatterlist *sg; + struct scatterlist *sg, *s; int i, n; if (Cmnd->use_sg) { @@ -886,11 +886,12 @@ static inline int load_cmd(struct scsi_c n = sg_count; if (n > 4) n = 4; - for (i = 0; i < n; i++, sg++) { - ds[i].d_base = sg_dma_address(sg); - ds[i].d_count = sg_dma_len(sg); + for_each_sg(sg, s, n, i) { + ds[i].d_base = sg_dma_address(s); + ds[i].d_count = sg_dma_len(s); } sg_count -= 4; + sg = s; while (sg_count > 0) { struct Continuation_Entry *cont; @@ -909,9 +910,9 @@ static inline int load_cmd(struct scsi_c n = sg_count; if (n > 7) n = 7; - for (i = 0; i < n; i++, sg++) { - ds[i].d_base = sg_dma_address(sg); - ds[i].d_count = sg_dma_len(sg); + 
for_each_sg(sg, s, n, i) { + ds[i].d_base = sg_dma_address(s); + ds[i].d_count = sg_dma_len(s); } sg_count -= n; } diff -puN drivers/scsi/scsi_debug.c~git-block drivers/scsi/scsi_debug.c --- a/drivers/scsi/scsi_debug.c~git-block +++ a/drivers/scsi/scsi_debug.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "scsi.h" @@ -600,7 +601,7 @@ static int fill_from_dev_buffer(struct s int k, req_len, act_len, len, active; void * kaddr; void * kaddr_off; - struct scatterlist * sgpnt; + struct scatterlist * sg; if (0 == scp->request_bufflen) return 0; @@ -619,16 +620,16 @@ static int fill_from_dev_buffer(struct s scp->resid = req_len - act_len; return 0; } - sgpnt = (struct scatterlist *)scp->request_buffer; active = 1; - for (k = 0, req_len = 0, act_len = 0; k < scp->use_sg; ++k, ++sgpnt) { + req_len = act_len = 0; + scsi_for_each_sg(scp, sg, scp->use_sg, k) { if (active) { kaddr = (unsigned char *) - kmap_atomic(sgpnt->page, KM_USER0); + kmap_atomic(sg->page, KM_USER0); if (NULL == kaddr) return (DID_ERROR << 16); - kaddr_off = (unsigned char *)kaddr + sgpnt->offset; - len = sgpnt->length; + kaddr_off = (unsigned char *)kaddr + sg->offset; + len = sg->length; if ((req_len + len) > arr_len) { active = 0; len = arr_len - req_len; @@ -637,7 +638,7 @@ static int fill_from_dev_buffer(struct s kunmap_atomic(kaddr, KM_USER0); act_len += len; } - req_len += sgpnt->length; + req_len += sg->length; } if (scp->resid) scp->resid -= act_len; @@ -653,7 +654,7 @@ static int fetch_to_dev_buffer(struct sc int k, req_len, len, fin; void * kaddr; void * kaddr_off; - struct scatterlist * sgpnt; + struct scatterlist * sg; if (0 == scp->request_bufflen) return 0; @@ -668,13 +669,14 @@ static int fetch_to_dev_buffer(struct sc memcpy(arr, scp->request_buffer, len); return len; } - sgpnt = (struct scatterlist *)scp->request_buffer; - for (k = 0, req_len = 0, fin = 0; k < scp->use_sg; ++k, ++sgpnt) { - kaddr = (unsigned char *)kmap_atomic(sgpnt->page, KM_USER0); + sg = scsi_sglist(scp); + req_len = fin = 0; + for (k = 0; k < scp->use_sg; ++k, sg = sg_next(sg)) { + kaddr = (unsigned char *)kmap_atomic(sg->page, KM_USER0); if (NULL == kaddr) return -1; - kaddr_off = (unsigned char *)kaddr + sgpnt->offset; - len = sgpnt->length; + kaddr_off = (unsigned char *)kaddr + sg->offset; + len = sg->length; if ((req_len + len) > max_arr_len) { len = max_arr_len - req_len; fin = 1; @@ -683,7 +685,7 @@ static int fetch_to_dev_buffer(struct sc kunmap_atomic(kaddr, KM_USER0); if (fin) return req_len + len; - req_len += sgpnt->length; + req_len += sg->length; } return req_len; } diff -puN drivers/scsi/scsi_lib.c~git-block drivers/scsi/scsi_lib.c --- a/drivers/scsi/scsi_lib.c~git-block +++ a/drivers/scsi/scsi_lib.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -33,35 +34,34 @@ #define SG_MEMPOOL_NR ARRAY_SIZE(scsi_sg_pools) #define SG_MEMPOOL_SIZE 2 +/* + * The maximum number of SG segments that we will put inside a scatterlist + * (unless chaining is used). Should ideally fit inside a single page, to + * avoid a higher order allocation. 
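The sizing comment above is easy to verify: with the roughly 32-byte struct scatterlist of a typical 64-bit build (page pointer, offset, length, dma_address, dma_length plus padding), 128 entries come to about 4096 bytes, one page. A one-line compile-time check along these lines, placed in any init function, would catch a bad choice of SCSI_MAX_SG_SEGMENTS; it is not part of the patch, and the 32-byte figure is an assumption of the sketch.

/* Not in the patch -- a sanity check of the sizing comment above,
 * assuming the usual 4 KB PAGE_SIZE. */
BUILD_BUG_ON(SCSI_MAX_SG_SEGMENTS * sizeof(struct scatterlist) > PAGE_SIZE);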
+ */ +#define SCSI_MAX_SG_SEGMENTS 128 + struct scsi_host_sg_pool { size_t size; - char *name; + char *name; struct kmem_cache *slab; mempool_t *pool; }; -#if (SCSI_MAX_PHYS_SEGMENTS < 32) -#error SCSI_MAX_PHYS_SEGMENTS is too small -#endif - -#define SP(x) { x, "sgpool-" #x } +#define SP(x) { x, "sgpool-" #x } static struct scsi_host_sg_pool scsi_sg_pools[] = { SP(8), SP(16), +#if (SCSI_MAX_SG_SEGMENTS > 16) SP(32), -#if (SCSI_MAX_PHYS_SEGMENTS > 32) +#if (SCSI_MAX_SG_SEGMENTS > 32) SP(64), -#if (SCSI_MAX_PHYS_SEGMENTS > 64) +#if (SCSI_MAX_SG_SEGMENTS > 64) SP(128), -#if (SCSI_MAX_PHYS_SEGMENTS > 128) - SP(256), -#if (SCSI_MAX_PHYS_SEGMENTS > 256) -#error SCSI_MAX_PHYS_SEGMENTS is too large #endif #endif #endif -#endif -}; +}; #undef SP static void scsi_run_queue(struct request_queue *q); @@ -263,16 +263,7 @@ static int scsi_merge_bio(struct request bio->bi_rw |= (1 << BIO_RW); blk_queue_bounce(q, &bio); - if (!rq->bio) - blk_rq_bio_prep(q, rq, bio); - else if (!ll_back_merge_fn(q, rq, bio)) - return -EINVAL; - else { - rq->biotail->bi_next = bio; - rq->biotail = bio; - } - - return 0; + return blk_rq_append_bio(q, rq, bio); } static int scsi_bi_endio(struct bio *bio, unsigned int bytes_done, int error) @@ -708,56 +699,170 @@ static struct scsi_cmnd *scsi_end_reques return NULL; } -struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask) -{ - struct scsi_host_sg_pool *sgp; - struct scatterlist *sgl; +/* + * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit + * is totally arbitrary, a setting of 2048 will get you at least 8mb ios. + */ +#define SCSI_MAX_SG_CHAIN_SEGMENTS 2048 - BUG_ON(!cmd->use_sg); +static inline unsigned int scsi_sgtable_index(unsigned short nents) +{ + unsigned int index; - switch (cmd->use_sg) { + switch (nents) { case 1 ... 8: - cmd->sglist_len = 0; + index = 0; break; case 9 ... 16: - cmd->sglist_len = 1; + index = 1; break; +#if (SCSI_MAX_SG_SEGMENTS > 16) case 17 ... 32: - cmd->sglist_len = 2; + index = 2; break; -#if (SCSI_MAX_PHYS_SEGMENTS > 32) +#if (SCSI_MAX_SG_SEGMENTS > 32) case 33 ... 64: - cmd->sglist_len = 3; + index = 3; break; -#if (SCSI_MAX_PHYS_SEGMENTS > 64) +#if (SCSI_MAX_SG_SEGMENTS > 64) case 65 ... 128: - cmd->sglist_len = 4; - break; -#if (SCSI_MAX_PHYS_SEGMENTS > 128) - case 129 ... 256: - cmd->sglist_len = 5; + index = 4; break; #endif #endif #endif default: - return NULL; + printk(KERN_ERR "scsi: bad segment count=%d\n", nents); + BUG(); } - sgp = scsi_sg_pools + cmd->sglist_len; - sgl = mempool_alloc(sgp->pool, gfp_mask); - return sgl; + return index; +} + +struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask) +{ + struct scsi_host_sg_pool *sgp; + struct scatterlist *sgl, *prev, *ret; + unsigned int index; + int this, left; + + BUG_ON(!cmd->use_sg); + + left = cmd->use_sg; + ret = prev = NULL; + do { + this = left; + if (this > SCSI_MAX_SG_SEGMENTS) { + this = SCSI_MAX_SG_SEGMENTS - 1; + index = SG_MEMPOOL_NR - 1; + } else + index = scsi_sgtable_index(this); + + left -= this; + + sgp = scsi_sg_pools + index; + + sgl = mempool_alloc(sgp->pool, gfp_mask); + if (unlikely(!sgl)) + goto enomem; + + memset(sgl, 0, sizeof(*sgl) * sgp->size); + + /* + * first loop through, set initial index and return value + */ + if (!ret) + ret = sgl; + + /* + * chain previous sglist, if any. we know the previous + * sglist must be the biggest one, or we would not have + * ended up doing another loop. 
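The allocation loop above is denser than it looks. A simplified sketch of the same idea, using plain kcalloc() instead of the size-graded mempools and skipping the error unwinding, shows how every full array gives up its final slot to the sg_chain() link, which is why only SCSI_MAX_SG_SEGMENTS - 1 entries of it carry data. This is an illustration, not the patch's implementation.

/* Simplified illustration of chained sg table allocation: kcalloc()
 * instead of mempools, no error unwinding. */
#include <linux/scatterlist.h>
#include <linux/slab.h>

static struct scatterlist *alloc_chained_table(unsigned int nents, gfp_t gfp)
{
        struct scatterlist *first = NULL, *prev = NULL, *sgl;
        unsigned int this;

        while (nents) {
                if (nents > SCSI_MAX_SG_SEGMENTS)
                        this = SCSI_MAX_SG_SEGMENTS - 1; /* last slot = link */
                else
                        this = nents;

                sgl = kcalloc(SCSI_MAX_SG_SEGMENTS, sizeof(*sgl), gfp);
                if (!sgl)
                        return NULL;            /* real code must unwind */

                if (!first)
                        first = sgl;
                if (prev)
                        sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);

                prev = sgl;
                nents -= this;
        }
        return first;
}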
+ */ + if (prev) + sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl); + + /* + * don't allow subsequent mempool allocs to sleep, it would + * violate the mempool principle. + */ + gfp_mask &= ~__GFP_WAIT; + gfp_mask |= __GFP_HIGH; + prev = sgl; + } while (left); + + /* + * ->use_sg may get modified after dma mapping has potentially + * shrunk the number of segments, so keep a copy of it for free. + */ + cmd->__use_sg = cmd->use_sg; + return ret; +enomem: + if (ret) { + /* + * Free entries chained off ret. Since we were trying to + * allocate another sglist, we know that all entries are of + * the max size. + */ + sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1; + prev = ret; + ret = &ret[SCSI_MAX_SG_SEGMENTS - 1]; + + while ((sgl = sg_chain_ptr(ret)) != NULL) { + ret = &sgl[SCSI_MAX_SG_SEGMENTS - 1]; + mempool_free(sgl, sgp->pool); + } + + mempool_free(prev, sgp->pool); + } + return NULL; } EXPORT_SYMBOL(scsi_alloc_sgtable); -void scsi_free_sgtable(struct scatterlist *sgl, int index) +void scsi_free_sgtable(struct scsi_cmnd *cmd) { + struct scatterlist *sgl = cmd->request_buffer; struct scsi_host_sg_pool *sgp; - BUG_ON(index >= SG_MEMPOOL_NR); + /* + * if this is the biggest size sglist, check if we have + * chained parts we need to free + */ + if (cmd->__use_sg > SCSI_MAX_SG_SEGMENTS) { + unsigned short this, left; + struct scatterlist *next; + unsigned int index; + + left = cmd->__use_sg - (SCSI_MAX_SG_SEGMENTS - 1); + next = sg_chain_ptr(&sgl[SCSI_MAX_SG_SEGMENTS - 1]); + while (left && next) { + sgl = next; + this = left; + if (this > SCSI_MAX_SG_SEGMENTS) { + this = SCSI_MAX_SG_SEGMENTS - 1; + index = SG_MEMPOOL_NR - 1; + } else + index = scsi_sgtable_index(this); + + left -= this; + + sgp = scsi_sg_pools + index; + + if (left) + next = sg_chain_ptr(&sgl[sgp->size - 1]); + + mempool_free(sgl, sgp->pool); + } + + /* + * Restore original, will be freed below + */ + sgl = cmd->request_buffer; + sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1; + } else + sgp = scsi_sg_pools + scsi_sgtable_index(cmd->__use_sg); - sgp = scsi_sg_pools + index; mempool_free(sgl, sgp->pool); } @@ -783,7 +888,7 @@ EXPORT_SYMBOL(scsi_free_sgtable); static void scsi_release_buffers(struct scsi_cmnd *cmd) { if (cmd->use_sg) - scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len); + scsi_free_sgtable(cmd); /* * Zero these out. They now point to freed memory, and it is @@ -998,7 +1103,6 @@ EXPORT_SYMBOL(scsi_io_completion); static int scsi_init_io(struct scsi_cmnd *cmd) { struct request *req = cmd->request; - struct scatterlist *sgpnt; int count; /* @@ -1011,14 +1115,13 @@ static int scsi_init_io(struct scsi_cmnd /* * If sg table allocation fails, requeue request later. */ - sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC); - if (unlikely(!sgpnt)) { + cmd->request_buffer = scsi_alloc_sgtable(cmd, GFP_ATOMIC); + if (unlikely(!cmd->request_buffer)) { scsi_unprep_request(req); return BLKPREP_DEFER; } req->buffer = NULL; - cmd->request_buffer = (char *) sgpnt; if (blk_pc_request(req)) cmd->request_bufflen = req->data_len; else @@ -1556,8 +1659,25 @@ struct request_queue *__scsi_alloc_queue if (!q) return NULL; + /* + * this limit is imposed by hardware restrictions + */ blk_queue_max_hw_segments(q, shost->sg_tablesize); - blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS); + + /* + * In the future, sg chaining support will be mandatory and this + * ifdef can then go away. Right now we don't have all archs + * converted, so better keep it safe. 
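A matching teardown sketch, with the same simplifications as the allocation sketch above, makes it clearer why scsi_free_sgtable() has to hop through sg_chain_ptr() from the last slot of each full array instead of doing a single free.

/* Companion to the allocation sketch: free a chained table by following
 * the link stored in the last slot of each full array. */
static void free_chained_table(struct scatterlist *sgl)
{
        while (sgl) {
                struct scatterlist *next = NULL;
                struct scatterlist *last = &sgl[SCSI_MAX_SG_SEGMENTS - 1];

                if (sg_is_chain(last))
                        next = sg_chain_ptr(last);

                kfree(sgl);
                sgl = next;
        }
}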
+ */ +#ifdef ARCH_HAS_SG_CHAIN + if (shost->use_sg_chaining) + blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS); + else + blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS); +#else + blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS); +#endif + blk_queue_max_sectors(q, shost->max_sectors); blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); blk_queue_segment_boundary(q, shost->dma_boundary); @@ -2236,18 +2356,19 @@ EXPORT_SYMBOL_GPL(scsi_target_unblock); * * Returns virtual address of the start of the mapped page */ -void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count, +void *scsi_kmap_atomic_sg(struct scatterlist *sgl, int sg_count, size_t *offset, size_t *len) { int i; size_t sg_len = 0, len_complete = 0; + struct scatterlist *sg; struct page *page; WARN_ON(!irqs_disabled()); - for (i = 0; i < sg_count; i++) { + for_each_sg(sgl, sg, sg_count, i) { len_complete = sg_len; /* Complete sg-entries */ - sg_len += sg[i].length; + sg_len += sg->length; if (sg_len > *offset) break; } @@ -2261,10 +2382,10 @@ void *scsi_kmap_atomic_sg(struct scatter } /* Offset starting from the beginning of first page in this sg-entry */ - *offset = *offset - len_complete + sg[i].offset; + *offset = *offset - len_complete + sg->offset; /* Assumption: contiguous pages can be accessed as "page + i" */ - page = nth_page(sg[i].page, (*offset >> PAGE_SHIFT)); + page = nth_page(sg->page, (*offset >> PAGE_SHIFT)); *offset &= ~PAGE_MASK; /* Bytes in this sg-entry from *offset to the end of the page */ diff -puN drivers/scsi/scsi_tgt_lib.c~git-block drivers/scsi/scsi_tgt_lib.c --- a/drivers/scsi/scsi_tgt_lib.c~git-block +++ a/drivers/scsi/scsi_tgt_lib.c @@ -332,7 +332,7 @@ static void scsi_tgt_cmd_done(struct scs scsi_tgt_uspace_send_status(cmd, tcmd->itn_id, tcmd->tag); if (cmd->request_buffer) - scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len); + scsi_free_sgtable(cmd); queue_work(scsi_tgtd, &tcmd->work); } @@ -373,7 +373,7 @@ static int scsi_tgt_init_cmd(struct scsi } eprintk("cmd %p cnt %d\n", cmd, cmd->use_sg); - scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len); + scsi_free_sgtable(cmd); return -EINVAL; } diff -puN drivers/scsi/sd.c~git-block drivers/scsi/sd.c --- a/drivers/scsi/sd.c~git-block +++ a/drivers/scsi/sd.c @@ -824,27 +824,6 @@ static int sd_sync_cache(struct scsi_dis return 0; } -static int sd_issue_flush(struct request_queue *q, struct gendisk *disk, - sector_t *error_sector) -{ - int ret = 0; - struct scsi_device *sdp = q->queuedata; - struct scsi_disk *sdkp; - - if (sdp->sdev_state != SDEV_RUNNING) - return -ENXIO; - - sdkp = scsi_disk_get_from_dev(&sdp->sdev_gendev); - - if (!sdkp) - return -ENODEV; - - if (sdkp->WCE) - ret = sd_sync_cache(sdkp); - scsi_disk_put(sdkp); - return ret; -} - static void sd_prepare_flush(struct request_queue *q, struct request *rq) { memset(rq->cmd, 0, sizeof(rq->cmd)); diff -puN drivers/scsi/sg.c~git-block drivers/scsi/sg.c --- a/drivers/scsi/sg.c~git-block +++ a/drivers/scsi/sg.c @@ -1165,7 +1165,7 @@ sg_vma_nopage(struct vm_area_struct *vma sg = rsv_schp->buffer; sa = vma->vm_start; for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end); - ++k, ++sg) { + ++k, sg = sg_next(sg)) { len = vma->vm_end - sa; len = (len < sg->length) ? 
len : sg->length; if (offset < len) { @@ -1209,7 +1209,7 @@ sg_mmap(struct file *filp, struct vm_are sa = vma->vm_start; sg = rsv_schp->buffer; for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end); - ++k, ++sg) { + ++k, sg = sg_next(sg)) { len = vma->vm_end - sa; len = (len < sg->length) ? len : sg->length; sa += len; @@ -1840,7 +1840,7 @@ sg_build_indirect(Sg_scatter_hold * schp } for (k = 0, sg = schp->buffer, rem_sz = blk_size; (rem_sz > 0) && (k < mx_sc_elems); - ++k, rem_sz -= ret_sz, ++sg) { + ++k, rem_sz -= ret_sz, sg = sg_next(sg)) { num = (rem_sz > scatter_elem_sz_prev) ? scatter_elem_sz_prev : rem_sz; @@ -1913,7 +1913,7 @@ sg_write_xfer(Sg_request * srp) if (res) return res; - for (; p; ++sg, ksglen = sg->length, + for (; p; sg = sg_next(sg), ksglen = sg->length, p = page_address(sg->page)) { if (usglen <= 0) break; @@ -1992,7 +1992,7 @@ sg_remove_scat(Sg_scatter_hold * schp) int k; for (k = 0; (k < schp->k_use_sg) && sg->page; - ++k, ++sg) { + ++k, sg = sg_next(sg)) { SCSI_LOG_TIMEOUT(5, printk( "sg_remove_scat: k=%d, pg=0x%p, len=%d\n", k, sg->page, sg->length)); @@ -2045,7 +2045,7 @@ sg_read_xfer(Sg_request * srp) if (res) return res; - for (; p; ++sg, ksglen = sg->length, + for (; p; sg = sg_next(sg), ksglen = sg->length, p = page_address(sg->page)) { if (usglen <= 0) break; @@ -2092,7 +2092,7 @@ sg_read_oxfer(Sg_request * srp, char __u if ((!outp) || (num_read_xfer <= 0)) return 0; - for (k = 0; (k < schp->k_use_sg) && sg->page; ++k, ++sg) { + for (k = 0; (k < schp->k_use_sg) && sg->page; ++k, sg = sg_next(sg)) { num = sg->length; if (num > num_read_xfer) { if (__copy_to_user(outp, page_address(sg->page), @@ -2142,7 +2142,7 @@ sg_link_reserve(Sg_fd * sfp, Sg_request SCSI_LOG_TIMEOUT(4, printk("sg_link_reserve: size=%d\n", size)); rem = size; - for (k = 0; k < rsv_schp->k_use_sg; ++k, ++sg) { + for (k = 0; k < rsv_schp->k_use_sg; ++k, sg = sg_next(sg)) { num = sg->length; if (rem <= num) { sfp->save_scat_len = num; diff -puN drivers/scsi/stex.c~git-block drivers/scsi/stex.c --- a/drivers/scsi/stex.c~git-block +++ a/drivers/scsi/stex.c @@ -1123,6 +1123,7 @@ static struct scsi_host_template driver_ .this_id = -1, .sg_tablesize = ST_MAX_SG, .cmd_per_lun = ST_CMD_PER_LUN, + .use_sg_chaining = ENABLE_SG_CHAINING, }; static int stex_set_dma_mask(struct pci_dev * pdev) diff -puN drivers/scsi/sym53c416.c~git-block drivers/scsi/sym53c416.c --- a/drivers/scsi/sym53c416.c~git-block +++ a/drivers/scsi/sym53c416.c @@ -854,5 +854,6 @@ static struct scsi_host_template driver_ .cmd_per_lun = 1, .unchecked_isa_dma = 1, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; #include "scsi_module.c" diff -puN drivers/scsi/sym53c8xx_2/sym_glue.c~git-block drivers/scsi/sym53c8xx_2/sym_glue.c --- a/drivers/scsi/sym53c8xx_2/sym_glue.c~git-block +++ a/drivers/scsi/sym53c8xx_2/sym_glue.c @@ -1827,6 +1827,7 @@ static struct scsi_host_template sym2_te .eh_host_reset_handler = sym53c8xx_eh_host_reset_handler, .this_id = 7, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, .max_sectors = 0xFFFF, #ifdef SYM_LINUX_PROC_INFO_SUPPORT .proc_info = sym53c8xx_proc_info, diff -puN drivers/scsi/u14-34f.c~git-block drivers/scsi/u14-34f.c --- a/drivers/scsi/u14-34f.c~git-block +++ a/drivers/scsi/u14-34f.c @@ -450,7 +450,8 @@ static struct scsi_host_template driver_ .slave_configure = u14_34f_slave_configure, .this_id = 7, .unchecked_isa_dma = 1, - .use_clustering = ENABLE_CLUSTERING + .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = 
ENABLE_SG_CHAINING, }; #if !defined(__BIG_ENDIAN_BITFIELD) && !defined(__LITTLE_ENDIAN_BITFIELD) diff -puN drivers/scsi/ultrastor.c~git-block drivers/scsi/ultrastor.c --- a/drivers/scsi/ultrastor.c~git-block +++ a/drivers/scsi/ultrastor.c @@ -1197,5 +1197,6 @@ static struct scsi_host_template driver_ .cmd_per_lun = ULTRASTOR_MAX_CMDS_PER_LUN, .unchecked_isa_dma = 1, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; #include "scsi_module.c" diff -puN drivers/scsi/wd7000.c~git-block drivers/scsi/wd7000.c --- a/drivers/scsi/wd7000.c~git-block +++ a/drivers/scsi/wd7000.c @@ -1671,6 +1671,7 @@ static struct scsi_host_template driver_ .cmd_per_lun = 1, .unchecked_isa_dma = 1, .use_clustering = ENABLE_CLUSTERING, + .use_sg_chaining = ENABLE_SG_CHAINING, }; #include "scsi_module.c" diff -puN drivers/usb/storage/alauda.c~git-block drivers/usb/storage/alauda.c --- a/drivers/usb/storage/alauda.c~git-block +++ a/drivers/usb/storage/alauda.c @@ -798,12 +798,13 @@ static int alauda_read_data(struct us_da { unsigned char *buffer; u16 lba, max_lba; - unsigned int page, len, index, offset; + unsigned int page, len, offset; unsigned int blockshift = MEDIA_INFO(us).blockshift; unsigned int pageshift = MEDIA_INFO(us).pageshift; unsigned int blocksize = MEDIA_INFO(us).blocksize; unsigned int pagesize = MEDIA_INFO(us).pagesize; unsigned int uzonesize = MEDIA_INFO(us).uzonesize; + struct scatterlist *sg; int result; /* @@ -827,7 +828,8 @@ static int alauda_read_data(struct us_da max_lba = MEDIA_INFO(us).capacity >> (blockshift + pageshift); result = USB_STOR_TRANSPORT_GOOD; - index = offset = 0; + offset = 0; + sg = NULL; while (sectors > 0) { unsigned int zone = lba / uzonesize; /* integer division */ @@ -873,7 +875,7 @@ static int alauda_read_data(struct us_da /* Store the data in the transfer buffer */ usb_stor_access_xfer_buf(buffer, len, us->srb, - &index, &offset, TO_XFER_BUF); + &sg, &offset, TO_XFER_BUF); page = 0; lba++; @@ -891,11 +893,12 @@ static int alauda_write_data(struct us_d unsigned int sectors) { unsigned char *buffer, *blockbuffer; - unsigned int page, len, index, offset; + unsigned int page, len, offset; unsigned int blockshift = MEDIA_INFO(us).blockshift; unsigned int pageshift = MEDIA_INFO(us).pageshift; unsigned int blocksize = MEDIA_INFO(us).blocksize; unsigned int pagesize = MEDIA_INFO(us).pagesize; + struct scatterlist *sg; u16 lba, max_lba; int result; @@ -929,7 +932,8 @@ static int alauda_write_data(struct us_d max_lba = MEDIA_INFO(us).capacity >> (pageshift + blockshift); result = USB_STOR_TRANSPORT_GOOD; - index = offset = 0; + offset = 0; + sg = NULL; while (sectors > 0) { /* Write as many sectors as possible in this block */ @@ -946,7 +950,7 @@ static int alauda_write_data(struct us_d /* Get the data from the transfer buffer */ usb_stor_access_xfer_buf(buffer, len, us->srb, - &index, &offset, FROM_XFER_BUF); + &sg, &offset, FROM_XFER_BUF); result = alauda_write_lba(us, lba, page, pages, buffer, blockbuffer); diff -puN drivers/usb/storage/datafab.c~git-block drivers/usb/storage/datafab.c --- a/drivers/usb/storage/datafab.c~git-block +++ a/drivers/usb/storage/datafab.c @@ -98,7 +98,8 @@ static int datafab_read_data(struct us_d unsigned char thistime; unsigned int totallen, alloclen; int len, result; - unsigned int sg_idx = 0, sg_offset = 0; + unsigned int sg_offset = 0; + struct scatterlist *sg = NULL; // we're working in LBA mode. according to the ATA spec, // we can support up to 28-bit addressing. 
I don't know if Datafab @@ -155,7 +156,7 @@ static int datafab_read_data(struct us_d // Store the data in the transfer buffer usb_stor_access_xfer_buf(buffer, len, us->srb, - &sg_idx, &sg_offset, TO_XFER_BUF); + &sg, &sg_offset, TO_XFER_BUF); sector += thistime; totallen -= len; @@ -181,7 +182,8 @@ static int datafab_write_data(struct us_ unsigned char thistime; unsigned int totallen, alloclen; int len, result; - unsigned int sg_idx = 0, sg_offset = 0; + unsigned int sg_offset = 0; + struct scatterlist *sg = NULL; // we're working in LBA mode. according to the ATA spec, // we can support up to 28-bit addressing. I don't know if Datafab @@ -217,7 +219,7 @@ static int datafab_write_data(struct us_ // Get the data from the transfer buffer usb_stor_access_xfer_buf(buffer, len, us->srb, - &sg_idx, &sg_offset, FROM_XFER_BUF); + &sg, &sg_offset, FROM_XFER_BUF); command[0] = 0; command[1] = thistime; diff -puN drivers/usb/storage/jumpshot.c~git-block drivers/usb/storage/jumpshot.c --- a/drivers/usb/storage/jumpshot.c~git-block +++ a/drivers/usb/storage/jumpshot.c @@ -119,7 +119,8 @@ static int jumpshot_read_data(struct us_ unsigned char thistime; unsigned int totallen, alloclen; int len, result; - unsigned int sg_idx = 0, sg_offset = 0; + unsigned int sg_offset = 0; + struct scatterlist *sg = NULL; // we're working in LBA mode. according to the ATA spec, // we can support up to 28-bit addressing. I don't know if Jumpshot @@ -170,7 +171,7 @@ static int jumpshot_read_data(struct us_ // Store the data in the transfer buffer usb_stor_access_xfer_buf(buffer, len, us->srb, - &sg_idx, &sg_offset, TO_XFER_BUF); + &sg, &sg_offset, TO_XFER_BUF); sector += thistime; totallen -= len; @@ -195,7 +196,8 @@ static int jumpshot_write_data(struct us unsigned char thistime; unsigned int totallen, alloclen; int len, result, waitcount; - unsigned int sg_idx = 0, sg_offset = 0; + unsigned int sg_offset = 0; + struct scatterlist *sg = NULL; // we're working in LBA mode. according to the ATA spec, // we can support up to 28-bit addressing. I don't know if Jumpshot @@ -225,7 +227,7 @@ static int jumpshot_write_data(struct us // Get the data from the transfer buffer usb_stor_access_xfer_buf(buffer, len, us->srb, - &sg_idx, &sg_offset, FROM_XFER_BUF); + &sg, &sg_offset, FROM_XFER_BUF); command[0] = 0; command[1] = thistime; diff -puN drivers/usb/storage/protocol.c~git-block drivers/usb/storage/protocol.c --- a/drivers/usb/storage/protocol.c~git-block +++ a/drivers/usb/storage/protocol.c @@ -157,7 +157,7 @@ void usb_stor_transparent_scsi_command(s * pick up from where this one left off. */ unsigned int usb_stor_access_xfer_buf(unsigned char *buffer, - unsigned int buflen, struct scsi_cmnd *srb, unsigned int *index, + unsigned int buflen, struct scsi_cmnd *srb, struct scatterlist **sgptr, unsigned int *offset, enum xfer_buf_dir dir) { unsigned int cnt; @@ -184,16 +184,17 @@ unsigned int usb_stor_access_xfer_buf(un * located in high memory -- then kmap() will map it to a temporary * position in the kernel's virtual address space. */ } else { - struct scatterlist *sg = - (struct scatterlist *) srb->request_buffer - + *index; + struct scatterlist *sg = *sgptr; + + if (!sg) + sg = (struct scatterlist *) srb->request_buffer; /* This loop handles a single s-g list entry, which may * include multiple pages. Find the initial page structure * and the starting offset within the page, and update * the *offset and *index values for the next loop. 
*/ cnt = 0; - while (cnt < buflen && *index < srb->use_sg) { + while (cnt < buflen) { struct page *page = sg->page + ((sg->offset + *offset) >> PAGE_SHIFT); unsigned int poff = @@ -209,8 +210,7 @@ unsigned int usb_stor_access_xfer_buf(un /* Transfer continues to next s-g entry */ *offset = 0; - ++*index; - ++sg; + sg = sg_next(sg); } /* Transfer the data for all the pages in this @@ -234,6 +234,7 @@ unsigned int usb_stor_access_xfer_buf(un sglen -= plen; } } + *sgptr = sg; } /* Return the amount actually transferred */ @@ -245,9 +246,10 @@ unsigned int usb_stor_access_xfer_buf(un void usb_stor_set_xfer_buf(unsigned char *buffer, unsigned int buflen, struct scsi_cmnd *srb) { - unsigned int index = 0, offset = 0; + unsigned int offset = 0; + struct scatterlist *sg = NULL; - usb_stor_access_xfer_buf(buffer, buflen, srb, &index, &offset, + usb_stor_access_xfer_buf(buffer, buflen, srb, &sg, &offset, TO_XFER_BUF); if (buflen < srb->request_bufflen) srb->resid = srb->request_bufflen - buflen; diff -puN drivers/usb/storage/protocol.h~git-block drivers/usb/storage/protocol.h --- a/drivers/usb/storage/protocol.h~git-block +++ a/drivers/usb/storage/protocol.h @@ -52,7 +52,7 @@ extern void usb_stor_transparent_scsi_co enum xfer_buf_dir {TO_XFER_BUF, FROM_XFER_BUF}; extern unsigned int usb_stor_access_xfer_buf(unsigned char *buffer, - unsigned int buflen, struct scsi_cmnd *srb, unsigned int *index, + unsigned int buflen, struct scsi_cmnd *srb, struct scatterlist **, unsigned int *offset, enum xfer_buf_dir dir); extern void usb_stor_set_xfer_buf(unsigned char *buffer, diff -puN drivers/usb/storage/sddr09.c~git-block drivers/usb/storage/sddr09.c --- a/drivers/usb/storage/sddr09.c~git-block +++ a/drivers/usb/storage/sddr09.c @@ -705,7 +705,8 @@ sddr09_read_data(struct us_data *us, unsigned char *buffer; unsigned int lba, maxlba, pba; unsigned int page, pages; - unsigned int len, index, offset; + unsigned int len, offset; + struct scatterlist *sg; int result; // Figure out the initial LBA and page @@ -730,7 +731,8 @@ sddr09_read_data(struct us_data *us, // contiguous LBA's. Another exercise left to the student. 
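The caller-side pattern for the reworked usb_stor_access_xfer_buf() is the same in every usb-storage sub-driver touched here: start with a NULL scatterlist cursor and a zero offset, and let the helper advance both across calls. A compressed sketch of that loop is shown below; compute_chunk_len() and read_from_media() are placeholders for the driver-specific parts, and error handling is omitted.

/* Sketch of the common caller pattern after the conversion: the sg/offset
 * pair replaces the old integer index and persists across calls. */
struct scatterlist *sg = NULL;
unsigned int offset = 0;

while (sectors > 0) {
        unsigned int len = compute_chunk_len();         /* driver specific */

        read_from_media(buffer, len);                   /* driver specific */

        /* Store the chunk; sg and offset are updated for the next call */
        usb_stor_access_xfer_buf(buffer, len, us->srb,
                                 &sg, &offset, TO_XFER_BUF);

        sectors -= len >> 9;
}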
result = 0; - index = offset = 0; + offset = 0; + sg = NULL; while (sectors > 0) { @@ -777,7 +779,7 @@ sddr09_read_data(struct us_data *us, // Store the data in the transfer buffer usb_stor_access_xfer_buf(buffer, len, us->srb, - &index, &offset, TO_XFER_BUF); + &sg, &offset, TO_XFER_BUF); page = 0; lba++; @@ -931,7 +933,8 @@ sddr09_write_data(struct us_data *us, unsigned int pagelen, blocklen; unsigned char *blockbuffer; unsigned char *buffer; - unsigned int len, index, offset; + unsigned int len, offset; + struct scatterlist *sg; int result; // Figure out the initial LBA and page @@ -968,7 +971,8 @@ sddr09_write_data(struct us_data *us, } result = 0; - index = offset = 0; + offset = 0; + sg = NULL; while (sectors > 0) { @@ -987,7 +991,7 @@ sddr09_write_data(struct us_data *us, // Get the data from the transfer buffer usb_stor_access_xfer_buf(buffer, len, us->srb, - &index, &offset, FROM_XFER_BUF); + &sg, &offset, FROM_XFER_BUF); result = sddr09_write_lba(us, lba, page, pages, buffer, blockbuffer); diff -puN drivers/usb/storage/sddr55.c~git-block drivers/usb/storage/sddr55.c --- a/drivers/usb/storage/sddr55.c~git-block +++ a/drivers/usb/storage/sddr55.c @@ -167,7 +167,8 @@ static int sddr55_read_data(struct us_da unsigned long address; unsigned short pages; - unsigned int len, index, offset; + unsigned int len, offset; + struct scatterlist *sg; // Since we only read in one block at a time, we have to create // a bounce buffer and move the data a piece at a time between the @@ -178,7 +179,8 @@ static int sddr55_read_data(struct us_da buffer = kmalloc(len, GFP_NOIO); if (buffer == NULL) return USB_STOR_TRANSPORT_ERROR; /* out of memory */ - index = offset = 0; + offset = 0; + sg = NULL; while (sectors>0) { @@ -255,7 +257,7 @@ static int sddr55_read_data(struct us_da // Store the data in the transfer buffer usb_stor_access_xfer_buf(buffer, len, us->srb, - &index, &offset, TO_XFER_BUF); + &sg, &offset, TO_XFER_BUF); page = 0; lba++; @@ -287,7 +289,8 @@ static int sddr55_write_data(struct us_d unsigned short pages; int i; - unsigned int len, index, offset; + unsigned int len, offset; + struct scatterlist *sg; /* check if we are allowed to write */ if (info->read_only || info->force_read_only) { @@ -304,7 +307,8 @@ static int sddr55_write_data(struct us_d buffer = kmalloc(len, GFP_NOIO); if (buffer == NULL) return USB_STOR_TRANSPORT_ERROR; - index = offset = 0; + offset = 0; + sg = NULL; while (sectors > 0) { @@ -322,7 +326,7 @@ static int sddr55_write_data(struct us_d // Get the data from the transfer buffer usb_stor_access_xfer_buf(buffer, len, us->srb, - &index, &offset, FROM_XFER_BUF); + &sg, &offset, FROM_XFER_BUF); US_DEBUGP("Write %02X pages, to PBA %04X" " (LBA %04X) page %02X\n", diff -puN drivers/usb/storage/shuttle_usbat.c~git-block drivers/usb/storage/shuttle_usbat.c --- a/drivers/usb/storage/shuttle_usbat.c~git-block +++ a/drivers/usb/storage/shuttle_usbat.c @@ -996,7 +996,8 @@ static int usbat_flash_read_data(struct unsigned char thistime; unsigned int totallen, alloclen; int len, result; - unsigned int sg_idx = 0, sg_offset = 0; + unsigned int sg_offset = 0; + struct scatterlist *sg = NULL; result = usbat_flash_check_media(us, info); if (result != USB_STOR_TRANSPORT_GOOD) @@ -1050,7 +1051,7 @@ static int usbat_flash_read_data(struct /* Store the data in the transfer buffer */ usb_stor_access_xfer_buf(buffer, len, us->srb, - &sg_idx, &sg_offset, TO_XFER_BUF); + &sg, &sg_offset, TO_XFER_BUF); sector += thistime; totallen -= len; @@ -1086,7 +1087,8 @@ static int 
usbat_flash_write_data(struct unsigned char thistime; unsigned int totallen, alloclen; int len, result; - unsigned int sg_idx = 0, sg_offset = 0; + unsigned int sg_offset = 0; + struct scatterlist *sg = NULL; result = usbat_flash_check_media(us, info); if (result != USB_STOR_TRANSPORT_GOOD) @@ -1125,7 +1127,7 @@ static int usbat_flash_write_data(struct /* Get the data from the transfer buffer */ usb_stor_access_xfer_buf(buffer, len, us->srb, - &sg_idx, &sg_offset, FROM_XFER_BUF); + &sg, &sg_offset, FROM_XFER_BUF); /* ATA command 0x30 (WRITE SECTORS) */ usbat_pack_ata_sector_cmd(command, thistime, sector, 0x30); @@ -1165,8 +1167,8 @@ static int usbat_hp8200e_handle_read10(s unsigned char *buffer; unsigned int len; unsigned int sector; - unsigned int sg_segment = 0; unsigned int sg_offset = 0; + struct scatterlist *sg = NULL; US_DEBUGP("handle_read10: transfersize %d\n", srb->transfersize); @@ -1223,9 +1225,6 @@ static int usbat_hp8200e_handle_read10(s sector |= short_pack(data[7+5], data[7+4]); transferred = 0; - sg_segment = 0; /* for keeping track of where we are in */ - sg_offset = 0; /* the scatter/gather list */ - while (transferred != srb->request_bufflen) { if (len > srb->request_bufflen - transferred) @@ -1258,7 +1257,7 @@ static int usbat_hp8200e_handle_read10(s /* Store the data in the transfer buffer */ usb_stor_access_xfer_buf(buffer, len, srb, - &sg_segment, &sg_offset, TO_XFER_BUF); + &sg, &sg_offset, TO_XFER_BUF); /* Update the amount transferred and the sector number */ diff -puN fs/bio.c~git-block fs/bio.c --- a/fs/bio.c~git-block +++ a/fs/bio.c @@ -109,11 +109,14 @@ static inline struct bio_vec *bvec_alloc void bio_free(struct bio *bio, struct bio_set *bio_set) { - const int pool_idx = BIO_POOL_IDX(bio); + if (bio->bi_io_vec) { + const int pool_idx = BIO_POOL_IDX(bio); - BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); + BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); + + mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); + } - mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); mempool_free(bio, bio_set->bio_pool); } @@ -127,21 +130,9 @@ static void bio_fs_destructor(struct bio void bio_init(struct bio *bio) { - bio->bi_next = NULL; - bio->bi_bdev = NULL; + memset(bio, 0, sizeof(*bio)); bio->bi_flags = 1 << BIO_UPTODATE; - bio->bi_rw = 0; - bio->bi_vcnt = 0; - bio->bi_idx = 0; - bio->bi_phys_segments = 0; - bio->bi_hw_segments = 0; - bio->bi_hw_front_size = 0; - bio->bi_hw_back_size = 0; - bio->bi_size = 0; - bio->bi_max_vecs = 0; - bio->bi_end_io = NULL; atomic_set(&bio->bi_cnt, 1); - bio->bi_private = NULL; } /** diff -puN fs/fs-writeback.c~git-block fs/fs-writeback.c --- a/fs/fs-writeback.c~git-block +++ a/fs/fs-writeback.c @@ -14,6 +14,7 @@ */ #include +#include #include #include #include diff -puN fs/splice.c~git-block fs/splice.c --- a/fs/splice.c~git-block +++ a/fs/splice.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * Attempt to steal a page from a pipe buffer. 
This should perhaps go into @@ -254,11 +255,16 @@ ssize_t splice_to_pipe(struct pipe_inode } while (page_nr < spd_pages) - page_cache_release(spd->pages[page_nr++]); + spd->spd_release(spd, page_nr++); return ret; } +static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) +{ + page_cache_release(spd->pages[i]); +} + static int __generic_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, @@ -277,6 +283,7 @@ __generic_file_splice_read(struct file * .partial = partial, .flags = flags, .ops = &page_cache_pipe_buf_ops, + .spd_release = spd_release_page, }; index = *ppos >> PAGE_CACHE_SHIFT; @@ -1224,6 +1231,218 @@ static long do_splice(struct file *in, l } /* + * Just copy the data to user space + */ +static int pipe_to_user_copy(struct pipe_inode_info *pipe, + struct pipe_buffer *buf, struct splice_desc *sd) +{ + char *src; + int ret; + + ret = buf->ops->confirm(pipe, buf); + if (unlikely(ret)) + return ret; + + /* + * See if we can use the atomic maps, by prefaulting in the + * pages and doing an atomic copy + */ + if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) { + src = buf->ops->map(pipe, buf, 1); + ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset, + sd->len); + buf->ops->unmap(pipe, buf, src); + if (!ret) { + ret = sd->len; + goto out; + } + } + + /* + * No dice, use slow non-atomic map and copy + */ + src = buf->ops->map(pipe, buf, 0); + + ret = sd->len; + if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len)) + ret = -EFAULT; + + buf->ops->unmap(pipe, buf, src); +out: + if (ret > 0) + sd->u.userptr += ret; + return ret; +} + +/* + * This actor doesn't really do anything interesting, it merely settles + * the pipe page and adds it to the work list for insertion when the entire + * pipe has been processed. + */ +static int pipe_to_user_map(struct pipe_inode_info *pipe, + struct pipe_buffer *buf, struct splice_desc *sd) +{ + struct splice_pipe_desc *spd = sd->u.data; + int error; + + if (buf->len & ~PAGE_MASK) + return -EINVAL; + + error = buf->ops->confirm(pipe, buf); + if (!error) { + spd->pages[spd->nr_pages++] = buf->page; + return buf->len; + } + + return error; +} + +/* + * Setup a vma for this address range, and let pipe_to_user_map() insert + * pages into that. + */ +static int vmsplice_pipe_map(struct pipe_inode_info *pipe, + struct splice_desc *sd) +{ + struct mm_struct *mm = current->mm; + struct page *pages[PIPE_BUFFERS]; + struct splice_pipe_desc spd = { + .pages = pages, + }; + struct vm_area_struct *vma; + unsigned long addr; + int ret, i, err; + + if (sd->total_len & ~PAGE_MASK) + return -EINVAL; + + /* + * Run through the pipe buffers and settle the contents. The number + * of processed pages will be put in spd.nr_pages. + */ + addr = (unsigned long) sd->u.userptr; + sd->pos = 0; + sd->u.data = &spd; + err = __splice_from_pipe(pipe, sd, pipe_to_user_map); + if (unlikely(err <= 0)) + return err; + else if (unlikely(!spd.nr_pages)) + return 0; + + /* + * We have a non-zero number of pages available. Now find the + * associated vma so we can establish pages mappings there. + */ + ret = -EINVAL; + down_read(&mm->mmap_sem); + + vma = find_vma(mm, addr); + if (unlikely(!vma)) + goto out; + + for (i = ret = err = 0; i < spd.nr_pages; i++) { + err = vm_insert_page(vma, addr, spd.pages[i]); + if (unlikely(err)) + break; + + addr += PAGE_SIZE; + ret += PAGE_SIZE; + } + +out: + up_read(&mm->mmap_sem); + + if (err && !ret) + ret = err; + + return ret; +} + +/* + * vmsplice a pipe to user memory. 
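From user space this path is exercised through vmsplice(2) on the read side of a pipe. A minimal, hedged usage sketch follows; note that the mapping path added above (selected with SPLICE_F_MOVE) only accepts whole, page-sized pipe buffers, so the sketch uses the plain copy path and error handling is trimmed.

/* Hedged user-space sketch: read pipe contents into a buffer with
 * vmsplice(2). Passing SPLICE_F_MOVE instead of 0 would request the new
 * mapping path, which (per the code above) requires page-aligned,
 * page-sized pipe buffers. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
        static char buf[65536] __attribute__((aligned(4096)));
        struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
        int pfd[2];
        ssize_t n;

        pipe(pfd);
        write(pfd[1], "hello, vmsplice\n", 16);

        /* Copy (or, with SPLICE_F_MOVE, map) the pipe pages into buf */
        n = vmsplice(pfd[0], &iov, 1, 0);
        if (n > 0)
                fwrite(buf, 1, n, stdout);
        return 0;
}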
If SPLICE_F_MOVE is set, we will attempt + * to move the pipe pages to the user address space. Otherwise a simple + * copy is done. + */ +static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, + unsigned long nr_segs, unsigned int flags) +{ + struct pipe_inode_info *pipe; + struct splice_desc sd; + long spliced, ret; + + pipe = pipe_info(file->f_path.dentry->d_inode); + if (!pipe) + return -EBADF; + + if (pipe->inode) + mutex_lock(&pipe->inode->i_mutex); + + spliced = ret = 0; + while (nr_segs) { + void __user *base; + size_t len; + + /* + * Get user address base and length for this iovec. + */ + ret = get_user(base, &iov->iov_base); + if (unlikely(ret)) + break; + ret = get_user(len, &iov->iov_len); + if (unlikely(ret)) + break; + + /* + * Sanity check this iovec. 0 read succeeds. + */ + if (unlikely(!len)) + break; + if (unlikely(!base)) { + ret = -EFAULT; + break; + } + + sd.len = 0; + sd.total_len = len; + sd.flags = flags; + sd.u.userptr = base; + sd.pos = 0; + + /* + * SPLICE_F_MOVE is set, don't copy the data but attempt + * to map it into the app address space. + */ + if (flags & SPLICE_F_MOVE) + ret = vmsplice_pipe_map(pipe, &sd); + else + ret = __splice_from_pipe(pipe, &sd, pipe_to_user_copy); + + if (ret < 0) + break; + + spliced += ret; + + /* + * If we transferred less than a pipe buffer length, break + * out of the loop and let the caller retry. + */ + if (ret < len) + break; + + nr_segs--; + iov++; + } + + if (pipe->inode) + mutex_unlock(&pipe->inode->i_mutex); + + if (!spliced) + spliced = ret; + + return spliced; +} + +/* * Map an iov into an array of pages and offset/length tupples. With the * partial_page structure, we can map several non-contiguous ranges into * our ones pages[] map instead of splitting that operation into pieces. @@ -1334,124 +1553,6 @@ static int get_iovec_page_array(const st return error; } -static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, - struct splice_desc *sd) -{ - char *src; - int ret; - - ret = buf->ops->confirm(pipe, buf); - if (unlikely(ret)) - return ret; - - /* - * See if we can use the atomic maps, by prefaulting in the - * pages and doing an atomic copy - */ - if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) { - src = buf->ops->map(pipe, buf, 1); - ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset, - sd->len); - buf->ops->unmap(pipe, buf, src); - if (!ret) { - ret = sd->len; - goto out; - } - } - - /* - * No dice, use slow non-atomic map and copy - */ - src = buf->ops->map(pipe, buf, 0); - - ret = sd->len; - if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len)) - ret = -EFAULT; - -out: - if (ret > 0) - sd->u.userptr += ret; - buf->ops->unmap(pipe, buf, src); - return ret; -} - -/* - * For lack of a better implementation, implement vmsplice() to userspace - * as a simple copy of the pipes pages to the user iov. - */ -static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, - unsigned long nr_segs, unsigned int flags) -{ - struct pipe_inode_info *pipe; - struct splice_desc sd; - ssize_t size; - int error; - long ret; - - pipe = pipe_info(file->f_path.dentry->d_inode); - if (!pipe) - return -EBADF; - - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); - - error = ret = 0; - while (nr_segs) { - void __user *base; - size_t len; - - /* - * Get user address base and length for this iovec. 
- */ - error = get_user(base, &iov->iov_base); - if (unlikely(error)) - break; - error = get_user(len, &iov->iov_len); - if (unlikely(error)) - break; - - /* - * Sanity check this iovec. 0 read succeeds. - */ - if (unlikely(!len)) - break; - if (unlikely(!base)) { - error = -EFAULT; - break; - } - - sd.len = 0; - sd.total_len = len; - sd.flags = flags; - sd.u.userptr = base; - sd.pos = 0; - - size = __splice_from_pipe(pipe, &sd, pipe_to_user); - if (size < 0) { - if (!ret) - ret = size; - - break; - } - - ret += size; - - if (size < len) - break; - - nr_segs--; - iov++; - } - - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); - - if (!ret) - ret = error; - - return ret; -} - /* * vmsplice splices a user address range into a pipe. It can be thought of * as splice-from-memory, where the regular splice is splice-from-file (or @@ -1468,6 +1569,7 @@ static long vmsplice_to_pipe(struct file .partial = partial, .flags = flags, .ops = &user_page_pipe_buf_ops, + .spd_release = spd_release_page, }; pipe = pipe_info(file->f_path.dentry->d_inode); diff -puN include/asm-i386/dma-mapping.h~git-block include/asm-i386/dma-mapping.h --- a/include/asm-i386/dma-mapping.h~git-block +++ a/include/asm-i386/dma-mapping.h @@ -2,10 +2,10 @@ #define _ASM_I386_DMA_MAPPING_H #include +#include #include #include -#include #include #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) @@ -35,18 +35,19 @@ dma_unmap_single(struct device *dev, dma } static inline int -dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, +dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction) { + struct scatterlist *sg; int i; BUG_ON(!valid_dma_direction(direction)); - WARN_ON(nents == 0 || sg[0].length == 0); + WARN_ON(nents == 0 || sglist[0].length == 0); - for (i = 0; i < nents; i++ ) { - BUG_ON(!sg[i].page); + for_each_sg(sglist, sg, nents, i) { + BUG_ON(!sg->page); - sg[i].dma_address = page_to_phys(sg[i].page) + sg[i].offset; + sg->dma_address = page_to_phys(sg->page) + sg->offset; } flush_write_buffers(); diff -puN include/asm-i386/scatterlist.h~git-block include/asm-i386/scatterlist.h --- a/include/asm-i386/scatterlist.h~git-block +++ a/include/asm-i386/scatterlist.h @@ -10,6 +10,8 @@ struct scatterlist { unsigned int length; }; +#define ARCH_HAS_SG_CHAIN + /* These macros should be used after a pci_map_sg call has been done * to get bus addresses of each of the SG entries and their lengths. 
* You should only work with the number of sg entries pci_map_sg diff -puN include/asm-ia64/dma-mapping.h~git-block include/asm-ia64/dma-mapping.h --- a/include/asm-ia64/dma-mapping.h~git-block +++ a/include/asm-ia64/dma-mapping.h @@ -5,6 +5,7 @@ * Copyright (C) 2003-2004 Hewlett-Packard Co * David Mosberger-Tang */ +#include #include #define dma_alloc_coherent platform_dma_alloc_coherent diff -puN include/asm-ia64/scatterlist.h~git-block include/asm-ia64/scatterlist.h --- a/include/asm-ia64/scatterlist.h~git-block +++ a/include/asm-ia64/scatterlist.h @@ -30,4 +30,6 @@ struct scatterlist { #define sg_dma_len(sg) ((sg)->dma_length) #define sg_dma_address(sg) ((sg)->dma_address) +#define ARCH_HAS_SG_CHAIN + #endif /* _ASM_IA64_SCATTERLIST_H */ diff -puN include/asm-powerpc/dma-mapping.h~git-block include/asm-powerpc/dma-mapping.h --- a/include/asm-powerpc/dma-mapping.h~git-block +++ a/include/asm-powerpc/dma-mapping.h @@ -12,7 +12,7 @@ #include /* need struct page definitions */ #include -#include +#include #include #define DMA_ERROR_CODE (~(dma_addr_t)0x0) @@ -276,14 +276,15 @@ static inline void dma_unmap_page(struct } static inline int -dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, +dma_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction direction) { + struct scatterlist *sg; int i; BUG_ON(direction == DMA_NONE); - for (i = 0; i < nents; i++, sg++) { + for_each_sg(sgl, sg, nents, i) { BUG_ON(!sg->page); __dma_sync_page(sg->page, sg->offset, sg->length, direction); sg->dma_address = page_to_bus(sg->page) + sg->offset; @@ -318,26 +319,28 @@ static inline void dma_sync_single_for_d } static inline void dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sg, int nents, + struct scatterlist *sgl, int nents, enum dma_data_direction direction) { + struct scatterlist *sg; int i; BUG_ON(direction == DMA_NONE); - for (i = 0; i < nents; i++, sg++) + for_each_sg(sgl, sg, nents, i) __dma_sync_page(sg->page, sg->offset, sg->length, direction); } static inline void dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nents, + struct scatterlist *sgl, int nents, enum dma_data_direction direction) { + struct scatterlist *sg; int i; BUG_ON(direction == DMA_NONE); - for (i = 0; i < nents; i++, sg++) + for_each_sg(sgl, sg, nents, i) __dma_sync_page(sg->page, sg->offset, sg->length, direction); } diff -puN include/asm-powerpc/scatterlist.h~git-block include/asm-powerpc/scatterlist.h --- a/include/asm-powerpc/scatterlist.h~git-block +++ a/include/asm-powerpc/scatterlist.h @@ -41,5 +41,7 @@ struct scatterlist { #define ISA_DMA_THRESHOLD (~0UL) #endif +#define ARCH_HAS_SG_CHAIN + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_SCATTERLIST_H */ diff -puN include/asm-sparc/scatterlist.h~git-block include/asm-sparc/scatterlist.h --- a/include/asm-sparc/scatterlist.h~git-block +++ a/include/asm-sparc/scatterlist.h @@ -19,4 +19,6 @@ struct scatterlist { #define ISA_DMA_THRESHOLD (~0UL) +#define ARCH_HAS_SG_CHAIN + #endif /* !(_SPARC_SCATTERLIST_H) */ diff -puN include/asm-sparc64/scatterlist.h~git-block include/asm-sparc64/scatterlist.h --- a/include/asm-sparc64/scatterlist.h~git-block +++ a/include/asm-sparc64/scatterlist.h @@ -20,4 +20,6 @@ struct scatterlist { #define ISA_DMA_THRESHOLD (~0UL) +#define ARCH_HAS_SG_CHAIN + #endif /* !(_SPARC64_SCATTERLIST_H) */ diff -puN include/asm-x86_64/dma-mapping.h~git-block include/asm-x86_64/dma-mapping.h --- a/include/asm-x86_64/dma-mapping.h~git-block +++ a/include/asm-x86_64/dma-mapping.h @@ 
-6,8 +6,7 @@ * documentation. */ - -#include +#include #include #include diff -puN include/asm-x86_64/scatterlist.h~git-block include/asm-x86_64/scatterlist.h --- a/include/asm-x86_64/scatterlist.h~git-block +++ a/include/asm-x86_64/scatterlist.h @@ -11,6 +11,8 @@ struct scatterlist { unsigned int dma_length; }; +#define ARCH_HAS_SG_CHAIN + #define ISA_DMA_THRESHOLD (0x00ffffff) /* These macros should be used after a pci_map_sg call has been done diff -puN include/linux/bio.h~git-block include/linux/bio.h --- a/include/linux/bio.h~git-block +++ a/include/linux/bio.h @@ -176,13 +176,28 @@ struct bio { #define bio_offset(bio) bio_iovec((bio))->bv_offset #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) #define bio_sectors(bio) ((bio)->bi_size >> 9) -#define bio_cur_sectors(bio) (bio_iovec(bio)->bv_len >> 9) -#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio))) #define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) #define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) +#define bio_empty_barrier(bio) (bio_barrier(bio) && !(bio)->bi_size) + +static inline unsigned int bio_cur_sectors(struct bio *bio) +{ + if (bio->bi_vcnt) + return bio_iovec(bio)->bv_len >> 9; + + return 0; +} + +static inline void *bio_data(struct bio *bio) +{ + if (bio->bi_vcnt) + return page_address(bio_page(bio)) + bio_offset(bio); + + return NULL; +} /* * will die diff -puN include/linux/blkdev.h~git-block include/linux/blkdev.h --- a/include/linux/blkdev.h~git-block +++ a/include/linux/blkdev.h @@ -1,6 +1,8 @@ #ifndef _LINUX_BLKDEV_H #define _LINUX_BLKDEV_H +#ifdef CONFIG_BLOCK + #include #include #include @@ -32,8 +34,6 @@ ) #endif -#ifdef CONFIG_BLOCK - struct scsi_ioctl_command; struct request_queue; @@ -344,7 +344,6 @@ typedef void (unplug_fn) (struct request struct bio_vec; typedef int (merge_bvec_fn) (struct request_queue *, struct bio *, struct bio_vec *); -typedef int (issue_flush_fn) (struct request_queue *, struct gendisk *, sector_t *); typedef void (prepare_flush_fn) (struct request_queue *, struct request *); typedef void (softirq_done_fn)(struct request *); @@ -382,7 +381,6 @@ struct request_queue prep_rq_fn *prep_rq_fn; unplug_fn *unplug_fn; merge_bvec_fn *merge_bvec_fn; - issue_flush_fn *issue_flush_fn; prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; @@ -555,6 +553,7 @@ enum { #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) +#define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) @@ -637,10 +636,23 @@ static inline void blk_queue_bounce(stru } #endif /* CONFIG_MMU */ -#define rq_for_each_bio(_bio, rq) \ +struct req_iterator { + int i; + struct bio *bio; +}; + +/* This should not be used directly - use rq_for_each_segment */ +#define __rq_for_each_bio(_bio, rq) \ if ((rq->bio)) \ for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) +#define rq_for_each_segment(bvl, _rq, _iter) \ + __rq_for_each_bio(_iter.bio, _rq) \ + bio_for_each_segment(bvl, _iter.bio, _iter.i) + +#define rq_iter_last(rq, _iter) \ + (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1) + extern int blk_register_queue(struct gendisk *disk); extern void 
blk_unregister_queue(struct gendisk *disk); extern void register_disk(struct gendisk *dev); @@ -662,8 +674,8 @@ extern int sg_scsi_ioctl(struct file *, /* * Temporary export, until SCSI gets fixed up. */ -extern int ll_back_merge_fn(struct request_queue *, struct request *, - struct bio *); +extern int blk_rq_append_bio(struct request_queue *q, struct request *rq, + struct bio *bio); /* * A queue has just exitted congestion. Note this in the global counter of @@ -731,7 +743,9 @@ static inline void blk_run_address_space extern int end_that_request_first(struct request *, int, int); extern int end_that_request_chunk(struct request *, int, int); extern void end_that_request_last(struct request *, int); -extern void end_request(struct request *req, int uptodate); +extern void end_request(struct request *, int); +extern void end_queued_request(struct request *, int); +extern void end_dequeued_request(struct request *, int); extern void blk_complete_request(struct request *); /* @@ -769,7 +783,6 @@ extern void blk_queue_dma_alignment(stru extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); -extern void blk_queue_issue_flush_fn(struct request_queue *, issue_flush_fn *); extern int blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); @@ -810,7 +823,6 @@ static inline struct request *blk_map_qu return bqt->tag_index[tag]; } -extern void blk_rq_bio_prep(struct request_queue *, struct request *, struct bio *); extern int blkdev_issue_flush(struct block_device *, sector_t *); #define MAX_PHYS_SEGMENTS 128 diff -puN include/linux/i2o.h~git-block include/linux/i2o.h --- a/include/linux/i2o.h~git-block +++ a/include/linux/i2o.h @@ -32,6 +32,7 @@ #include /* work_struct */ #include #include +#include #include #include /* Needed for MUTEX init macros */ @@ -837,7 +838,7 @@ static inline int i2o_dma_map_sg(struct if ((sizeof(dma_addr_t) > 4) && c->pae_support) *mptr++ = cpu_to_le32(i2o_dma_high(sg_dma_address(sg))); #endif - sg++; + sg = sg_next(sg); } *sg_ptr = mptr; diff -puN include/linux/ide.h~git-block include/linux/ide.h --- a/include/linux/ide.h~git-block +++ a/include/linux/ide.h @@ -764,7 +764,7 @@ typedef struct hwif_s { unsigned int nsect; unsigned int nleft; - unsigned int cursg; + struct scatterlist *cursg; unsigned int cursg_ofs; int rqsize; /* max sectors per request */ @@ -1054,11 +1054,6 @@ extern ide_startstop_t ide_do_reset (ide extern void ide_init_drive_cmd (struct request *rq); /* - * this function returns error location sector offset in case of a write error - */ -extern u64 ide_get_error_location(ide_drive_t *, char *); - -/* * "action" parameter type for ide_do_drive_cmd() below. 
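Returning to the blkdev.h hunk above: the new req_iterator/rq_for_each_segment() pair replaces the open-coded rq_for_each_bio() plus bio_for_each_segment() nesting in drivers. A hedged sketch of a driver-side loop follows; my_transfer_request() and copy_segment_to_hw() are placeholders, and lowmem pages are assumed so that page_address() is valid.

/* Hedged sketch of a block driver walking a request with the new
 * iterator; copy_segment_to_hw() stands in for the real work. */
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/mm.h>

static void my_transfer_request(struct request *rq)
{
        struct req_iterator iter;
        struct bio_vec *bvec;

        rq_for_each_segment(bvec, rq, iter) {
                void *buf = page_address(bvec->bv_page) + bvec->bv_offset;

                copy_segment_to_hw(buf, bvec->bv_len);
        }
}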
*/ typedef enum { diff -puN include/linux/libata.h~git-block include/linux/libata.h --- a/include/linux/libata.h~git-block +++ a/include/linux/libata.h @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include @@ -430,6 +430,7 @@ struct ata_queued_cmd { unsigned long flags; /* ATA_QCFLAG_xxx */ unsigned int tag; unsigned int n_elem; + unsigned int n_iter; unsigned int orig_n_elem; int dma_dir; @@ -440,7 +441,7 @@ struct ata_queued_cmd { unsigned int nbytes; unsigned int curbytes; - unsigned int cursg; + struct scatterlist *cursg; unsigned int cursg_ofs; struct scatterlist sgent; @@ -1042,7 +1043,7 @@ ata_sg_is_last(struct scatterlist *sg, s return 1; if (qc->pad_len) return 0; - if (((sg - qc->__sg) + 1) == qc->n_elem) + if (qc->n_iter == qc->n_elem) return 1; return 0; } @@ -1050,6 +1051,7 @@ ata_sg_is_last(struct scatterlist *sg, s static inline struct scatterlist * ata_qc_first_sg(struct ata_queued_cmd *qc) { + qc->n_iter = 0; if (qc->n_elem) return qc->__sg; if (qc->pad_len) @@ -1062,8 +1064,8 @@ ata_qc_next_sg(struct scatterlist *sg, s { if (sg == &qc->pad_sgent) return NULL; - if (++sg - qc->__sg < qc->n_elem) - return sg; + if (++qc->n_iter < qc->n_elem) + return sg_next(sg); if (qc->pad_len) return &qc->pad_sgent; return NULL; @@ -1308,9 +1310,11 @@ static inline void ata_qc_reinit(struct qc->dma_dir = DMA_NONE; qc->__sg = NULL; qc->flags = 0; - qc->cursg = qc->cursg_ofs = 0; + qc->cursg = NULL; + qc->cursg_ofs = 0; qc->nbytes = qc->curbytes = 0; qc->n_elem = 0; + qc->n_iter = 0; qc->err_mask = 0; qc->pad_len = 0; qc->sect_size = ATA_SECT_SIZE; diff -puN include/linux/net.h~git-block include/linux/net.h --- a/include/linux/net.h~git-block +++ a/include/linux/net.h @@ -22,6 +22,7 @@ #include struct poll_table_struct; +struct pipe_inode_info; struct inode; struct net; @@ -166,6 +167,8 @@ struct proto_ops { struct vm_area_struct * vma); ssize_t (*sendpage) (struct socket *sock, struct page *page, int offset, size_t size, int flags); + ssize_t (*splice_read)(struct socket *sock, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, unsigned int flags); }; struct net_proto_family { diff -puN include/linux/scatterlist.h~git-block include/linux/scatterlist.h --- a/include/linux/scatterlist.h~git-block +++ a/include/linux/scatterlist.h @@ -20,4 +20,88 @@ static inline void sg_init_one(struct sc sg_set_buf(sg, buf, buflen); } +/* + * We overload the LSB of the page pointer to indicate whether it's + * a valid sg entry, or whether it points to the start of a new scatterlist. + * Those low bits are there for everyone! (thanks mason :-) + */ +#define sg_is_chain(sg) ((unsigned long) (sg)->page & 0x01) +#define sg_chain_ptr(sg) \ + ((struct scatterlist *) ((unsigned long) (sg)->page & ~0x01)) + +/** + * sg_next - return the next scatterlist entry in a list + * @sg: The current sg entry + * + * Usually the next entry will be @sg@ + 1, but if this sg element is part + * of a chained scatterlist, it could jump to the start of a new + * scatterlist array. + * + * Note that the caller must ensure that there are further entries after + * the current entry, this function will NOT return NULL for an end-of-list. 
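Taken together, the helpers added in this scatterlist.h hunk (sg_next() here, with for_each_sg(), sg_last() and sg_chain() just below) can be exercised directly. The sketch below chains two small fixed arrays and walks them as one logical list; it assumes an architecture that defines ARCH_HAS_SG_CHAIN and uses buffers only for printing, not DMA.

/* Hedged illustration of the new helpers: chain two arrays and walk the
 * result as a single four-entry list. The chain slot is skipped
 * transparently by sg_next(). */
#include <linux/kernel.h>
#include <linux/scatterlist.h>
#include <linux/string.h>

static void chain_demo(void *buf0, void *buf1, void *buf2, void *buf3)
{
        struct scatterlist a[3], b[2];  /* a[2] is sacrificed for the link */
        struct scatterlist *sg;
        int i;

        memset(a, 0, sizeof(a));
        memset(b, 0, sizeof(b));

        sg_set_buf(&a[0], buf0, 512);
        sg_set_buf(&a[1], buf1, 512);
        sg_set_buf(&b[0], buf2, 512);
        sg_set_buf(&b[1], buf3, 512);

        sg_chain(a, 3, b);              /* a[2] now points at b */

        /* Visits a[0], a[1], b[0], b[1] */
        for_each_sg(a, sg, 4, i)
                printk(KERN_INFO "seg %d: len %u\n", i, sg->length);
}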
+ * + */ +static inline struct scatterlist *sg_next(struct scatterlist *sg) +{ + sg++; + + if (unlikely(sg_is_chain(sg))) + sg = sg_chain_ptr(sg); + + return sg; +} + +/* + * Loop over each sg element, following the pointer to a new list if necessary + */ +#define for_each_sg(sglist, sg, nr, __i) \ + for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg)) + +/** + * sg_last - return the last scatterlist entry in a list + * @sgl: First entry in the scatterlist + * @nents: Number of entries in the scatterlist + * + * Should only be used casually, it (currently) scan the entire list + * to get the last entry. + * + * Note that the @sgl@ pointer passed in need not be the first one, + * the important bit is that @nents@ denotes the number of entries that + * exist from @sgl@. + * + */ +static inline struct scatterlist *sg_last(struct scatterlist *sgl, + unsigned int nents) +{ +#ifndef ARCH_HAS_SG_CHAIN + struct scatterlist *ret = &sgl[nents - 1]; +#else + struct scatterlist *sg, *ret = NULL; + int i; + + for_each_sg(sgl, sg, nents, i) + ret = sg; + +#endif + return ret; +} + +/** + * sg_chain - Chain two sglists together + * @prv: First scatterlist + * @prv_nents: Number of entries in prv + * @sgl: Second scatterlist + * + * Links @prv@ and @sgl@ together, to form a longer scatterlist. + * + */ +static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, + struct scatterlist *sgl) +{ +#ifndef ARCH_HAS_SG_CHAIN + BUG(); +#endif + prv[prv_nents - 1].page = (struct page *) ((unsigned long) sgl | 0x01); +} + #endif /* _LINUX_SCATTERLIST_H */ diff -puN include/linux/skbuff.h~git-block include/linux/skbuff.h --- a/include/linux/skbuff.h~git-block +++ a/include/linux/skbuff.h @@ -96,6 +96,7 @@ struct net_device; struct scatterlist; +struct pipe_inode_info; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct nf_conntrack { @@ -1557,6 +1558,11 @@ extern int skb_store_bits(struct extern __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, __wsum csum); +extern int skb_splice_bits(struct sk_buff *skb, + unsigned int offset, + struct pipe_inode_info *pipe, + unsigned int len, + unsigned int flags); extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); extern void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); diff -puN include/linux/splice.h~git-block include/linux/splice.h --- a/include/linux/splice.h~git-block +++ a/include/linux/splice.h @@ -53,6 +53,7 @@ struct splice_pipe_desc { int nr_pages; /* number of pages in map */ unsigned int flags; /* splice flags */ const struct pipe_buf_operations *ops;/* ops associated with output pipe */ + void (*spd_release)(struct splice_pipe_desc *, unsigned int); }; typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, diff -puN include/linux/writeback.h~git-block include/linux/writeback.h --- a/include/linux/writeback.h~git-block +++ a/include/linux/writeback.h @@ -5,6 +5,7 @@ #define WRITEBACK_H #include +#include struct backing_dev_info; diff -puN include/net/tcp.h~git-block include/net/tcp.h --- a/include/net/tcp.h~git-block +++ a/include/net/tcp.h @@ -309,6 +309,9 @@ extern int tcp_twsk_unique(struct sock extern void tcp_twsk_destructor(struct sock *sk); +extern ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, unsigned int flags); + static inline void tcp_dec_quickack_mode(struct sock *sk, const unsigned int pkts) { diff -puN include/scsi/scsi.h~git-block 
include/scsi/scsi.h --- a/include/scsi/scsi.h~git-block +++ a/include/scsi/scsi.h @@ -11,13 +11,6 @@ #include /* - * The maximum sg list length SCSI can cope with - * (currently must be a power of 2 between 32 and 256) - */ -#define SCSI_MAX_PHYS_SEGMENTS MAX_PHYS_SEGMENTS - - -/* * SCSI command lengths */ diff -puN include/scsi/scsi_cmnd.h~git-block include/scsi/scsi_cmnd.h --- a/include/scsi/scsi_cmnd.h~git-block +++ a/include/scsi/scsi_cmnd.h @@ -5,6 +5,7 @@ #include #include #include +#include struct request; struct scatterlist; @@ -71,7 +72,7 @@ struct scsi_cmnd { /* These elements define the operation we ultimately want to perform */ unsigned short use_sg; /* Number of pieces of scatter-gather */ - unsigned short sglist_len; /* size of malloc'd scatter-gather list */ + unsigned short __use_sg; unsigned underflow; /* Return error if less than this amount is transferred */ @@ -133,7 +134,7 @@ extern void *scsi_kmap_atomic_sg(struct extern void scsi_kunmap_atomic_sg(void *virt); extern struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *, gfp_t); -extern void scsi_free_sgtable(struct scatterlist *, int); +extern void scsi_free_sgtable(struct scsi_cmnd *); extern int scsi_dma_map(struct scsi_cmnd *cmd); extern void scsi_dma_unmap(struct scsi_cmnd *cmd); @@ -153,6 +154,6 @@ static inline int scsi_get_resid(struct } #define scsi_for_each_sg(cmd, sg, nseg, __i) \ - for (__i = 0, sg = scsi_sglist(cmd); __i < (nseg); __i++, (sg)++) + for_each_sg(scsi_sglist(cmd), sg, nseg, __i) #endif /* _SCSI_SCSI_CMND_H */ diff -puN include/scsi/scsi_host.h~git-block include/scsi/scsi_host.h --- a/include/scsi/scsi_host.h~git-block +++ a/include/scsi/scsi_host.h @@ -39,6 +39,9 @@ struct blk_queue_tags; #define DISABLE_CLUSTERING 0 #define ENABLE_CLUSTERING 1 +#define DISABLE_SG_CHAINING 0 +#define ENABLE_SG_CHAINING 1 + enum scsi_eh_timer_return { EH_NOT_HANDLED, EH_HANDLED, @@ -443,6 +446,15 @@ struct scsi_host_template { unsigned ordered_tag:1; /* + * true if the low-level driver can support sg chaining. this + * will be removed eventually when all the drivers are + * converted to support sg chaining. + * + * Status: OBSOLETE + */ + unsigned use_sg_chaining:1; + + /* * Countdown for host blocking with no commands outstanding */ unsigned int max_host_blocked; @@ -586,6 +598,7 @@ struct Scsi_Host { unsigned unchecked_isa_dma:1; unsigned use_clustering:1; unsigned use_blk_tcq:1; + unsigned use_sg_chaining:1; /* * Host has requested that no further requests come through for the diff -puN kernel/sched.c~git-block kernel/sched.c --- a/kernel/sched.c~git-block +++ a/kernel/sched.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include diff -puN lib/swiotlb.c~git-block lib/swiotlb.c --- a/lib/swiotlb.c~git-block +++ a/lib/swiotlb.c @@ -677,16 +677,17 @@ swiotlb_sync_single_range_for_device(str * same here. */ int -swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems, +swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, int dir) { + struct scatterlist *sg; void *addr; dma_addr_t dev_addr; int i; BUG_ON(dir == DMA_NONE); - for (i = 0; i < nelems; i++, sg++) { + for_each_sg(sgl, sg, nelems, i) { addr = SG_ENT_VIRT_ADDRESS(sg); dev_addr = virt_to_bus(addr); if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) { @@ -696,7 +697,7 @@ swiotlb_map_sg(struct device *hwdev, str to do proper error handling. 
*/ swiotlb_full(hwdev, sg->length, dir, 0); swiotlb_unmap_sg(hwdev, sg - i, i, dir); - sg[0].dma_length = 0; + sgl[0].dma_length = 0; return 0; } sg->dma_address = virt_to_bus(map); @@ -712,19 +713,21 @@ swiotlb_map_sg(struct device *hwdev, str * concerning calls here are the same as for swiotlb_unmap_single() above. */ void -swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems, +swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, int dir) { + struct scatterlist *sg; int i; BUG_ON(dir == DMA_NONE); - for (i = 0; i < nelems; i++, sg++) + for_each_sg(sgl, sg, nelems, i) { if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) unmap_single(hwdev, bus_to_virt(sg->dma_address), sg->dma_length, dir); else if (dir == DMA_FROM_DEVICE) dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length); + } } /* @@ -735,19 +738,21 @@ swiotlb_unmap_sg(struct device *hwdev, s * and usage. */ static void -swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sg, +swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, int dir, int target) { + struct scatterlist *sg; int i; BUG_ON(dir == DMA_NONE); - for (i = 0; i < nelems; i++, sg++) + for_each_sg(sgl, sg, nelems, i) { if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) sync_single(hwdev, bus_to_virt(sg->dma_address), sg->dma_length, dir, target); else if (dir == DMA_FROM_DEVICE) dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length); + } } void diff -puN mm/bounce.c~git-block mm/bounce.c --- a/mm/bounce.c~git-block +++ a/mm/bounce.c @@ -280,6 +280,12 @@ void blk_queue_bounce(struct request_que mempool_t *pool; /* + * Data-less bio, nothing to bounce + */ + if (bio_empty_barrier(*bio_orig)) + return; + + /* * for non-isa bounce case, just check if the bounce pfn is equal * to or bigger than the highest pfn in the system -- in that case, * don't waste time iterating over bio segments diff -puN mm/readahead.c~git-block mm/readahead.c --- a/mm/readahead.c~git-block +++ a/mm/readahead.c @@ -15,6 +15,7 @@ #include #include #include +#include void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { diff -puN net/core/skbuff.c~git-block net/core/skbuff.c --- a/net/core/skbuff.c~git-block +++ a/net/core/skbuff.c @@ -52,6 +52,7 @@ #endif #include #include +#include #include #include #include @@ -71,6 +72,40 @@ static struct kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; +static void sock_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + struct sk_buff *skb = (struct sk_buff *) buf->private; + + kfree_skb(skb); +} + +static void sock_pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + struct sk_buff *skb = (struct sk_buff *) buf->private; + + skb_get(skb); +} + +static int sock_pipe_buf_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + return 1; +} + + +/* Pipe buffer operations for a socket. */ +static struct pipe_buf_operations sock_pipe_buf_ops = { + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .confirm = generic_pipe_buf_confirm, + .release = sock_pipe_buf_release, + .steal = sock_pipe_buf_steal, + .get = sock_pipe_buf_get, +}; + /* * Keep out-of-line to prevent kernel bloat. * __builtin_return_address is not used because it is not always @@ -1126,6 +1161,217 @@ fault: return -EFAULT; } +/* + * Callback from splice_to_pipe(), if we need to release some pages + * at the end of the spd in case we error'ed out in filling the pipe. 
+ */ +static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) +{ + struct sk_buff *skb = (struct sk_buff *) spd->partial[i].private; + + kfree_skb(skb); +} + +/* + * Fill page/offset/length into spd, if it can hold more pages. + */ +static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, + unsigned int len, unsigned int offset, + struct sk_buff *skb) +{ + if (unlikely(spd->nr_pages == PIPE_BUFFERS)) + return 1; + + spd->pages[spd->nr_pages] = page; + spd->partial[spd->nr_pages].len = len; + spd->partial[spd->nr_pages].offset = offset; + spd->partial[spd->nr_pages].private = (unsigned long) skb_get(skb); + spd->nr_pages++; + return 0; +} + +/* + * Map linear and fragment data from the skb to spd. Returns number of + * pages mapped. + */ +static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, + unsigned int *total_len, + struct splice_pipe_desc *spd) +{ + unsigned int nr_pages = spd->nr_pages; + unsigned int poff, plen, len, toff, tlen; + int headlen, seg; + + toff = *offset; + tlen = *total_len; + if (!tlen) + goto err; + + /* + * if the offset is greater than the linear part, go directly to + * the fragments. + */ + headlen = skb_headlen(skb); + if (toff >= headlen) { + toff -= headlen; + goto map_frag; + } + + /* + * first map the linear region into the pages/partial map, skipping + * any potential initial offset. + */ + len = 0; + while (len < headlen) { + void *p = skb->data + len; + + poff = (unsigned long) p & (PAGE_SIZE - 1); + plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff); + len += plen; + + if (toff) { + if (plen <= toff) { + toff -= plen; + continue; + } + plen -= toff; + poff += toff; + toff = 0; + } + + plen = min(plen, tlen); + if (!plen) + break; + + /* + * just jump directly to update and return, no point + * in going over fragments when the output is full. + */ + if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb)) + goto done; + + tlen -= plen; + } + + /* + * then map the fragments + */ +map_frag: + for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { + const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; + + plen = f->size; + poff = f->page_offset; + + if (toff) { + if (plen <= toff) { + toff -= plen; + continue; + } + plen -= toff; + poff += toff; + toff = 0; + } + + plen = min(plen, tlen); + if (!plen) + break; + + if (spd_fill_page(spd, f->page, plen, poff, skb)) + break; + + tlen -= plen; + } + +done: + if (spd->nr_pages - nr_pages) { + *offset = 0; + *total_len = tlen; + return 0; + } +err: + return 1; +} + +/* + * Map data from the skb to a pipe. Should handle both the linear part, + * the fragments, and the frag list. It does NOT handle frag lists within + * the frag list, if such a thing exists. We'd probably need to recurse to + * handle that cleanly. + */ +int skb_splice_bits(struct sk_buff *__skb, unsigned int offset, + struct pipe_inode_info *pipe, unsigned int tlen, + unsigned int flags) +{ + struct partial_page partial[PIPE_BUFFERS]; + struct page *pages[PIPE_BUFFERS]; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .flags = flags, + .ops = &sock_pipe_buf_ops, + .spd_release = sock_spd_release, + }; + struct sk_buff *skb; + + /* + * I'd love to avoid the clone here, but tcp_read_sock() + * ignores reference counts and unconditonally kills the sk_buff + * on return from the actor. 
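[Editor's note, not part of the patch] __skb_splice_bits() above has to express the skb's linear region, which is only virtually contiguous, as page/offset/length triples for the splice_pipe_desc, while also honouring the caller's starting offset and total length budget. The arithmetic is easier to follow in isolation; the user-space sketch below reproduces just that loop over a plain malloc'd buffer. map_linear() is an illustrative name, not something from the patch, and only the linear-region half is modelled.

/*
 * Sketch of the per-page splitting done for the linear region in
 * __skb_splice_bits(): honour an initial skip ("toff") and a total
 * length cap ("tlen"), emitting one (page, offset, length) triple per
 * chunk.  Illustration only.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

static void map_linear(char *data, unsigned long headlen,
		       unsigned long toff, unsigned long tlen)
{
	unsigned long len = 0;

	while (len < headlen && tlen) {
		char *p = data + len;
		unsigned long poff = (uintptr_t)p & (PAGE_SIZE - 1);
		unsigned long plen = min_ul(headlen - len, PAGE_SIZE - poff);

		len += plen;

		/* consume the requested starting offset first */
		if (toff) {
			if (plen <= toff) {
				toff -= plen;
				continue;
			}
			plen -= toff;
			poff += toff;
			toff = 0;
		}

		plen = min_ul(plen, tlen);
		tlen -= plen;

		printf("page @%#lx  offset %4lu  length %4lu\n",
		       (unsigned long)((uintptr_t)p & ~(PAGE_SIZE - 1)),
		       poff, plen);
	}
}

int main(void)
{
	/* 10000-byte buffer, skip the first 100 bytes, emit at most 6000 */
	char *buf = malloc(10000);

	map_linear(buf, 10000, 100, 6000);
	free(buf);
	return 0;
}

Each printed triple corresponds to one spd_fill_page() call in the kernel version; the fragment loop in the hunk does the same bookkeeping, but starts from page/offset pairs that are already per-page.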
+ */ + skb = skb_clone(__skb, GFP_KERNEL); + if (unlikely(!skb)) + return -ENOMEM; + + /* + * __skb_splice_bits() only fails if the output has no room left, + * so no point in going over the frag_list for the error case. + */ + if (__skb_splice_bits(skb, &offset, &tlen, &spd)) + goto done; + else if (!tlen) + goto done; + + /* + * now see if we have a frag_list to map + */ + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list = skb_shinfo(skb)->frag_list; + + for (; list && tlen; list = list->next) { + if (__skb_splice_bits(list, &offset, &tlen, &spd)) + break; + } + } + +done: + /* + * drop our reference to the clone, the pipe consumption will + * drop the rest. + */ + kfree_skb(skb); + + if (spd.nr_pages) { + int ret; + + /* + * Drop the socket lock, otherwise we have reverse + * locking dependencies between sk_lock and i_mutex + * here as compared to sendfile(). We enter here + * with the socket lock held, and splice_to_pipe() will + * grab the pipe inode lock. For sendfile() emulation, + * we call into ->sendpage() with the i_mutex lock held + * and networking will grab the socket lock. + */ + release_sock(__skb->sk); + ret = splice_to_pipe(pipe, &spd); + lock_sock(__skb->sk); + return ret; + } + + return 0; +} + /** * skb_store_bits - store bits from kernel buffer to skb * @skb: destination buffer diff -puN net/ipv4/af_inet.c~git-block net/ipv4/af_inet.c --- a/net/ipv4/af_inet.c~git-block +++ a/net/ipv4/af_inet.c @@ -838,6 +838,7 @@ const struct proto_ops inet_stream_ops = .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, .sendpage = tcp_sendpage, + .splice_read = tcp_splice_read, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, diff -puN net/ipv4/tcp.c~git-block net/ipv4/tcp.c --- a/net/ipv4/tcp.c~git-block +++ a/net/ipv4/tcp.c @@ -254,6 +254,10 @@ #include #include #include +#include +#include +#include +#include #include #include #include @@ -265,6 +269,7 @@ #include #include #include +#include #include #include @@ -292,6 +297,15 @@ EXPORT_SYMBOL(tcp_memory_allocated); EXPORT_SYMBOL(tcp_sockets_allocated); /* + * TCP splice context + */ +struct tcp_splice_state { + struct pipe_inode_info *pipe; + size_t len; + unsigned int flags; +}; + +/* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. * All the sk_stream_mem_schedule() is of this nature: accounting @@ -501,6 +515,120 @@ static inline void tcp_push(struct sock } } +static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, + unsigned int offset, size_t len) +{ + struct tcp_splice_state *tss = rd_desc->arg.data; + + return skb_splice_bits(skb, offset, tss->pipe, tss->len, tss->flags); +} + +static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss) +{ + /* Store TCP splice context information in read_descriptor_t. */ + read_descriptor_t rd_desc = { + .arg.data = tss, + }; + + return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv); +} + +/** + * tcp_splice_read - splice data from TCP socket to a pipe + * @sock: socket to splice from + * @ppos: position (not valid) + * @pipe: pipe to splice to + * @len: number of bytes to splice + * @flags: splice modifier flags + * + * Description: + * Will read pages from given socket and fill them into a pipe. 
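[Editor's note, not part of the patch] tcp_splice_read(), whose kernel-doc appears just above and whose body follows, is what a plain splice(2) from a connected TCP socket into a pipe ends up calling once the .splice_read hookup in the af_inet hunk is in place. A self-contained user-space sketch of that receive path is below; it uses a loopback connection with minimal error handling and, of course, needs a kernel that carries this patch, otherwise the splice() call fails. The NULL offset argument matters: sockets are not seekable, which is what the -ESPIPE check in the function body below enforces.

/*
 * Sketch only: queue bytes on a loopback TCP connection, then move
 * them into a pipe with splice() instead of copying through read().
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/socket.h>

int main(void)
{
	struct sockaddr_in addr;
	socklen_t alen = sizeof(addr);
	int lfd, cfd, sfd, pfd[2];
	char buf[64];
	ssize_t n;

	/* loopback listener on an ephemeral port */
	lfd = socket(AF_INET, SOCK_STREAM, 0);
	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	bind(lfd, (struct sockaddr *)&addr, sizeof(addr));
	listen(lfd, 1);
	getsockname(lfd, (struct sockaddr *)&addr, &alen);

	/* client side: connect and queue some payload */
	cfd = socket(AF_INET, SOCK_STREAM, 0);
	connect(cfd, (struct sockaddr *)&addr, sizeof(addr));
	write(cfd, "spliced hello\n", 14);

	sfd = accept(lfd, NULL, NULL);
	pipe(pfd);

	/* offset pointer must be NULL: a socket input cannot be seeked */
	n = splice(sfd, NULL, pfd[1], NULL, 14, SPLICE_F_MOVE);
	if (n < 0) {
		perror("splice");	/* kernel without socket splice_read */
		return 1;
	}

	/* drain the pipe the ordinary way just to show what arrived */
	n = read(pfd[0], buf, sizeof(buf));
	write(STDOUT_FILENO, buf, n);

	close(cfd); close(sfd); close(lfd);
	close(pfd[0]); close(pfd[1]);
	return 0;
}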
+ * + **/ +ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + struct sock *sk = sock->sk; + struct tcp_splice_state tss = { + .pipe = pipe, + .len = len, + .flags = flags, + }; + long timeo; + ssize_t spliced; + int ret; + + /* + * We can't seek on a socket input + */ + if (unlikely(*ppos)) + return -ESPIPE; + + ret = spliced = 0; + + lock_sock(sk); + + timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK); + while (tss.len) { + ret = __tcp_splice_read(sk, &tss); + if (ret < 0) + break; + else if (!ret) { + if (spliced) + break; + if (flags & SPLICE_F_NONBLOCK) { + ret = -EAGAIN; + break; + } + if (sock_flag(sk, SOCK_DONE)) + break; + if (sk->sk_err) { + ret = sock_error(sk); + break; + } + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + if (sk->sk_state == TCP_CLOSE) { + /* + * This occurs when user tries to read + * from never connected socket. + */ + if (!sock_flag(sk, SOCK_DONE)) + ret = -ENOTCONN; + break; + } + if (!timeo) { + ret = -EAGAIN; + break; + } + sk_wait_data(sk, &timeo); + if (signal_pending(current)) { + ret = sock_intr_errno(timeo); + break; + } + continue; + } + tss.len -= ret; + spliced += ret; + + release_sock(sk); + lock_sock(sk); + + if (sk->sk_err || sk->sk_state == TCP_CLOSE || + (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || + signal_pending(current)) + break; + } + + release_sock(sk); + + if (spliced) + return spliced; + + return ret; +} + static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags) { @@ -2532,6 +2660,7 @@ EXPORT_SYMBOL(tcp_poll); EXPORT_SYMBOL(tcp_read_sock); EXPORT_SYMBOL(tcp_recvmsg); EXPORT_SYMBOL(tcp_sendmsg); +EXPORT_SYMBOL(tcp_splice_read); EXPORT_SYMBOL(tcp_sendpage); EXPORT_SYMBOL(tcp_setsockopt); EXPORT_SYMBOL(tcp_shutdown); diff -puN net/ipv6/af_inet6.c~git-block net/ipv6/af_inet6.c --- a/net/ipv6/af_inet6.c~git-block +++ a/net/ipv6/af_inet6.c @@ -491,6 +491,7 @@ const struct proto_ops inet6_stream_ops .recvmsg = sock_common_recvmsg, /* ok */ .mmap = sock_no_mmap, .sendpage = tcp_sendpage, + .splice_read = tcp_splice_read, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, diff -puN net/socket.c~git-block net/socket.c --- a/net/socket.c~git-block +++ a/net/socket.c @@ -112,6 +112,9 @@ static long compat_sock_ioctl(struct fil static int sock_fasync(int fd, struct file *filp, int on); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); +static ssize_t sock_splice_read(struct file *file, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags); /* * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear @@ -134,6 +137,7 @@ static const struct file_operations sock .fasync = sock_fasync, .sendpage = sock_sendpage, .splice_write = generic_splice_sendpage, + .splice_read = sock_splice_read, }; /* @@ -691,6 +695,15 @@ static ssize_t sock_sendpage(struct file return sock->ops->sendpage(sock, page, offset, size, flags); } +static ssize_t sock_splice_read(struct file *file, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + struct socket *sock = file->private_data; + + return sock->ops->splice_read(sock, ppos, pipe, len, flags); +} + static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, struct sock_iocb *siocb) { _
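[Editor's note, not part of the patch] Inside the kernel, tcp_splice_read() above loops until the requested length has been spliced, the socket is drained, or an error or shutdown condition ends the transfer, and a non-blocking request with nothing pending comes back as -EAGAIN. User space sees the same semantics through the sock_splice_read() file operation added in the net/socket.c hunk, so the usual consumer is a short-read-tolerant relay loop. A hedged sketch follows; relay_fd() is an illustrative helper, shown moving a regular file through a pipe, and a socket descriptor drops into the same loop once its proto_ops provide .splice_read.

/*
 * Generic splice relay loop, user-space illustration only.  splice()
 * may return short, so both the fill and drain sides are looped.
 */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int relay_fd(int from, int to, size_t len)
{
	int pfd[2];
	ssize_t in, out;

	if (pipe(pfd) < 0)
		return -1;

	while (len) {
		/* fill the pipe; a short or zero return is normal */
		in = splice(from, NULL, pfd[1], NULL, len, SPLICE_F_MOVE);
		if (in < 0 && errno == EAGAIN)
			break;	/* non-blocking source drained; a real caller would poll() */
		if (in <= 0)
			break;

		/* drain the pipe completely before refilling it */
		while (in) {
			out = splice(pfd[0], NULL, to, NULL, in, SPLICE_F_MOVE);
			if (out <= 0)
				goto out;
			in -= out;
			len -= out;
		}
	}
out:
	close(pfd[0]);
	close(pfd[1]);
	return 0;
}

int main(int argc, char **argv)
{
	int in, out;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <infile> <outfile>\n", argv[0]);
		return 1;
	}
	in = open(argv[1], O_RDONLY);
	out = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (in < 0 || out < 0) {
		perror("open");
		return 1;
	}
	relay_fd(in, out, (size_t)-1);
	close(in);
	close(out);
	return 0;
}

SPLICE_F_MOVE is only a hint, the kernel is free to copy, and a short return simply means the pipe or the source ran out of room or data for that round.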